# 快速上手-聚合分析

嵌套聚合,下钻分析,聚合分析

# 计算每个 tag 下的商品数量

语法:

  • aggs:聚合函数
  • NAME:给这个操作取一个名字
  • AGG_TYPE:聚合类型
GET /ecommerce/product/_search
{
  "aggs": {
    "NAME": {
      "AGG_TYPE": {}
    }
  }
}
1
2
3
4
5
6
7
8
GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
      }
    }
  }
}

------------ 响应

{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "ecommerce",
        "node": "sEvAlYxFRJe598mrSDwUjQ",
        "reason": {
          "type": "illegal_argument_exception",
          "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
        }
      }
    ],
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
    }
  },
  "status": 400
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

error.root_cause.reason 中说:在 text 字段上默认 fielddata=false, 需要设置为 true,通过生成正向索引并加载到内存中进行计算;

也就是说需要映射的修改:

PUT /ecommerce/product/_mapping
{
  "properties": {
    "tags":{
      "type": "text",
      "fielddata": true
    }
  }
}

--------------- 响应

{
  "acknowledged": true
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": { // 可以理解为是分组的意思
        "field": "tags"
      }
    }
  },
  "size": 0
}

--------------- 响应

{
  "took": 146,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fangzhu",
          "doc_count": 2
        },
        {
          "key": "meibai",
          "doc_count": 2
        },
        {
          "key": "qingxin",
          "doc_count": 1
        }
      ]
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

在请求的时候使用了一个 size=0,这个参数影响响应数据中的 hits.hits 中的数据

  • hits.total:总共参与聚合运算的目标数据,这里是 4 条
  • hits.hits:这 4条 数据的详细信息(原数据)
  • aggregations 聚合的响应
  • aggregations.group_by_tags.buckets:桶,也就是聚合的结果

doc_count_error_upper_bound 和 sum_other_doc_count 后面再讲解;

对于 buckets 返回的数据条数也可以通过 size 控制,aggs.group_by_tags.terms.size = n

# 先搜索,再聚合

需求:对名称中包含 yagao 的商品,计算每个 tag 下的商品数量

GET /ecommerce/product/_search
{
  "query": {
    "match": {
      "name": "yagao"
    }
  },
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
      }
    }
  },
  "size": 0
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

很简单,之前的搜索,再加上 aggs 即可

# 嵌套聚合

需求:计算每个 tag 下的商品平均价格

也就是说:需要先对 tags 进行分组,再计算商品的平均价格

GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
      },
      "aggs": {
        "avg_by_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  },
  "size": 0
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

响应

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_tags": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "fangzhu",
          "doc_count": 2,
          "avg_by_price": {
            "value": 27.5
          }
        },
        {
          "key": "meibai",
          "doc_count": 2,
          "avg_by_price": {
            "value": 40
          }
        },
        {
          "key": "qingxin",
          "doc_count": 1,
          "avg_by_price": {
            "value": 40
          }
        }
      ]
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

# 在聚合分析后再排序

需求:给上一个例子加上排序,按照计算结果(商品平均价格)降序排列

GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags",
        "order": {
          "avg_by_price": "desc"
        }
      },
      "aggs": {
        "avg_by_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  },
  "size": 0
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

在 terms 中增加 order 属性,并对 avg_by_price 这个计算结果进行排序

# 说明:为什么不用 java 代码讲解

我们现在全部都是用 es 的 restful api 在学习和讲解 es 的所欲知识点和功能点,但是没有使用一些编程语言去讲解(比如 java),原因有以下:

  1. es 最重要的 api,让我们进行各种尝试、学习甚至在某些环境下进行使用的 api,就是 restful api。如果你学习不用 es restful api,比如我上来就用 java api 来讲 es,也是可以的,但是你根本就漏掉了 es 知识的一大块,你都不知道它最重要的 restful api 是怎么用的
  2. 讲知识点,用 es restful api,更加方便,快捷,不用每次都写大量的 java 代码,能加快讲课的效率和速度,更加易于同学们关注es本身的知识和功能的学习
  3. 我们通常会讲完 es 知识点后,开始详细讲解 java api,如何用 java api 执行各种操作
  4. 我们每个篇章都会搭配一个项目实战,项目实战是完全基于 java 去开发的真实项目和系统

# 多次嵌套(下钻操作)

下钻操作:前面的列子,先分组,再聚合计算,这样的操作称为下钻操作(来源没有说)

需求:按照指定的价格区间进行分组,然后再每组内再按照 tags 进行分组,并计算每组内的商品平均价格,并按照平均价格进行降序排列

GET /ecommerce/product/_search
{
  "aggs": {
    "group_by_price": {
      "range": {
        "field": "price",
        "ranges": [
          {
            "from": 0,
            "to": 20
          },
          {
            "from": 20,
             "to": 40
          },
          {
            "from": 40,
            "to": 50
          }
        ]
      },
      "aggs": {
        "group_by_tags": {
          "terms": {
            "field": "tags",
            "order": {
              "group_by_avg": "desc"
            }
          },
          "aggs": {
            "group_by_avg": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  },
  "size": 0
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

响应

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_price": {
      "buckets": [
        {
          "key": "0.0-20.0",
          "from": 0,
          "to": 20,
          "doc_count": 0,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "20.0-40.0",
          "from": 20,
          "to": 40,
          "doc_count": 2,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "meibai",
                "doc_count": 1,
                "group_by_avg": {
                  "value": 30
                }
              },
              {
                "key": "fangzhu",
                "doc_count": 2,
                "group_by_avg": {
                  "value": 27.5
                }
              }
            ]
          }
        },
        {
          "key": "40.0-50.0",
          "from": 40,
          "to": 50,
          "doc_count": 1,
          "group_by_tags": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "qingxin",
                "doc_count": 1,
                "group_by_avg": {
                  "value": 40
                }
              }
            ]
          }
        }
      ]
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76