# 对文件系统进行数据建模以及文件搜索实战
本章主要讲解文件路径的存储与搜索。
# path_hierarchy tokenizer
这个分词器的作用是什么呢?直接看一个分词示例就明白了:
GET /_analyze
{
"text": "H:/dev/note/note-book",
"tokenizer": "path_hierarchy"
}
{
"tokens": [
{
"token": "H:",
"start_offset": 0,
"end_offset": 2,
"type": "word",
"position": 0
},
{
"token": "H:/dev",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "H:/dev/note",
"start_offset": 0,
"end_offset": 11,
"type": "word",
"position": 0
},
{
"token": "H:/dev/note/note-book",
"start_offset": 0,
"end_offset": 21,
"type": "word",
"position": 0
}
]
}
# 数据构造
先创建索引并定义 mappings(索引名 fs 是 filesystem 的缩写):
PUT /fs
{
"settings": {
"analysis": {
"analyzer": {
"paths":{
"tokenizer": "path_hierarchy"
}
}
}
}
}
PUT /fs/_mapping/file
{
"properties": {
"name": {
"type": "keyword"
},
"path": {
"type": "keyword",
"fields": {
"tree": {
"type": "text",
"analyzer": "paths"
}
}
}
}
}
插入数据
PUT /fs/file/1
{
"name": "README.txt",
"path": "/workspace/projects/helloworld",
"contents": "这是我的第一个 elasticsearch 程序"
}
# 搜索数据
需求:搜索文件内容包含 elasticsearch,且在 /workspace/projects 下的文件
GET /fs/file/_search
{
"query": {
"bool": {
"must": [
{"match": {
"contents": "elasticsearch"
}}
],
"filter": {
"term": {
"path.tree": "/workspace/projects"
}
}
}
}
}
GET /fs/file/_search
{
"query": {
"bool": {
"must": [
{"match": {
"contents": "elasticsearch"
}},
{
"constant_score": {
"filter": {
"term": {
"path.tree": "/workspace/projects"
}
}
}
}
]
}
}
}
上面是两种 filter 的写法,命中的文档一致,不同的只是评分:bool 的 filter 子句不参与评分,而放在 must 中的 constant_score 会额外贡献一个固定分值,因此第二种写法的 _score 会更高。下面是第一种写法的返回结果:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.284885,
"hits": [
{
"_index": "fs",
"_type": "file",
"_id": "1",
"_score": 1.284885,
"_source": {
"name": "README.txt",
"path": "/workspace/projects/helloworld",
"contents": "这是我的第一个 elasticsearch 程序"
}
}
]
}
}