Es-ElasticSearch
2019-02-27 14:05:35 7 举报
AI智能生成
elasticsearch 原生操作 脑图
作者其他创作
大纲/内容
搜索详解
一般term查询(即词项查询)
match_all
post books/_search {"query":{"match_all":{}}}
term
post books/_search {"query":{"term":{"title":"思想"}}}
score 过滤
post books/_search {"min_score":0.6, "query":{"term":{"title":"java"}}}
terms
post books/_search {"query":{"terms":{"title":["java", "python"]}}}
结果处理
结果分页
post books/_search {"from":0, "size":100, "query":{"term":{"title":"思想"}}}
筛选返回字段
post books/_search {"_source":["title", "author"],"query":{"term":{"title":"思想"}}}
返回文档 version
post books/_search {"version":true, "query":{...}}
返回搜索字段高亮
post books/_search {"query":{"term":{"title":"编程"}}, "highlight":{"fields":{"title":{}}}}
match query 分词查询
post books/_search {"query":{"match":{"title":{"query":"java编程思想", "operator":"or"}}}}
post books/_search {"query":{"match":{"title":{"query":"java编程思想", "operator":"and"}}}}
post books/_search {"query":{"match":{"title":{"query":"java编程思想", "operator":"or"}}}}
match_phrase query 短语查询(分词后所有词项都必须匹配,且顺序一致)
post test/_search {"query":{"match_phrase":{"foo":"hello word"}}}
match_phrase_prefix 短语前缀查询(分词后所有词项按顺序匹配,且最后一个 term 支持前缀匹配)
post test/_search {"query":{"match_phrase_prefix":{"foo":"hello w"}}}
multi_match 多字段query
post books/_search {"query":{"multi_match":{"query":"java编程", "fields":["title", "description"]}}}
支持在要搜索的字段名称使用通配符
post books/_search {"query":{"multi_match":{"query":"java编程", "fields":["title", "*_name"]}}}
支持字段中出现的权重
post books/_search {"query":{"multi_match":{"query":"java编程", "fields":["title^3", "description"]}}}
common_terms query(停用词优化搜索)
post /_search {"query":{"common":{"body":{"query":"nelly the elephant as a cartoon", "cutoff_frequency":0.001, "low_freq_operator":"and"}}}}
query_string query 在查询语句中使用连接词
simple_query_string
range query
gt gte lt lte
post books/_search {"query":{"range":{"price":{"gt":50, "lte":70}}}}
exists query
post books/_search {"query":{"exists":{"field":"user"}}}
prefix query
post books/_search {"query":{"prefix":{"description":"win"}}}
wildcard query(通配符查询?*)
post books/_search {"query":{"wildcard":{"author":"张若?"}}}
regexp query
post _search {"query":{"regexp":{"postcode":"W[0-9].+"}}}
fuzzy query(矫正搜索)
post books/_search {"query":{"fuzzy":{"title":"javc"}}}
javc -> java
type query
post _search {"query":{"type":{"value":"IT"}}}
ids query
post books/_search {"query":{"ids":{"type":"IT", "values":["1", "3", "5"]}}}
复合查询
constant_score query 过滤查询条件且返回相同评分的记录
post books/_search {"query":{"constant_score":{"filter":{"term":{"title":"java"}}, "boost":1.2}}}
bool query (组合多个简单查询)
must,should,must_not, filter(minimum_should_match 表示should的最低匹配度)
post books/_search {"query":{"bool":{"minimum_should_match":1, "must":{"match":{"title":"java"}}, "should":[{"match":{"description":"虚拟机"}}], "must_not":{"range":{"price":{"gte":80}}}}}}
dis_max query 最佳匹配查询条件分数
post _search {"query":{"dis_max":{"tie_breaker":0.7, "boost":1.2, "queries":[{"term":{"age":34}}, {"term":{"age":35}}]}}}
function_score query 函数式修改 文档得分
post books/_search {
"query":{
"function_score":{
"query":{"match_all":{}},
"boost":"5",
"random_score":{},
"boost_mode":"multiply"
}
}
}
"query":{
"function_score":{
"query":{"match_all":{}},
"boost":"5",
"random_score":{},
"boost_mode":"multiply"
}
}
}
返回books中的所有文档,文档最大得分5,每个文档得分随机产生,权重计算模式为相乘模式
post books/_search{
"query":{
"function_score":{
"query":{
"match":{"title":"java"}
},
"script_score":{
"script":{
"inline":"Math.sqrt(doc['price'].value/10)"
}
}
}
}
}
"query":{
"function_score":{
"query":{
"match":{"title":"java"}
},
"script_score":{
"script":{
"inline":"Math.sqrt(doc['price'].value/10)"
}
}
}
}
}
自定义评分公式,price的十分之一的开方 作为文档得分
boosting query 评分调整
indices query (query和no_match_query)
post _search {"query":{"indices":{"indices":["books", "books2"], "query":{"match":{"title": "javascript"}}, "no_match_query":{"term":{"title":"basketball"}}}}}
在 books、books2 索引中查询 title 匹配 javascript 的文档;其他索引则使用 no_match_query,查询 title 为 basketball 的文档
嵌套查询
nested query
has_child query
1 put /company {"mappings":{"branch":{}, "employee":{"_parent":{"type":"branch"}}}}
2 post company/branch/_bulk {.....}
3 post company/employee/_bulk {...}
4 post company/branch/_search {"query":{"has_child":{"type":"employee", "query":{"range":{"dob":{"gte":"1980-01-01"}}}}}}
5 post /company/branch/_search {"query":{"has_child":{"type":"employee","query":{"match":{"name":"Alice Smith"}}}}}
6 post /company/branch/_search {"query":{"has_child":{"type":"employee", "min_children":2, "query":{"match_all":{}}}}}
通过筛选子类数量 返回结果
has_parent query
post company/employee/_search {"query":{"has_parent":{"parent_type":"branch", "query":{"match":{"country":"UK"}}}}}
位置查询
数据准备
1 put geo {"mappings":{"city":{"properties":{"name":{"type":"keyword"}, "location":{"type":"geo_point"}}}}}
2 json文件准备
3 数据入 ES
geo_distance query 点半径以内查询
post geo/_search {"query":{"bool":{"must":{"match_all":{}}, "filter":{"geo_distance":{"distance":"200km","location":{"lat":"33.0", "lon":"177.0"}}}}}}
筛选此点200km内的数据
post geo/_search {"query":{"match_all":{}}, "sort":[{"_geo_distance":{"location":"11.1,22.2", "unit":"km"}}]}
距离此点 距离近远排序
geo_bounding_box query 2点形成的框内的点
post geo/_search {
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": "38.4864400000",
"lon": "106.2324800000"
},
"bottom_right": {
"lat": "28.6820200000",
"lon": "115.857940000"
}
}
}
}
}
}
}
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": "38.4864400000",
"lon": "106.2324800000"
},
"bottom_right": {
"lat": "28.6820200000",
"lon": "115.857940000"
}
}
}
}
}
}
}
geo_polygon query 多边形内的点
{
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_polygon": {
"location": {
"points": [
{
"lat": "40.84",
"lon": "111.75"
},
{
"lat": "29.56",
"lon": "106.55"
},
{
"lat": "31.23",
"lon": "121.47"
}
]
}
}
}
}
}
}
geo_shape query
1 put geoshape {"mappings":{"city":{"properties":{"name":{"type":"keyword"},"location":{"type":"geo_shape"}}}}}
2 put geoshape/city/1 {
"name": "西安-银川",
"location": {
"type": "linestring",
"coordinates": [
[
108.93984,
34.34127
],
[
113.6587142944,
34.7447157466
]
]
}
}
3 post geoshape/_search {
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_shape": {
"location": {
"shape": {
"type": "envelope",
"coordinates": [
[
106.23,
38.48
],
[
115.85,
28.868
]
]
},
"relation": "within"
}
}
}
}
}
}
特殊查询
more_like_this query 查询和提供文本类似的文档
post books/_search {"query":{"more_like_this":{"fields":["title", "description"], "like":"java virtual machine", "min_term_freq":1, "max_query_terms":12}}}
fields 要匹配的字段,默认_all
like 要匹配的文本
min_term_freq 文档中的词项的最低频率,默认为2
max_query_terms query 中包含的最大词项数目,默认25
min_doc_freq 最小的文档频率 默认5
max_doc_freq 最大的文档频率
min_word_length 单词的最小长度
max_word_length 最大单词长度
stop_words停用词列表
analyzer分词器
minimum_should_match 文档匹配的最小词项数。默认为query分词后的30%
boost_terms 词项的权重
include 是否把输入文档作为结果返回
boost 整个query的权重,默认为1.0
script query
post books/_search {"query":{"bool":{"must":[{"script":{"script":{"inline":"doc['price'].value > 80", "lang":"painless"}}} ]}}}
percolate query 先注册查询条件
搜索高亮
自定义高亮片段
post books/_search {"query":{"match":{"title":"javascript"}}, "highlight":{"fields":{"title":{"pre_tags":["<strong>"], "post_tags":["</strong>"]}}}}
多字段高亮
post books/_search {"query":{"match":{"title":"javascript"}}, "highlight":{"require_field_match":false,"fields":{"title":{}, "description":{}}}}
高亮性能分析
highlighter
postings-highlighter
fast-vector-highlighter
搜索排序
默认排序
term
post books/_search {"query":{"term":{"title":"java"}}}
默认按照评分的降序排序
post books/_search {"query":{"term":{"title":"java"}}, "sort":[{"_score":{"order":"asc"}}]}
match_all (没有评分的)
post books/_search {"query":{"match_all":{}}, "sort":[{"_doc":{"order":"desc"}}]}
多字段排序
post books/_search {"sort":[{"price":{"order":"desc"}},{"year":{"order":"asc"}}]}
聚合分析
指标聚合
max aggregation
post books/_search {"size":0, "aggs":{"max_price":{"max":{"field":"price"}}}}
min aggregation
post books/_search {"size":0, "aggs":{"min_year":{"min":{"field":"publi_time"}}}}
avg aggregation
post books/_search {"size":0, "aggs":{"avg_price":{"avg":{"field":"price"}}}}
sum aggregation
post books/_search {"size":0, "aggs":{"sum_price":{"sum":{"field":"price"}}}}
cardinality aggregation (排重去求数量)
post books/_search {"size":0, "aggs":{"cardinality_lan":{"cardinality":{"field":"language"}}}}
stats aggregation (count,max,min,avg,sum)
post books/_search {"size":0, "aggs":{"stats_price":{"stats":{"field":"price"}}}}
extended stats aggregation (count,max,min,avg,sum, 方差,标准差。。。)
post books/_search {"size":0, "aggs":{"extended_stats_price":{"extended_stats":{"field":"price"}}}}
percentiles aggregation(百分位数)
post books/_search {"size":0, "aggs":{"percentiles_price":{"percentiles":{"field":"price"}}}}
value_count aggregation(按字段统计文档数量)
post books/_search {"size":0, "aggs":{"value_count_author":{"value_count":{"field":"author"}}}}
桶聚合
terms aggregation (分组聚合)
post books/_search {"size":0, "aggs":{"per_count":{"terms":{"field":"language"}}}}
aggs 嵌套
post books/_search {"size":0, "aggs":{"per_count":{"terms":{"field":"language"},"aggs":{"avg_price":{"avg":{"field":"price"}}}}}}
filter aggregation (查询聚合)
post books/_search {"size":0, "aggs":{"java_avg_price":{"filter":{"term":{"title":"java"}}, "aggs":{"avg_price":{"avg":{"field":"price"}}}}}}
filters aggregation(多查询聚合)
post books/_search {
"size": 0,
"aggs": {
"pro_avg_price": {
"filters": {
"filters": [
{
"match": {
"title": "java"
}
},
{
"match": {
"title": "python"
}
}
]
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
range aggregation
post books/_search {
"size": 0,
"aggs": {
"price_range": {
"range": {
"field": "price",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 80
},
{
"from": 100
}
]
}
}
}
}
date range aggregation
post books/_search {
"size": 0,
"aggs": {
"range_price": {
"date_range": {
"field": "publi_time",
"format": "yyyy-MM-dd",
"ranges": [
{
"to": "now-24M/M"
},
{
"from": "now-24M/M"
}
]
}
}
}
}
date histogram aggregation(时间直方图集合)
post books/_search {"size":0, "aggs":{"date_pb":{"date_histogram":{"field":"publi_time", "interval":"month"}}}}
missing aggregation(空值聚合)
post books/_search {"size":0, "aggs":{"miss_price":{"missing":{"field":"price"}}}}
children aggregation(父子关系聚合)
post company/_search {
"size": 0,
"aggs": {
"xx": {
"children": {
"type": "employee"
}
}
}
}
geo distance aggregation
post books/_search {
"size": 0,
"aggs": {
"city_from_xian": {
"geo_distance": {
"field": "location",
"origin": "34.34,108.93",
"unit": "km",
"ranges": [
{
"to": 500
},
{
"from": 500,
"to": 1000
},
{
"from": 1000
}
]
}
}
}
}
ip range aggregation
post ip/_search {
"size": 0,
"aggs": {
"ip_ranges": {
"ip_range": {
"field": "ip",
"ranges": [
{
"to": "10.0.0.5"
},
{
"from": "10.0.0.5"
}
]
}
}
}
}
注意事项
一个字段可以同时配置两种字段类型:keyword 和 text
{"query":{"term":{"carBrand.keyword":"一汽"}}}
这样就会 查询es的keyword的不分词记录
聚合后带出其他字段
需要再一次使用aggs嵌套
"aggs": {
"top_hits": {
"size": 1
}
}
"top_hits": {
"size": 1
}
}
完整例子
POST kaleido/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"year_month": "201810"
}
},
{
"term": {
"car_brand.keyword": "大众"
}
},
{
"term": {
"factory.keyword": "上汽大众"
}
}
]
}
},
"aggs": {
"xx": {
"terms": {
"field": "city.keyword",
"size": 400
},
"aggs": {
"yy": {
"terms": {
"field": "sub_car_type.keyword",
"size": 400
},
"aggs": {
"sum_sale": {
"sum": {
"field": "sale_count"
}
},
"aggs": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"year_month": "201810"
}
},
{
"term": {
"car_brand.keyword": "大众"
}
},
{
"term": {
"factory.keyword": "上汽大众"
}
}
]
}
},
"aggs": {
"xx": {
"terms": {
"field": "city.keyword",
"size": 400
},
"aggs": {
"yy": {
"terms": {
"field": "sub_car_type.keyword",
"size": 400
},
"aggs": {
"sum_sale": {
"sum": {
"field": "sale_count"
}
},
"aggs": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
索引管理
新建索引
put blog
put blog {"settings":{"number_of_shards":3, "number_of_replicas":0}}
索引为小写,不允许大写
索引名重复会报错
更新副本数量设置
put blog/_settings {"number_of_replicas": 2}
读写权限
blocks.read_only:true 只读
blocks.read:true 禁止读
blocks.write:true 禁止写
put blog/_settings {"blocks.write": true}
blocks.write:false 开启写
put blog/_settings {"blocks.write": false}
查看索引
get blog/_settings
get blog,twitter/_settings
get _all/_settings
删除索引
delete blog
索引的打开和关闭
post blog/_open
post blog/_close
post test1,test2/_close
post test1,test2/_open
post test1,test2,blog/_close?ignore_unavailable=true 忽略不存在的index
post _all/_open
post _all/_close
post test*/_close
复制索引
直接复制
post _reindex {"source":{"index":"blog"}, "dest":{"index":"blog_news"}}
查询复制
post _reindex {"source":{"index":"blog", "type":"article", "query":{"term":{"title":"git"}}}, "dest":{"index":"blog_news"}}
收缩索引
索引别名
get /test/_aliases
get /_aliases
文档管理
新建文档
put blog/article/1 {"xxx":x, "yyy":c} 指定id方式
post /blog/article {"xxx":x, "yyy":c} 自动生成id方式
获取文档
get blog/article/1
head blog/article/100 判断文档是否存在
MGET
get _mget {"docs":[{"_index":"blog", "_type":"article","_id":1},{"_index":"twitter", "_type":"tweet","_id":1} ]}
get blog/_mget {"docs":[{"_type":"typea", "_id":1}, {"_type":"typeb", "_id":2} ]}
get blog/article/_mget {"docs":[{"_id":1}, {"_id":2} ]}
get blog/article/_mget {"ids":[1, 2]}
更新文档
更新已存在文档的值
1 put test/type1/1 {"counter":1, "tags":["red"]}
2 post test/type1/1/_update {"script":{"inline":"ctx._source.counter += params.count", "lang":"painless", "params":{"count":4}}}
inline 是执行脚本
ctx是脚本语言的一个执行对象
ctx._source,ctx._index,ctx._type,ctx._id,ctx._version,ctx._routing,ctx._parent
ctx是脚本语言的一个执行对象
ctx._source,ctx._index,ctx._type,ctx._id,ctx._version,ctx._routing,ctx._parent
painless是es内置的一个脚本语言
params是参数集合
3 post test/type1/1/_update {"script":{"inline":"ctx._source.tags.add(params.tag)", "lang":"painless", "params":{"tag":"blue"}}}
4 post test/type1/1/_update {"script":{"inline":" if(ctx._source.tags.contains(params.tag)){ctx.op=\"delete\"} else{ ctx.op=\"none\"}", "lang":"painless", "params":{"tag":"red"}}}
删除tags中包含"red"的文档,ctx.op="delete"为删除,ctx.op="none"为什么也不做
新增\移除字段(自动产生mapping的情况)
post test/type1/1/_update {"script": "ctx._source.new_field = \"value_of_new_field\""}
post test/type1/1/_update {"script": "ctx._source.remove(\"new_field\")"}
upsert 存在记录则更新,否则新建文档
post test/type1/1/_update {"script":{"inline":"ctx._source.counter += params.count", "lang":"painless", "params":{"count": 4}}, "upsert":{"counter": 1}}
存在则执行脚本,否则新建这个文档,且新增counter字段
查询更新文档
post blog/_update_by_query {"script":{"inline":"ctx._source.category=params.category", "lang":"painless", "params":{"category":"git"}}, "query":{"term":{"title":"git"}}}
删除文档
delete blog/article/1
delete blog/article/1?routing=user123
如果在索引时指定了路由,删除时也可以增加路由参数
查询删除
post blog/_delete_by_query {"query":{"term":{"title":"hibernate"}}}
post blog/csdn/_delete_by_query {"query":{"match_all":{}}}
json 文件批量操作
1 创建JSON文件,每行结尾必须显式\n结尾
2 {"index": {"_index":"blog", "_type":"article", "_id":"1"}}
{"title":"blog title"}
{"create":{"_index":"blog", "_type":"article", "_id":"1"}} //如果此ID已存在,create则会报错,index则不会
{"title":"blog title"}
{"index":{"_index":"blog", "_type":"article"}} //这样没有指定id,则会自动产生ID
{"title":"blog title"}
{"delete":{"_index":"website", "_type":"article", "_id":"123"}}
{"update":{"_index":"website", "_type":"blog", "_id":"123", "_retry_on_conflict":3}}
{"doc":{"title":"My updated blog post"}}\r\n
3 curl -XPOST "http://localhost:9200/website/_bulk?pretty" --data-binary @blog.json
4 建议 每个bulk的大小在1-5M
版本控制
获取特定版本号
get website/blog/1?version=1
路由机制
主要 用来指定主分片地址
映射管理
获取映射
get blog/_mapping
设置映射(可以自主控制字段类型,分词器等)
设置 静态 Mapping
put books {"mappings":{"it":{"dynamic":"strict", "properties":{"title":{"type":"text"}, "publish_date":{"type":"date"}}}}}
在不存在index的情况下
其中 dynamic 的值可以为 true 自动添加,false 忽略新的字段,strict 发现新的字段抛出异常
put /my_index {"mappings":{"it":{"date_detection":false, "dynamic":"strict","properties":{"x1":{"type":"text"}}}}}
关闭日期格式检测
新增字段mapping
post sq_data/_mapping/kaleido {"properties":{"yearMonth2":{"type":"date", "format":"yyyy-MM-dd"}}}
然后再使用 update_by_query 把一个字段的值复制到另一个字段
设置静态映射(可以自主控制字段类型,分词器等)
可以在创建索引时手工指定索引映射
put my_index
{
"mappings":{
"user":{
"_all":{"enabled":false},
"properties":{
"title":{"type":"text"},
"name":{"type":"text"},
"age":{"type":"integer"}
}
},
"blogpost":{
"_all":{"enabled":false},
"properties":{
"title":{"type":"text"},
"body":{"type":"text"},
"user_id":{"type":"keyword"},
"created":{"type":"date", "format":"strict_date_optional_time || epoch_millis" }
}
}
}
}
{
"mappings":{
"user":{
"_all":{"enabled":false},
"properties":{
"title":{"type":"text"},
"name":{"type":"text"},
"age":{"type":"integer"}
}
},
"blogpost":{
"_all":{"enabled":false},
"properties":{
"title":{"type":"text"},
"body":{"type":"text"},
"user_id":{"type":"keyword"},
"created":{"type":"date", "format":"strict_date_optional_time || epoch_millis" }
}
}
}
}
日期检测
有时候可能需要关闭日期自动检测
put /my_index {"mappings":{"my_type":{"date_detection":false}}}
映射参数
analyzer
索引和查询的分词器
put my_index {"mappings":{"my_type":{"properties":{"content":{"type":"text", "analyzer":"ik_max_word"}}}}}
search_analyzer
查询分词器
normalizer
解析前的标准化配置
boost
设置字段权重
coerce
清除脏数据
copy_to
合并字段到新的字段
doc_values
用来控制是否 额外增加一个列式存储映射来加快排序和聚合操作
dynamic
是否可以动态新增字段
enabled
当一个字段没有索引和查询的必要时
fielddata
加速 text的聚合速度
format
设置日期格式
ignore_above
指定字段分词和索引的字符串的最大长度
ignore_malformed
忽略不规则数据
include_in_all
指定字段的值是否包含_all
index
index_options
fields
norms
标准化文档
null_value
position_increment_gap
properties
similarity
store
term_vector
0 条评论
下一页