java-ee – 相同的 Elasticsearch 查询在两个节点上返回不同的搜索结果
我有一个双节点的 Elasticsearch 集群,同一个搜索查询在其中一个节点上返回的结果与另一个节点不同,我想找出原因。细节如下:
相同的文档(内容和 ID 均相同)在两个节点上的得分不同,从而导致排序顺序不同。 _cluster/state: { "cluster_name": "elasticsearch.abc","version": 330,"master_node": "HexGKOoHSxqRaMmwduCVIA","blocks": {},"nodes": { "rUZDrUfMR1-RWcy4t0YQNw": { "name": "Owl","transport_address": "inet[/10.123.123.123:9303]","attributes": {} },"HexGKOoHSxqRaMmwduCVIA": { "name": "Bloodlust II","transport_address": "inet[/10.123.123.124:9303]","attributes": {} } },"metadata": { "templates": {},"indices": { "abc": { "state": "open","settings": { "index": { "creation_date": "1432297566361","uuid": "LKx6Ro9CRXq6JZ9a29jWeA","analysis": { "filter": { "substring": { "type": "nGram","min_gram": "1","max_gram": "50" } },"analyzer": { "str_index_analyzer": { "filter": [ "lowercase","substring" ],"tokenizer": "keyword" },"str_search_analyzer": { "filter": [ "lowercase" ],"tokenizer": "keyword" } } },"number_of_replicas": "1","number_of_shards": "5","version": { "created": "1050099" } } },"mappings": { "some_mapping": { ... } ... },"aliases": [] } } },"routing_table": { "indices": { "abc": { "shards": { "0": [ { "state": "STARTED","primary": true,"node": "HexGKOoHSxqRaMmwduCVIA","relocating_node": null,"shard": 0,"index": "abc" },{ "state": "STARTED","primary": false,"node": "rUZDrUfMR1-RWcy4t0YQNw","index": "abc" } ],"1": [ { "state": "STARTED","shard": 1,"2": [ { "state": "STARTED","shard": 2,"3": [ { "state": "STARTED","shard": 3,"4": [ { "state": "STARTED","shard": 4,"index": "abc" } ] } } } },"routing_nodes": { "unassigned": [],"nodes": { "HexGKOoHSxqRaMmwduCVIA": [ { "state": "STARTED","index": "abc" },{ "state": "STARTED","index": "abc" } ],"rUZDrUfMR1-RWcy4t0YQNw": [ { "state": "STARTED","index": "abc" } ] } },"allocations": [] } _cluster/health { "cluster_name": "elasticsearch.abc","status": "green","timed_out": false,"number_of_nodes": 2,"number_of_data_nodes": 2,"active_primary_shards": 5,"active_shards": 10,"relocating_shards": 0,"initializing_shards": 0,"unassigned_shards": 0,"number_of_pending_tasks": 0 } _cluster/stats { 
"timestamp": 1432312770877,"cluster_name": "elasticsearch.abc","indices": { "count": 1,"shards": { "total": 10,"primaries": 5,"replication": 1,"index": { "shards": { "min": 10,"max": 10,"avg": 10 },"primaries": { "min": 5,"max": 5,"avg": 5 },"replication": { "min": 1,"max": 1,"avg": 1 } } },"docs": { "count": 19965,"deleted": 4 },"store": { "size_in_bytes": 399318082,"throttle_time_in_millis": 0 },"fielddata": { "memory_size_in_bytes": 60772,"evictions": 0 },"filter_cache": { "memory_size_in_bytes": 15284,"id_cache": { "memory_size_in_bytes": 0 },"completion": { "size_in_bytes": 0 },"segments": { "count": 68,"memory_in_bytes": 10079288,"index_writer_memory_in_bytes": 0,"index_writer_max_memory_in_bytes": 5120000,"version_map_memory_in_bytes": 0,"fixed_bit_set_memory_in_bytes": 0 },"percolate": { "total": 0,"time_in_millis": 0,"current": 0,"memory_size_in_bytes": -1,"memory_size": "-1b","queries": 0 } },"nodes": { "count": { "total": 2,"master_only": 0,"data_only": 0,"master_data": 2,"client": 0 },"versions": [ "1.5.0" ],"os": { "available_processors": 8,"mem": { "total_in_bytes": 0 },"cpu": [] },"process": { "cpu": { "percent": 0 },"open_file_descriptors": { "min": 649,"max": 654,"avg": 651 } },"jvm": { "max_uptime_in_millis": 2718272183,"versions": [ { "version": "1.7.0_40","vm_name": "Java HotSpot(TM) 64-Bit Server VM","vm_version": "24.0-b56","vm_vendor": "Oracle Corporation","count": 2 } ],"mem": { "heap_used_in_bytes": 2665186528,"heap_max_in_bytes": 4060086272 },"threads": 670 },"fs": { "total_in_bytes": 631353901056,"free_in_bytes": 209591468032,"available_in_bytes": 209591468032 },"plugins": [] } } 示例查询: /_search?from=22&size=1 { "query": { "bool": { "should": [{ "match": { "address.city": { "query": "Bremen","boost": 2 } } }],"must": [{ "match": { "type": "L" } }] } } } 对第一个请求的响应 { "took": 30,"_shards": { "total": 5,"successful": 5,"failed": 0 },"hits": { "total": 19543,"max_score": 6.407021,"hits": [{ "_index": "abc","_type": "xyz","_id": 
"ABC123","_score": 5.8341036,"_source": { ... } }] } } 对第二个请求的响应 { "took": 27,"failed": 0 },"hits": [ { "_index": "abc","_id": "FGH12343","_source": { ... } } ] } } 可能是什么原因导致的?我该如何确保不同节点返回相同的结果? 按要求附上带 explain 的查询: search /abc/mytype/_search?from=0&size=1&search_type=dfs_query_then_fetch&explain { "query": { "bool": { "should": [{ "match": { "address.city": { "query": "Karlsruhe","boost": 2 } } }] } } } 对第一个请求的响应 { "took": 5,"_shards": { "total": 5,"failed": 0 },"hits": { "total": 41,"max_score": 7.211497,"hits": [ { "_shard": 0,"_node": "rUZDrUfMR1-RWcy4t0YQNw","_index": "abc","_type": "mytype","_id": "abc123","_score": 7.211497,"_source": {... },"_explanation": { "value": 7.211497,"description": "weight(address.city:karlsruhe^2.0 in 1598) [PerFieldSimilarity],result of:","details": [ { "value": 7.211497,"description": "fieldWeight in 1598,product of:","details": [ { "value": 1,"description": "tf(freq=1.0),with freq of:","details": [ { "value": 1,"description": "termFreq=1.0" } ] },{ "value": 7.211497,"description": "idf(docFreq=46,maxDocs=23427)" },{ "value": 1,"description": "fieldNorm(doc=1598)" } ] } ] } } ] } } 对第二个请求的响应 { "took": 6,"max_score": 7.194322,"_score": 7.194322,"_explanation": { "value": 7.194322,"details": [ { "value": 7.194322,{ "value": 7.194322,"description": "idf(docFreq=48,maxDocs=24008)" },"description": "fieldNorm(doc=1598)" } ] } ] } } ] } } 解决方法
命中结果不一致,很可能是因为主分片与副本分片之间不同步。如果某个节点(无论出于何种原因)离开了集群,而在此期间文档仍在继续被更改(索引、删除、更新),就会出现这种情况。
评分不一致则是另一回事,可以用 this blog post 中“相关性评分”(Relevance Scoring)一节来解释:
我建议在搜索时尝试“DFS Query Then Fetch”,也就是使用 _search?search_type=dfs_query_then_fetch ……这应该有助于提高评分的准确性。 此外,节点断开期间的文档更改会造成各分片副本上的文档计数不同,即使在删除并重建索引之后,这种差异仍会影响分数计算。这可能是因为文档更改在副本分片和主分片上的执行情况不同,更具体地说,是那些已被删除的文档所致。被删除的文档只有在段合并时才会从索引中被永久移除,而除非底层 Lucene 实例满足某些条件,否则不会发生段合并。 可以通过 POST /_optimize?max_num_segments=1 强制启动合并。警告:这一操作耗时很长(取决于索引的大小),并且会占用大量 IO 资源和 CPU,不应在正在发生更改的索引上运行。文档:Optimize、Segments Merging (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |