
  • Post author:
  • Post category:其他






注意:在索引有多个分片的情况下,需要用 routing 把测试文档限制在同一个 shard 里再做实验;否则打分所用的 docCount、docFreq 等统计是按分片各自计算的,结果很难解释。

# 按照routing来索引
PUT /hjx_test_index/doc/1?routing=text1&refresh=true
{ "text" : "quick brown fox 11" }
PUT /hjx_test_index/doc/2?routing=text1&refresh=true
{ "text" : "bad fox" }
PUT /hjx_test_index/doc/3?routing=text1&refresh=true
{ "text" : "some test more some 35" }

GET /hjx_test_index/doc/_search?explain=true&routing=text1
{
  "query": {
    "term": {
      "text": "fox"
    }
  }
}

GET /hjx_test_index/doc/_search?routing=text1

  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  "hits": {
    "total": 2,
    "max_score": 1.2310667,
    "hits": [
        "_shard": "[hjx_test_index][4]",
        "_node": "JUltA86XS-WNJ3baNQWlcg",
        "_index": "hjx_test_index",
        "_type": "doc",
        "_id": "2",
        "_score": 1.2310667,
        "_routing": "text1",
        "_source": {
          "text": "bad fox"
        "_explanation": {
          "value": 1.2310667,
          "description": "weight(text:fox in 0) [PerFieldSimilarity], result of:",
          "details": [
              "value": 1.2310667,
              "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
              "details": [
                  "value": 1.0296195,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                      "value": 2,
                      "description": "docFreq",
                      "details": []
                      "value": 6,
                      "description": "docCount",
                      "details": []
                  "value": 1.1956521,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                      "value": 1,
                      "description": "termFreq=1.0",
                      "details": []
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                      "value": 3.3333333,
                      "description": "avgFieldLength",
                      "details": []
                      "value": 2,
                      "description": "fieldLength",
                      "details": []
        "_shard": "[hjx_test_index][4]",
        "_node": "JUltA86XS-WNJ3baNQWlcg",
        "_index": "hjx_test_index",
        "_type": "doc",
        "_id": "1",
        "_score": 0.9517491,
        "_routing": "text1",
        "_source": {
          "text": "quick brown fox 11"
        "_explanation": {
          "value": 0.9517491,
          "description": "weight(text:fox in 0) [PerFieldSimilarity], result of:",
          "details": [
              "value": 0.9517491,
              "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
              "details": [
                  "value": 1.0296195,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                      "value": 2,
                      "description": "docFreq",
                      "details": []
                      "value": 6,
                      "description": "docCount",
                      "details": []
                  "value": 0.92436975,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                      "value": 1,
                      "description": "termFreq=1.0",
                      "details": []
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                      "value": 3.3333333,
                      "description": "avgFieldLength",
                      "details": []
                      "value": 4,
                      "description": "fieldLength",
                      "details": []



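最终得分是各因素的乘积:经典 TF-IDF 中是三个因素的乘积(tf * idf * fieldNorm);而上面 explain 输出使用的是 BM25,字段长度归一化已经并入 tfNorm,所以每个词项的得分 = idf * tfNorm(例如 1.0296195 × 1.1956521 ≈ 1.2310667),多个词项的得分再相加("sum of")。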



  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  "hits": {
    "total": 3,
    "max_score": 1.5924733,
    "hits": [
        "_shard": "[hjx_test_index][4]",
        "_node": "JUltA86XS-WNJ3baNQWlcg",
        "_index": "hjx_test_index",
        "_type": "doc",
        "_id": "1",
        "_score": 1.5924733,
        "_routing": "text1",
        "_source": {
          "text": "quick brown fox 11"
        "_explanation": {
          "value": 1.5924734,
          "description": "sum of:",
          "details": [
              "value": 0.9517491,
              "description": "weight(text:quick in 0) [PerFieldSimilarity], result of:",
              "details": [
                  "value": 0.9517491,
                  "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
                  "details": [
                      "value": 1.0296195,
                      "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                      "details": [
                          "value": 2,
                          "description": "docFreq",
                          "details": []
                          "value": 6,
                          "description": "docCount",
                          "details": []
                      "value": 0.92436975,
                      "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                      "details": [
                          "value": 1,
                          "description": "termFreq=1.0",
                          "details": []
                          "value": 1.2,
                          "description": "parameter k1",
                          "details": []
                          "value": 0.75,
                          "description": "parameter b",
                          "details": []
                          "value": 3.3333333,
                          "description": "avgFieldLength",
                          "details": []
                          "value": 4,
                          "description": "fieldLength",
                          "details": []
              "value": 0.6407243,
              "description": "weight(text:fox in 0) [PerFieldSimilarity], result of:",
              "details": [
                  "value": 0.6407243,
                  "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
                  "details": [
                      "value": 0.6931472,
                      "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                      "details": [
                          "value": 3,
                          "description": "docFreq",
                          "details": []
                          "value": 6,
                          "description": "docCount",
                          "details": []
                      "value": 0.92436975,
                      "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                      "details": [
                          "value": 1,
                          "description": "termFreq=1.0",
                          "details": []
                          "value": 1.2,
                          "description": "parameter k1",
                          "details": []
                          "value": 0.75,
                          "description": "parameter b",
                          "details": []
                          "value": 3.3333333,
                          "description": "avgFieldLength",
                          "details": []
                          "value": 4,
                          "description": "fieldLength",
                          "details": []
        "_shard": "[hjx_test_index][4]",
        "_node": "JUltA86XS-WNJ3baNQWlcg",
        "_index": "hjx_test_index",
        "_type": "doc",
        "_id": "3",
        "_score": 1.4302213,
        "_routing": "text1",
        "_source": {
          "text": "quick fox more some 35"
        "_explanation": {
          "value": 1.4302213,
          "description": "sum of:",
          "details": [
              "value": 0.8547784,
              "description": "weight(text:quick in 0) [PerFieldSimilarity], result of:",
              "details": [
                  "value": 0.8547784,
                  "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
                  "details": [
                      "value": 1.0296195,
                      "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                      "details": [
                          "value": 2,
                          "description": "docFreq",
                          "details": []
                          "value": 6,
                          "description": "docCount",
                          "details": []
                      "value": 0.8301887,
                      "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                      "details": [
                          "value": 1,
                          "description": "termFreq=1.0",
                          "details": []
                          "value": 1.2,
                          "description": "parameter k1",
                          "details": []
                          "value": 0.75,
                          "description": "parameter b",
                          "details": []
                          "value": 3.3333333,
                          "description": "avgFieldLength",
                          "details": []
                          "value": 5,
                          "description": "fieldLength",
                          "details": []
              "value": 0.57544297,
              "description": "weight(text:fox in 0) [PerFieldSimilarity], result of:",
              "details": [
                  "value": 0.57544297,
                  "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
                  "details": [
                      "value": 0.6931472,
                      "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                      "details": [
                          "value": 3,
                          "description": "docFreq",
                          "details": []
                          "value": 6,
                          "description": "docCount",
                          "details": []
                      "value": 0.8301887,
                      "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                      "details": [
                          "value": 1,
                          "description": "termFreq=1.0",
                          "details": []
                          "value": 1.2,
                          "description": "parameter k1",
                          "details": []
                          "value": 0.75,
                          "description": "parameter b",
                          "details": []
                          "value": 3.3333333,
                          "description": "avgFieldLength",
                          "details": []
                          "value": 5,
                          "description": "fieldLength",
                          "details": []
        "_shard": "[hjx_test_index][4]",
        "_node": "JUltA86XS-WNJ3baNQWlcg",
        "_index": "hjx_test_index",
        "_type": "doc",
        "_id": "2",
        "_score": 0.8287629,
        "_routing": "text1",
        "_source": {
          "text": "bad fox"
        "_explanation": {
          "value": 0.8287629,
          "description": "sum of:",
          "details": [
              "value": 0.8287629,
              "description": "weight(text:fox in 0) [PerFieldSimilarity], result of:",
              "details": [
                  "value": 0.8287629,
                  "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
                  "details": [
                      "value": 0.6931472,
                      "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                      "details": [
                          "value": 3,
                          "description": "docFreq",
                          "details": []
                          "value": 6,
                          "description": "docCount",
                          "details": []
                      "value": 1.1956521,
                      "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                      "details": [
                          "value": 1,
                          "description": "termFreq=1.0",
                          "details": []
                          "value": 1.2,
                          "description": "parameter k1",
                          "details": []
                          "value": 0.75,
                          "description": "parameter b",
                          "details": []
                          "value": 3.3333333,
                          "description": "avgFieldLength",
                          "details": []
                          "value": 2,
                          "description": "fieldLength",
                          "details": []








Lucene scoring uses a combination of the Vector Space Model (VSM) of Information Retrieval and the Boolean model to determine how relevant a given Document is to a User’s query. In general, the idea behind the VSM is the more times a query term appears in a document relative to the number of times the term appears in all the documents in the collection, the more relevant that document is to the query. It uses the Boolean model to first narrow down the documents that need to be scored based on the use of boolean logic in the Query specification. Lucene also adds some capabilities and refinements onto this model to support boolean and fuzzy searching, but it essentially remains a VSM based system at the heart. For some valuable references on VSM and IR in general refer to the Lucene Wiki IR references.


版权声明:本文为waltonhuang原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。