Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

How to disable auto-generation of default fields in ElasticSearch Query with TermsAggregationBuilder and QueryBuilders

I am migrating from ElasticSearch v1.0.0 to v7.13.1. I know that support for Type specification has been removed by ElasticSearch version beyond 7.0.0.
Also, there are certain improvements done by the ElasticSearch in terms of classes such as TermsAggregationBuilder replaces TermsBuilder.

But when I am preparing queries using QueryBuilders and AggregationBuilder, I could see some extra fields generated, that I don’t want.

Is there any way to avoid them programmatically?

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

Before

private TermsBuilder createAggreationsUriDetails() {
        return AggregationBuilders
                .terms(xxxxxxxx)...

After

private TermsAggregationBuilder createAggreationsUriDetails() {
        return AggregationBuilders
                .terms(ElasticConstants.URI)...

Also I am using matchQuery() to prepare match query with the upgraded ES version. Still I could see some extra fields. Same goes for Order as well.

Query comparison with old and new elasticsearch version

Before

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "uri.raw": {
              "query": "sample_uri",
              "type": "boolean"
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "from": 1655145000000,
              "to": 1655231400000,
              "include_lower": "true",
              "include_upper": "false",
              "format": "epoch_millis"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "uri": {
      "terms": {
        "field": "uri.raw",
        "size": 1,
        "order": {
          "_count": "desc"
        }
      },
      "aggregations": {
        "client_id": {
          "terms": {
            "field": "client_id",
            "size": 10000,
            "order": {
              "_count": "desc"
            }
          },
          "aggregations": {
            "response_code": {
              "terms": {
                "field": "response_code.raw",
                "size": 8,
                "order": {
                  "_count": "desc"
                }
              },
              "aggregations": {
                "datetime": {
                  "date_histogram": {
                    "field": "@timestamp",
                    "interval": "1m",
                    "min_doc_count": 1
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Query developed with new ES version QueryBuilder

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "uri.raw": {
              "query": "sample_url",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": "true",
              "lenient": "false",
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": "true",
              "boost": 1
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "from": 1655145000000,
              "to": 1655231400000,
              "include_lower": "true",
              "include_upper": "false",
              "format": "epoch_millis",
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": "true",
      "boost": 1
    }
  },
  "aggs": {
    "uri": {
      "terms": {
        "field": "uri.raw",
        "size": 1,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": "false",
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      },
      "aggregations": {
        "client_id": {
          "terms": {
            "field": "client_id",
            "size": 10000,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": "false",
            "order": [
              {
                "_count": "desc"
              },
              {
                "_key": "asc"
              }
            ]
          },
          "aggregations": {
            "response_code": {
              "terms": {
                "field": "response_code.raw",
                "size": 8,
                "min_doc_count": 1,
                "shard_min_doc_count": 0,
                "show_term_doc_count_error": "false",
                "order": [
                  {
                    "_count": "desc"
                  },
                  {
                    "_key": "asc"
                  }
                ]
              },
              "aggregations": {
                "datetime": {
                  "date_histogram": {
                    "field": "@timestamp",
                    "interval": "60000ms",
                    "offset": 0,
                    "order": {
                      "_key": "asc"
                    },
                    "keyed": "false",
                    "min_doc_count": 1
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

>Solution :

What you see as a extra fields are actually the params of the query for example match query in your 7.X looks like below:

"match": {
            "uri.raw": {
              "query": "sample_url",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": "true",
              "lenient": "false",
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": "true",
              "boost": 1
            }
          }

These operator, prefix_length, lenient all are the params of match query and even if you don’t provide it will be added with their default values, these will be added at Elasticsearch side when you hit the query in JSON format without these params, so don’t worry about them, if you want you can change some of these params value to see the corresponding impact on your query results, like change operator to AND and number of search results for multi-terms will be less.

Note: You can also check the code of MatchQueryBuilder in the Elasticsearch code base to understand they are using the builder design pattern, and how they are passing the default values of the params.

Hope this helps.

Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading