how to set word variations

  • wpsolr
    Keymaster
    1 year, 5 months ago #31398

    Here is a workaround to configure a synonyms file on a WPSOLR Elasticsearch index. Because the synonyms are applied when documents are indexed, you will need to reindex all your data after this procedure, and again every time you update the synonyms file.

    1. Install your synonym.txt file in your Elasticsearch server’s elasticsearch/config/analysis/ folder
    2. Create a new index from Kibana with the command:

    PUT /my_new_index
    {
      "settings": {
        "analysis": {
          "filter": {
            "english_stop": {
              "type": "stop",
              "stopwords": "_english_"
            },
            "english_stemmer": {
              "type": "stemmer",
              "language": "english"
            },
            "english_possessive_stemmer": {
              "type": "stemmer",
              "language": "possessive_english"
            },
            "synonym": {
              "type": "synonym",
              "synonyms_path": "analysis/synonym.txt",
              "updateable": false
            },
            "stemmer": {
              "type": "stemmer",
              "language": "english"
            },
            "autocompleteFilter": {
              "max_shingle_size": "4",
              "min_shingle_size": "2",
              "type": "shingle"
            },
            "stopwords": {
              "type": "stop",
              "stopwords": [
                "_english_"
              ]
            }
          },
          "analyzer": {
            "english_with_synonym": {
              "type": "custom",
              "tokenizer": "standard",
              "filter": [
                "english_possessive_stemmer",
                "lowercase",
                "english_stop",
                "english_stemmer",
                "synonym"
              ]
            },
            "wpsolr_analyser_did_you_mean": {
              "filter": [
                "lowercase"
              ],
              "char_filter": [
                "html_strip"
              ],
              "type": "custom",
              "tokenizer": "standard"
            },
            "wpsolr_analyser_autocomplete": {
              "filter": [
                "lowercase",
                "autocompleteFilter"
              ],
              "char_filter": [
                "html_strip"
              ],
              "type": "custom",
              "tokenizer": "standard"
            },
            "default": {
              "type": "english"
            },
            "default_search": {
              "type": "english"
            }
          }
        }
      },
      "mappings": {
        "dynamic_templates": [
          {
            "text_en": {
              "match": "text",
              "mapping": {
                "type": "text"
              }
            }
          },
          {
            "*_i": {
              "match": "*_i",
              "mapping": {
                "type": "long"
              }
            }
          },
          {
            "*_s": {
              "match": "*_s",
              "mapping": {
                "type": "keyword"
              }
            }
          },
          {
            "*_str": {
              "match": "*_str",
              "mapping": {
                "type": "keyword"
              }
            }
          },
          {
            "*_l": {
              "match": "*_l",
              "mapping": {
                "type": "long"
              }
            }
          },
          {
            "*_t": {
              "match": "*_t",
              "mapping": {
                "type": "text",
                "analyzer": "english_with_synonym"
              }
            }
          },
          {
            "*_b": {
              "match": "*_b",
              "mapping": {
                "type": "boolean"
              }
            }
          },
          {
            "*_f": {
              "match": "*_f",
              "mapping": {
                "type": "float"
              }
            }
          },
          {
            "*_d": {
              "match": "*_d",
              "mapping": {
                "type": "double"
              }
            }
          },
          {
            "*_dt": {
              "match": "*_dt",
              "mapping": {
                "type": "date"
              }
            }
          },
          {
            "*_ll": {
              "match": "*_ll",
              "mapping": {
                "type": "geo_point"
              }
            }
          },
          {
            "*_nobj": {
              "match": "*_nobj",
              "mapping": {
                "type": "nested"
              }
            }
          }
        ],
        "properties": {
          "id": {
            "type": "keyword"
          },
          "PID": {
            "type": "keyword"
          },
          "blogid": {
            "type": "keyword"
          },
          "blogdomain": {
            "type": "keyword"
          },
          "blogpath": {
            "type": "keyword"
          },
          "wp": {
            "type": "keyword"
          },
          "permalink": {
            "type": "keyword",
            "copy_to": [
              "text"
            ]
          },
          "title": {
            "type": "text",
            "analyzer": "english_with_synonym",
            "copy_to": [
              "text",
              "spell",
              "autocomplete"
            ]
          },
          "content": {
            "type": "text",
            "analyzer": "english_with_synonym",
            "copy_to": [
              "text",
              "spell",
              "autocomplete"
            ]
          },
          "numcomments": {
            "type": "integer"
          },
          "comments": {
            "type": "text",
            "analyzer": "english_with_synonym"
          },
          "categories": {
            "type": "text",
            "analyzer": "english_with_synonym",
            "copy_to": [
              "text",
              "spell",
              "autocomplete"
            ]
          },
          "categoriessrch": {
            "type": "text",
            "analyzer": "english_with_synonym"
          },
          "tags": {
            "type": "keyword",
            "copy_to": [
              "text",
              "spell",
              "autocomplete"
            ]
          },
          "tagssrch": {
            "type": "text",
            "analyzer": "english_with_synonym"
          },
          "author": {
            "type": "keyword"
          },
          "type": {
            "type": "keyword"
          },
          "date": {
            "type": "date"
          },
          "modified": {
            "type": "date"
          },
          "displaydate": {
            "type": "date"
          },
          "displaymodified": {
            "type": "date"
          },
          "spell": {
            "type": "text",
            "analyzer": "wpsolr_analyser_did_you_mean"
          },
          "autocomplete": {
            "type": "completion",
            "contexts": [
              {
                "name": "context_type",
                "type": "category",
                "path": "type"
              },
              {
                "name": "context_blog_name",
                "type": "category",
                "path": "blog_name_str"
              }
            ]
          },
          "text": {
            "type": "text",
            "analyzer": "english_with_synonym"
          }
        }
      }
    }

    3. Create your index in WPSOLR with the same index name my_new_index
    4. Configure your index in WPSOLR
    5. Index your data in WPSOLR

Viewing 16 posts (of 16 total)

You must be logged in to reply to this topic.