From f7e43cf0052ef8ba0d85307153e2718ed5934664 Mon Sep 17 00:00:00 2001 From: Shinsuke Sugaya <shinsuke@yahoo.co.jp> Date: Wed, 23 Sep 2015 08:41:06 +0900 Subject: [PATCH] modify index mapping --- pom.xml | 18 +- .../codelibs/fess/client/FessEsClient.java | 17 +- .../org/codelibs/fess/helper/QueryHelper.java | 42 +- .../codelibs/fess/util/QueryResponseList.java | 2 +- src/main/resources/esclient.xml | 4 +- src/main/resources/fess_indices/fess.json | 59 ++ src/main/resources/fess_indices/fess/doc.json | 590 +++++++++++++++--- .../fess_indices/fess/{ja => }/synonym.txt | 0 8 files changed, 620 insertions(+), 112 deletions(-) rename src/main/resources/fess_indices/fess/{ja => }/synonym.txt (100%) diff --git a/pom.xml b/pom.xml index 2ffde27dd..786ede705 100644 --- a/pom.xml +++ b/pom.xml @@ -62,7 +62,7 @@ <pdfbox.version>1.8.7</pdfbox.version> <!-- Elasticsearch --> - <elasticsearch.version>1.7.1</elasticsearch.version> + <elasticsearch.version>1.7.2</elasticsearch.version> <cluster.runner.version>1.7.0.0</cluster.runner.version> <!-- Tomcat --> @@ -232,16 +232,24 @@ <mkdir dir="${basedir}/target/plugins" /> <get dest="${basedir}/target/plugins"> <url - url="${maven.snapshot.repo.url}/org/codelibs/elasticsearch-configsync/1.6.0-SNAPSHOT/elasticsearch-configsync-1.6.0-20150716.075532-2.zip" /> + url="${maven.release.repo.url}/org/codelibs/elasticsearch-analysis-kuromoji-neologd/1.7.1/elasticsearch-analysis-kuromoji-neologd-1.7.1.zip" /> <url - url="${maven.release.repo.url}/org/codelibs/elasticsearch-analysis-kuromoji-neologd/1.6.0/elasticsearch-analysis-kuromoji-neologd-1.6.0.zip" /> + url="${maven.release.repo.url}/org/codelibs/elasticsearch-analysis-synonym/1.5.0/elasticsearch-analysis-synonym-1.5.0.zip" /> + <url + url="${maven.snapshot.repo.url}/org/codelibs/elasticsearch-configsync/1.6.0-SNAPSHOT/elasticsearch-configsync-1.6.0-20150820.025903-5.zip" /> + <url + url="${maven.snapshot.repo.url}/org/codelibs/elasticsearch-langfield/1.7.0-SNAPSHOT/elasticsearch-langfield-1.7.0-20150922.221718-3.zip" /> </get> <delete dir="${basedir}/plugins" /> <mkdir dir="${basedir}/plugins" /> <unzip dest="${basedir}/plugins/analysis-kuromoji-neologd" - src="${basedir}/target/plugins/elasticsearch-analysis-kuromoji-neologd-1.6.0.zip" /> + src="${basedir}/target/plugins/elasticsearch-analysis-kuromoji-neologd-1.7.1.zip" /> + <unzip dest="${basedir}/plugins/analysis-synonym" + src="${basedir}/target/plugins/elasticsearch-analysis-synonym-1.5.0.zip" /> <unzip dest="${basedir}/plugins/configsync" - src="${basedir}/target/plugins/elasticsearch-configsync-1.6.0-20150716.075532-2.zip" /> + src="${basedir}/target/plugins/elasticsearch-configsync-1.6.0-20150820.025903-5.zip" /> + <unzip dest="${basedir}/plugins/langfield" + src="${basedir}/target/plugins/elasticsearch-langfield-1.7.0-20150922.221718-3.zip" /> </tasks> </configuration> <goals> diff --git a/src/main/java/org/codelibs/fess/client/FessEsClient.java b/src/main/java/org/codelibs/fess/client/FessEsClient.java index c71e3b237..2c14a0721 100644 --- a/src/main/java/org/codelibs/fess/client/FessEsClient.java +++ b/src/main/java/org/codelibs/fess/client/FessEsClient.java @@ -314,6 +314,15 @@ public class FessEsClient implements Client { logger.warn("Failed to register " + filePath, e); } }); + try (CurlResponse response = Curl.post(runner.node(), "_configsync/flush").execute()) { + if (response.getHttpStatusCode() == 200) { + logger.info("Flushed config files."); + } else { + logger.warn("Failed to flush config files."); + } + } catch (final Exception e) { + logger.warn("Failed to flush config files.", e); + } } try { @@ -719,10 +728,10 @@ public class FessEsClient implements Client { } } // highlighting - if (ComponentUtil.getQueryHelper().getHighlightingFields() != null - && ComponentUtil.getQueryHelper().getHighlightingFields().length != 0) { - for (final String hf : ComponentUtil.getQueryHelper().getHighlightingFields()) { - searchRequestBuilder.addHighlightedField(hf, ComponentUtil.getQueryHelper().getHighlightSnippetSize()); + if (ComponentUtil.getQueryHelper().getHighlightedFields() != null + && ComponentUtil.getQueryHelper().getHighlightedFields().length != 0) { + for (final String hf : ComponentUtil.getQueryHelper().getHighlightedFields()) { + searchRequestBuilder.addHighlightedField(hf, ComponentUtil.getQueryHelper().getHighlightFragmentSize()); } } diff --git a/src/main/java/org/codelibs/fess/helper/QueryHelper.java b/src/main/java/org/codelibs/fess/helper/QueryHelper.java index e4c392dd6..c016d8bc0 100644 --- a/src/main/java/org/codelibs/fess/helper/QueryHelper.java +++ b/src/main/java/org/codelibs/fess/helper/QueryHelper.java @@ -89,7 +89,7 @@ public class QueryHelper implements Serializable { protected String[] responseDocValuesFields; - protected String[] highlightingFields; + protected String[] highlightedFields; protected String[] searchFields; @@ -103,7 +103,7 @@ public class QueryHelper implements Serializable { protected String[] supportedAnalysisFields; - protected int highlightSnippetSize = 5; + protected int highlightFragmentSize = 100; protected boolean useBigram = true; @@ -121,7 +121,7 @@ public class QueryHelper implements Serializable { protected List<SortField> defaultSortFieldList = new ArrayList<SortField>(); - protected String highlightingPrefix = "hl_"; + protected String highlightPrefix = "hl_"; protected String minimumShouldMatch = "100%"; @@ -158,8 +158,8 @@ public class QueryHelper implements Serializable { if (responseDocValuesFields == null) { responseDocValuesFields = new String[] { fieldHelper.clickCountField, fieldHelper.favoriteCountField }; } - if (highlightingFields == null) { - highlightingFields = new String[] { fieldHelper.contentField }; + if (highlightedFields == null) { + highlightedFields = new String[] { fieldHelper.contentField }; } if (searchFields == null) { searchFields = @@ -1112,17 +1112,17 @@ public class QueryHelper implements Serializable { } /** - * @return the highlightingFields + * @return the highlightedFields */ - public String[] getHighlightingFields() { - return highlightingFields; + public String[] getHighlightedFields() { + return highlightedFields; } /** - * @param highlightingFields the highlightingFields to set + * @param highlightedFields the highlightedFields to set */ - public void setHighlightingFields(final String[] highlightingFields) { - this.highlightingFields = highlightingFields; + public void setHighlightedFields(final String[] highlightedFields) { + this.highlightedFields = highlightedFields; } /** @@ -1186,17 +1186,17 @@ public class QueryHelper implements Serializable { } /** - * @return the highlightSnippetSize + * @return the highlightFragmentSize */ - public int getHighlightSnippetSize() { - return highlightSnippetSize; + public int getHighlightFragmentSize() { + return highlightFragmentSize; } /** - * @param highlightSnippetSize the highlightSnippetSize to set + * @param highlightFragmentSize the highlightFragmentSize to set */ - public void setHighlightSnippetSize(final int highlightSnippetSize) { - this.highlightSnippetSize = highlightSnippetSize; + public void setHighlightFragmentSize(final int highlightFragmentSize) { + this.highlightFragmentSize = highlightFragmentSize; } /** @@ -1290,12 +1290,12 @@ public class QueryHelper implements Serializable { return defaultSortFieldList.toArray(new SortField[defaultSortFieldList.size()]); } - public void setHighlightingPrefix(final String highlightingPrefix) { - this.highlightingPrefix = highlightingPrefix; + public void setHighlightPrefix(final String highlightPrefix) { + this.highlightPrefix = highlightPrefix; } - public String getHighlightingPrefix() { - return highlightingPrefix; + public String getHighlightPrefix() { + return highlightPrefix; } public String[] getSupportedMltFields() { diff --git a/src/main/java/org/codelibs/fess/util/QueryResponseList.java b/src/main/java/org/codelibs/fess/util/QueryResponseList.java index 4eeda36bb..4fd6378d6 100644 --- a/src/main/java/org/codelibs/fess/util/QueryResponseList.java +++ b/src/main/java/org/codelibs/fess/util/QueryResponseList.java @@ -97,7 +97,7 @@ public class QueryResponseList implements List<Map<String, Object>> { // build highlighting fields final QueryHelper queryHelper = ComponentUtil.getQueryHelper(); ComponentUtil.getFieldHelper(); - final String hlPrefix = queryHelper.getHighlightingPrefix(); + final String hlPrefix = queryHelper.getHighlightPrefix(); for (final SearchHit searchHit : searchHits.getHits()) { final Map<String, Object> docMap = new HashMap<String, Object>(); if (searchHit.getSource() == null) { diff --git a/src/main/resources/esclient.xml b/src/main/resources/esclient.xml index 7b796facb..48843aef8 100644 --- a/src/main/resources/esclient.xml +++ b/src/main/resources/esclient.xml @@ -9,11 +9,11 @@ <!-- Dictionaries --> <postConstruct name="addConfigFile"> <arg>"fess"</arg> - <arg>"ja/mapping.txt"</arg> + <arg>"synonym.txt"</arg> </postConstruct> <postConstruct name="addConfigFile"> <arg>"fess"</arg> - <arg>"ja/synonym.txt"</arg> + <arg>"ja/mapping.txt"</arg> </postConstruct> <postConstruct name="addConfigFile"> <arg>"fess"</arg> diff --git a/src/main/resources/fess_indices/fess.json b/src/main/resources/fess_indices/fess.json index 095b0fe6a..cc93f103d 100644 --- a/src/main/resources/fess_indices/fess.json +++ b/src/main/resources/fess_indices/fess.json @@ -4,6 +4,65 @@ "refresh_interval": "1s", "number_of_shards": 5, "number_of_replicas": 0 + }, + "analysis": { + "char_filter": { + "mapping_ja_cfilter": { + "type": "mapping", + "mappings_path": "ja/mapping.txt" + } + }, + "filter": { + "stemmer_en_tfilter": { + "type": "stemmer", + "name": "english" + } + }, + "tokenizer": { + "kuromoji_neologd_tokenizer": { + "type": "reloadable_kuromoji_neologd_tokenizer", + "mode": "normal", + "user_dictionary": "ja/kuromoji.txt", + "discard_punctuation": false, + "reload_interval":"1m" + }, + "2gram_synonym_tokenizer": { + "type": "ngram_synonym", + "n": "2", + "synonyms_path": "synonym.txt", + "dynamic_reload":true, + "reload_interval":"1m" + } + }, + "analyzer": { + "japanese_analyzer": { + "type": "custom", + "char_filter": [ + "mapping_ja_cfilter", + "kuromoji_neologd_iteration_mark" + ], + "tokenizer": "kuromoji_neologd_tokenizer", + "filter": [ + "kuromoji_neologd_baseform", + "kuromoji_neologd_stemmer", + "kuromoji_neologd_part_of_speech" + ] + }, + "english_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "stemmer_en_tfilter" + ] + }, + "standard_analyzer": { + "type": "custom", + "char_filter": [ + "mapping_ja_cfilter" + ], + "tokenizer": "2gram_synonym_tokenizer" + } + } } } } diff --git a/src/main/resources/fess_indices/fess/doc.json b/src/main/resources/fess_indices/fess/doc.json index c9f96ebd0..72cccf11b 100644 --- a/src/main/resources/fess_indices/fess/doc.json +++ b/src/main/resources/fess_indices/fess/doc.json @@ -1,84 +1,516 @@ { - "doc" : { - "_all" : { - "enabled" : false - }, - "_id" : { - "path" : "id" - }, - "properties" : { - "anchor" : { - "type" : "string", - "index" : "not_analyzed" - }, - "boost" : { - "type" : "float" - }, - "click_count" : { - "type" : "long" - }, - "config_id" : { - "type" : "string", - "index" : "not_analyzed" - }, - "content" : { - "type" : "string" - }, - "content_length" : { - "type" : "long" - }, - "created" : { - "type" : "long" - }, - "digest" : { - "type" : "string" - }, - "doc_id" : { - "type" : "string", - "index" : "not_analyzed" - }, - "favorite_count" : { - "type" : "long" - }, - "filetype" : { - "type" : "string", - "index" : "not_analyzed" - }, - "host" : { - "type" : "string", - "index" : "not_analyzed" - }, - "id" : { - "type" : "string", - "index" : "not_analyzed" - }, - "lang" : { - "type" : "string", - "index" : "not_analyzed" - }, - "mimetype" : { - "type" : "string", - "index" : "not_analyzed" - }, - "parent_id" : { - "type" : "string", - "index" : "not_analyzed" - }, - "segment" : { - "type" : "string", - "index" : "not_analyzed" - }, - "site" : { - "type" : "string", - "index" : "not_analyzed" - }, - "title" : { - "type" : "string" - }, - "url" : { - "type" : "string", - "index" : "not_analyzed" + "doc": { + "_all": { + "enabled": false + }, + "_source": { + "enabled": true + }, + "_id": { + "path": "id" + }, + "dynamic_templates": [ + { + "lang_ar": { + "match": "*_ar", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" } } + }, + { + "lang_bg": { + "match": "*_bg", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_bn": { + "match": "*_bn", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ca": { + "match": "*_ca", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_cs": { + "match": "*_cs", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_da": { + "match": "*_da", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_de": { + "match": "*_de", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_el": { + "match": "*_el", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_en": { + "match": "*_en", + "mapping": { + "type": "string", + "analyzer": "english_analyzer" + } + } + }, + { + "lang_es": { + "match": "*_es", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_et": { + "match": "*_et", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_fa": { + "match": "*_fa", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_fi": { + "match": "*_fi", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_fr": { + "match": "*_fr", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_gu": { + "match": "*_gu", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_he": { + "match": "*_he", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_hi": { + "match": "*_hi", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_hr": { + "match": "*_hr", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_hu": { + "match": "*_hu", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_id": { + "match": "*_id", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_it": { + "match": "*_it", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ja": { + "match": "*_ja", + "mapping": { + "type": "string", + "analyzer": "japanese_analyzer" + } + } + }, + { + "lang_ko": { + "match": "*_ko", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_lt": { + "match": "*_lt", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_lv": { + "match": "*_lv", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_mk": { + "match": "*_mk", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ml": { + "match": "*_ml", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_nl": { + "match": "*_nl", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_no": { + "match": "*_no", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_pa": { + "match": "*_pa", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_pl": { + "match": "*_pl", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_pt": { + "match": "*_pt", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ro": { + "match": "*_ro", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ru": { + "match": "*_ru", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_si": { + "match": "*_si", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_sq": { + "match": "*_sq", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_sv": { + "match": "*_sv", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ta": { + "match": "*_ta", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_te": { + "match": "*_te", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_th": { + "match": "*_th", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_tl": { + "match": "*_tl", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_tr": { + "match": "*_tr", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_uk": { + "match": "*_uk", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_ur": { + "match": "*_ur", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_vi": { + "match": "*_vi", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_zh-cn": { + "match": "*_zh-cn", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + }, + { + "lang_zh-tw": { + "match": "*_zh-tw", + "mapping": { + "type": "string", + "analyzer": "standard_analyzer" + } + } + } + ], + "properties": { + "anchor": { + "type": "string", + "index": "not_analyzed" + }, + "boost": { + "type": "float" + }, + "click_count": { + "type": "long" + }, + "config_id": { + "type": "string", + "index": "not_analyzed" + }, + "content": { + "type": "langstring", + "analyzer": "standard_analyzer", + "term_vector": "with_positions_offsets" + }, + "content_length": { + "type": "long" + }, + "created": { + "type": "long" + }, + "digest": { + "type": "string" + }, + "doc_id": { + "type": "string", + "index": "not_analyzed" + }, + "favorite_count": { + "type": "long" + }, + "filetype": { + "type": "string", + "index": "not_analyzed" + }, + "host": { + "type": "string", + "index": "not_analyzed" + }, + "id": { + "type": "string", + "index": "not_analyzed" + }, + "lang": { + "type": "string", + "index": "not_analyzed" + }, + "mimetype": { + "type": "string", + "index": "not_analyzed" + }, + "parent_id": { + "type": "string", + "index": "not_analyzed" + }, + "segment": { + "type": "string", + "index": "not_analyzed" + }, + "site": { + "type": "string", + "index": "not_analyzed" + }, + "title": { + "type": "langstring", + "analyzer": "standard_analyzer", + "term_vector": "with_positions_offsets" + }, + "url": { + "type": "string", + "index": "not_analyzed" } + } + } } diff --git a/src/main/resources/fess_indices/fess/ja/synonym.txt b/src/main/resources/fess_indices/fess/synonym.txt similarity index 100% rename from src/main/resources/fess_indices/fess/ja/synonym.txt rename to src/main/resources/fess_indices/fess/synonym.txt -- GitLab