|
|
1
|
+package com.yoho.search.service.searchexplainer;
|
|
|
2
|
+
|
|
|
3
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
4
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
5
|
+import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
|
|
|
6
|
+import com.google.common.cache.CacheBuilder;
|
|
|
7
|
+import com.google.common.cache.CacheLoader;
|
|
|
8
|
+import com.yoho.search.base.utils.ISearchConstants;
|
|
|
9
|
+import com.yoho.search.core.es.IElasticsearchClient;
|
|
|
10
|
+import com.yoho.search.core.es.model.SearchParam;
|
|
|
11
|
+import com.yoho.search.core.es.model.SearchResult;
|
|
|
12
|
+import com.yoho.search.service.service.ESClientMgr;
|
|
|
13
|
+import org.apache.commons.collections.CollectionUtils;
|
|
|
14
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
15
|
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
|
|
16
|
+import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
|
|
|
17
|
+import org.elasticsearch.cluster.metadata.MappingMetaData;
|
|
|
18
|
+import org.elasticsearch.common.collect.ImmutableOpenMap;
|
|
|
19
|
+import org.elasticsearch.index.query.QueryBuilders;
|
|
|
20
|
+import org.slf4j.Logger;
|
|
|
21
|
+import org.slf4j.LoggerFactory;
|
|
|
22
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
23
|
+import org.springframework.stereotype.Service;
|
|
|
24
|
+import org.springframework.util.Assert;
|
|
|
25
|
+
|
|
|
26
|
+import java.util.*;
|
|
|
27
|
+import java.util.concurrent.ExecutionException;
|
|
|
28
|
+import java.util.concurrent.TimeUnit;
|
|
|
29
|
+import java.util.stream.Collectors;
|
|
|
30
|
+
|
|
|
31
|
+/**
|
|
|
32
|
+ * Created by ginozhang on 2016/11/16.
|
|
|
33
|
+ */
|
|
|
34
|
+@Service
|
|
|
35
|
+public class SearchExplainerService {
|
|
|
36
|
+
|
|
|
37
|
+ private static final Logger logger = LoggerFactory.getLogger(SearchExplainerService.class);
|
|
|
38
|
+
|
|
|
39
|
+ private static final String DEFAULT_ANALYZER = "keyword";
|
|
|
40
|
+
|
|
|
41
|
+ private static final String MATCH_TYPE_STRING = "String Match";
|
|
|
42
|
+ private static final String MATCH_TYPE_RANGE = "Range Match";
|
|
|
43
|
+
|
|
|
44
|
+ private static final String RETURN_TYPE_ALL = "all";
|
|
|
45
|
+ private static final String RETURN_TYPE_MATCH_ONLY = "match_only";
|
|
|
46
|
+
|
|
|
47
|
+ // 拷贝search-service的search.default.field配置参数
|
|
|
48
|
+ // private static final String fieldsToSearch = Configuration.getString("search.default.field");
|
|
|
49
|
+
|
|
|
50
|
+ @Autowired
|
|
|
51
|
+ private ESClientMgr esClientMgr;
|
|
|
52
|
+
|
|
|
53
|
+ private volatile Map<String, FieldDesc> localFieldDescMap = null;
|
|
|
54
|
+
|
|
|
55
|
+ private volatile Map<String, List<String>> localCopiedFieldMap = null;
|
|
|
56
|
+
|
|
|
57
|
+ private volatile List<String> localMutilFields = null;
|
|
|
58
|
+
|
|
|
59
|
+ //入参和索引中字段名的映射
|
|
|
60
|
+ public final static Map<String, EsField> fieldMap = new HashMap<String, EsField>() {{
|
|
|
61
|
+ //字符类
|
|
|
62
|
+ put("brand", new EsField("brandId", MATCH_TYPE_STRING));
|
|
|
63
|
+ put("shop", new EsField("shopId", MATCH_TYPE_STRING));
|
|
|
64
|
+ put("msort", new EsField("maxSortId", MATCH_TYPE_STRING));
|
|
|
65
|
+ put("misort", new EsField("middleSortId", MATCH_TYPE_STRING));
|
|
|
66
|
+ put("sort", new EsField("smallSortId", MATCH_TYPE_STRING));
|
|
|
67
|
+ put("color", new EsField("colorIds", MATCH_TYPE_STRING));
|
|
|
68
|
+ put("size", new EsField("sizeIds", MATCH_TYPE_STRING));
|
|
|
69
|
+ put("price", new EsField("salesPrice", MATCH_TYPE_STRING));
|
|
|
70
|
+ put("gender", new EsField("gender", MATCH_TYPE_STRING));
|
|
|
71
|
+ put("specialoffer", new EsField("specialoffer", MATCH_TYPE_STRING));
|
|
|
72
|
+ put("isdiscount", new EsField("isDiscount", MATCH_TYPE_STRING));
|
|
|
73
|
+ put("promotion", new EsField("ispromotion", MATCH_TYPE_STRING));
|
|
|
74
|
+ put("vdt", new EsField("vipDiscountType", MATCH_TYPE_STRING));
|
|
|
75
|
+ put("attribute", new EsField("attribute", MATCH_TYPE_STRING));
|
|
|
76
|
+ put("limited", new EsField("islimited", MATCH_TYPE_STRING));
|
|
|
77
|
+ put("new", new EsField("isnew", MATCH_TYPE_STRING));
|
|
|
78
|
+ put("outlets", new EsField("isOutlets", MATCH_TYPE_STRING));
|
|
|
79
|
+ put("status", new EsField("status", MATCH_TYPE_STRING));
|
|
|
80
|
+ put("style", new EsField("styleIds", MATCH_TYPE_STRING));
|
|
|
81
|
+ put("sell_channels", new EsField("sellChannels", MATCH_TYPE_STRING));
|
|
|
82
|
+ put("folder_id", new EsField("folder_id", MATCH_TYPE_STRING));
|
|
|
83
|
+ put("series_id", new EsField("series_id", MATCH_TYPE_STRING));
|
|
|
84
|
+ put("day", new EsField("shelveDay", MATCH_TYPE_STRING));
|
|
|
85
|
+ put("brand", new EsField("salesPrice", MATCH_TYPE_STRING));
|
|
|
86
|
+ put("shop", new EsField("salesPrice", MATCH_TYPE_STRING));
|
|
|
87
|
+ put("brand", new EsField("brandId", MATCH_TYPE_STRING));
|
|
|
88
|
+ put("shop", new EsField("shop", MATCH_TYPE_STRING));
|
|
|
89
|
+ put("brand", new EsField("brandId", MATCH_TYPE_STRING));
|
|
|
90
|
+ put("breaking", new EsField("breakingRate", MATCH_TYPE_STRING));
|
|
|
91
|
+ put("ageLevel", new EsField("ageLevel", MATCH_TYPE_STRING));
|
|
|
92
|
+ put("product_skn", new EsField("productSkn", MATCH_TYPE_STRING));
|
|
|
93
|
+ //范围类
|
|
|
94
|
+ put("stocknumber", new EsField("storageNum", MATCH_TYPE_RANGE));
|
|
|
95
|
+ put("p_d", new EsField("promotionDiscountInt", MATCH_TYPE_RANGE));
|
|
|
96
|
+ put("p_d_int", new EsField("promotionDiscountInt", MATCH_TYPE_RANGE));
|
|
|
97
|
+ put("first_shelve_time", new EsField("firstShelveTime", MATCH_TYPE_RANGE));
|
|
|
98
|
+ put("shelve_time", new EsField("shelveTime", MATCH_TYPE_RANGE));
|
|
|
99
|
+ }};
|
|
|
100
|
+
|
|
|
101
|
+ public String fieldConvert(String paraField) {
|
|
|
102
|
+ EsField esField = fieldMap.get(paraField);
|
|
|
103
|
+ if (esField == null) {
|
|
|
104
|
+ return null;
|
|
|
105
|
+ }
|
|
|
106
|
+ return esField.getFieldName();
|
|
|
107
|
+ }
|
|
|
108
|
+
|
|
|
109
|
+ public Map<String, Object> explain(Map<String, String> paramMap) throws Exception {
|
|
|
110
|
+ long start = System.currentTimeMillis();
|
|
|
111
|
+ logger.info("Begin to explain search. start: {}, paramMap: {}.", start, paramMap);
|
|
|
112
|
+ String skn = paramMap.get("skn");
|
|
|
113
|
+ if (StringUtils.isEmpty(skn)) {
|
|
|
114
|
+ Map<String, Object> map = new LinkedHashMap<>();
|
|
|
115
|
+ map.put("code", "400");
|
|
|
116
|
+ map.put("message", "skn is null");
|
|
|
117
|
+ return map;
|
|
|
118
|
+ }
|
|
|
119
|
+
|
|
|
120
|
+ String returnType = paramMap.get("returnType");
|
|
|
121
|
+ if (returnType == null) {
|
|
|
122
|
+ returnType = RETURN_TYPE_MATCH_ONLY;
|
|
|
123
|
+ }
|
|
|
124
|
+
|
|
|
125
|
+ String multiSearchType = paramMap.get("multiSearchType");
|
|
|
126
|
+ if (multiSearchType == null) {
|
|
|
127
|
+ multiSearchType = ISearchConstants.SEARCH_MULTIMATCHQUERY_TYPE;
|
|
|
128
|
+ }
|
|
|
129
|
+
|
|
|
130
|
+ String keyword = paramMap.get("query");
|
|
|
131
|
+ logger.info("Begin to explain search keyword [{}] for product skn {}.", keyword, skn);
|
|
|
132
|
+
|
|
|
133
|
+ // 1. 根据skn获取es中的数据
|
|
|
134
|
+ Map<String, Object> document = getDocumentBySkn(skn);
|
|
|
135
|
+ logger.info("Get the document for the product. document: \n{}", document);
|
|
|
136
|
+
|
|
|
137
|
+ // 2.分析过滤的参数
|
|
|
138
|
+ Map<String, Object> explainResult = explainFilter(paramMap, document);
|
|
|
139
|
+
|
|
|
140
|
+ if ((Boolean) explainResult.get("filter_match") && StringUtils.isNotEmpty(keyword)) {
|
|
|
141
|
+ List<SearchMatch> queryMatchList = explainMultiMatchQuery(document, keyword, returnType, multiSearchType);
|
|
|
142
|
+ explainResult.put("query_match", queryMatchList.stream().anyMatch(SearchMatch::isMatch));
|
|
|
143
|
+ explainResult.put("query_list", queryMatchList);
|
|
|
144
|
+ }
|
|
|
145
|
+
|
|
|
146
|
+ logger.info("End to explain search keyword [{}] for product skn {}. cost: {}", keyword, skn, System.currentTimeMillis() - start);
|
|
|
147
|
+ return explainResult;
|
|
|
148
|
+ }
|
|
|
149
|
+
|
|
|
150
|
+ private Map<String, Object> explainFilter(Map<String, String> paramMap, Map<String, Object> document) {
|
|
|
151
|
+ Map<String, Object> map = new LinkedHashMap<>();
|
|
|
152
|
+ List<String> warns = new ArrayList<>();
|
|
|
153
|
+ boolean isFilterMatch = true;
|
|
|
154
|
+ List<FilterMatch> filterMatchList = new ArrayList<>();
|
|
|
155
|
+ for (Map.Entry<String, String> entry : paramMap.entrySet()) {
|
|
|
156
|
+ String key = entry.getKey();
|
|
|
157
|
+ if (key.equals("query") || key.equals("skn") || key.equals("returnType")) {
|
|
|
158
|
+ continue;
|
|
|
159
|
+ }
|
|
|
160
|
+
|
|
|
161
|
+ if (!fieldMap.containsKey(key)) {
|
|
|
162
|
+ warns.add("parameter " + key + " is invalid\n");
|
|
|
163
|
+ continue;
|
|
|
164
|
+ }
|
|
|
165
|
+
|
|
|
166
|
+ Object value = document.get(fieldConvert(key));
|
|
|
167
|
+ if (value != null) {
|
|
|
168
|
+ String paraValue = value.toString();
|
|
|
169
|
+ EsField esField = fieldMap.get(key);
|
|
|
170
|
+ //将入参和字段值都转化为字符数组
|
|
|
171
|
+ String[] paraArray = entry.getValue().split(",");
|
|
|
172
|
+ String[] indexArray = paraValue.split(",");
|
|
|
173
|
+ Set<String> indexSet = new HashSet<String>();
|
|
|
174
|
+ for (String i : indexArray) {
|
|
|
175
|
+ indexSet.add(i);
|
|
|
176
|
+ }
|
|
|
177
|
+ //字符匹配比较
|
|
|
178
|
+ if (esField.getFieldType().equals(MATCH_TYPE_STRING)) {
|
|
|
179
|
+ for (String p : paraArray) {
|
|
|
180
|
+ if (!indexSet.contains(p)) {
|
|
|
181
|
+ filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
|
|
|
182
|
+ isFilterMatch = false;
|
|
|
183
|
+ }
|
|
|
184
|
+ }
|
|
|
185
|
+ }
|
|
|
186
|
+ //取值范围比较
|
|
|
187
|
+ if (esField.getFieldType().equals(MATCH_TYPE_RANGE)) {
|
|
|
188
|
+ if (key.equals("stocknumber")) {
|
|
|
189
|
+ if (Integer.valueOf(value.toString()) < 1) {
|
|
|
190
|
+ filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
|
|
|
191
|
+ isFilterMatch = false;
|
|
|
192
|
+ }
|
|
|
193
|
+ } else if (paraArray.length == 2 && indexArray.length == 1) {
|
|
|
194
|
+ Integer paraRangeStart = Integer.valueOf(paraArray[0]);
|
|
|
195
|
+ Integer paraRangeEnd = Integer.valueOf(paraArray[1]);
|
|
|
196
|
+ Integer indexValue = Integer.valueOf(indexArray[0]);
|
|
|
197
|
+ if (indexValue < paraRangeStart || indexValue > paraRangeEnd) {
|
|
|
198
|
+ filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
|
|
|
199
|
+ isFilterMatch = false;
|
|
|
200
|
+ }
|
|
|
201
|
+ }
|
|
|
202
|
+ }
|
|
|
203
|
+ }
|
|
|
204
|
+ }
|
|
|
205
|
+
|
|
|
206
|
+ map.put("code", "200");
|
|
|
207
|
+ map.put("message", "search explainer");
|
|
|
208
|
+ if (CollectionUtils.isNotEmpty(warns)) {
|
|
|
209
|
+ map.put("warn", warns);
|
|
|
210
|
+ }
|
|
|
211
|
+ map.put("filter_match", isFilterMatch);
|
|
|
212
|
+ map.put("filter_list", filterMatchList);
|
|
|
213
|
+
|
|
|
214
|
+ return map;
|
|
|
215
|
+ }
|
|
|
216
|
+
|
|
|
217
|
+ private List<SearchMatch> explainMultiMatchQuery(Map<String, Object> document, String keyword, String returnType, String multiSearchType) throws Exception {
|
|
|
218
|
+ // 3. 解析mapping文件获取字段元数据
|
|
|
219
|
+ Map<String, FieldDesc> fieldDescMap = parseMapping();
|
|
|
220
|
+ logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
|
|
|
221
|
+
|
|
|
222
|
+ // 4. 处理多字段和copy_to的字段并赋值
|
|
|
223
|
+ processMultiFields(fieldDescMap, document);
|
|
|
224
|
+ processCopiedFields(fieldDescMap, document);
|
|
|
225
|
+ logger.info("Set the value of copied field succeeded.");
|
|
|
226
|
+
|
|
|
227
|
+ // 5. 分析每个multi-match的每个字段 根据分词结果判定是否匹配
|
|
|
228
|
+ List<String> sortedSearchFields = getSortedSearchFields();
|
|
|
229
|
+ logger.info("Get the search fields. fields: {}", sortedSearchFields);
|
|
|
230
|
+
|
|
|
231
|
+ String searchType = ISearchConstants.SEARCH_MULTIMATCHQUERY_TYPE;
|
|
|
232
|
+ if ("cross_fields".equalsIgnoreCase(multiSearchType)) {
|
|
|
233
|
+ return explainCrossFieldsQuery(document, keyword, returnType, fieldDescMap, sortedSearchFields);
|
|
|
234
|
+ } else {
|
|
|
235
|
+ return explainBestFieldsQuery(document, keyword, returnType, fieldDescMap, sortedSearchFields);
|
|
|
236
|
+ }
|
|
|
237
|
+ }
|
|
|
238
|
+
|
|
|
239
|
+ private List explainBestFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap, List<String> sortedSearchFields) throws Exception {
|
|
|
240
|
+ boolean isQueryMatch = false;
|
|
|
241
|
+ boolean isFieldQueryMatch = false;
|
|
|
242
|
+ List<FieldMatch> queryMatchList = new ArrayList<>();
|
|
|
243
|
+ for (String field : sortedSearchFields) {
|
|
|
244
|
+ String value = document.get(field) != null ? document.get(field).toString() : null;
|
|
|
245
|
+ FieldDesc fieldDesc = fieldDescMap.get(field);
|
|
|
246
|
+ Assert.notNull(fieldDesc, "no field " + field + " defined.");
|
|
|
247
|
+ List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
|
|
|
248
|
+ List<String> keywordAnalyzerResult = getKeyWordAnalyzerResult(fieldDesc, keyword);
|
|
|
249
|
+ isFieldQueryMatch = judgeMatch(analyzerResult, keywordAnalyzerResult);
|
|
|
250
|
+ isQueryMatch = isFieldQueryMatch || isQueryMatch;
|
|
|
251
|
+ //如果是匹配模式则只返回匹配的字段,如果是ALL模式则返回索引字段
|
|
|
252
|
+ if (returnType.equals(RETURN_TYPE_ALL) || isFieldQueryMatch) {
|
|
|
253
|
+ queryMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, keywordAnalyzerResult, isFieldQueryMatch));
|
|
|
254
|
+ }
|
|
|
255
|
+
|
|
|
256
|
+ if (returnType.equals(RETURN_TYPE_MATCH_ONLY) && isFieldQueryMatch) {
|
|
|
257
|
+ // 找到匹配的就返回
|
|
|
258
|
+ break;
|
|
|
259
|
+ }
|
|
|
260
|
+ }
|
|
|
261
|
+
|
|
|
262
|
+ return queryMatchList;
|
|
|
263
|
+ }
|
|
|
264
|
+
|
|
|
265
|
+ private List explainCrossFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap, List<String> sortedSearchFields) throws Exception {
|
|
|
266
|
+ List<GroupMatch> groupMatchList = new ArrayList<>();
|
|
|
267
|
+ List<FieldDesc> searchfieldDescList = new ArrayList<>();
|
|
|
268
|
+ fieldDescMap.forEach((field, fieldDesc) -> {
|
|
|
269
|
+ if (sortedSearchFields.contains(field)) {
|
|
|
270
|
+ if (StringUtils.isEmpty(fieldDesc.analyzer)) {
|
|
|
271
|
+ fieldDesc.analyzer = DEFAULT_ANALYZER;
|
|
|
272
|
+ }
|
|
|
273
|
+ if (StringUtils.isEmpty(fieldDesc.search_analyzer)) {
|
|
|
274
|
+ fieldDesc.search_analyzer = fieldDesc.analyzer;
|
|
|
275
|
+ }
|
|
|
276
|
+
|
|
|
277
|
+ searchfieldDescList.add(fieldDesc);
|
|
|
278
|
+ }
|
|
|
279
|
+ });
|
|
|
280
|
+
|
|
|
281
|
+
|
|
|
282
|
+ // 根据search_analyzer进行分组
|
|
|
283
|
+ Map<String, List<FieldDesc>> groupMap = searchfieldDescList.stream().collect(Collectors.groupingBy(fieldDesc -> fieldDesc.search_analyzer));
|
|
|
284
|
+ for (Map.Entry<String, List<FieldDesc>> groupEntry : groupMap.entrySet()) {
|
|
|
285
|
+ // 对于cross_field来说 必须所有输入的token在同一组内
|
|
|
286
|
+ String searchAnalyzer = groupEntry.getValue().get(0).search_analyzer;
|
|
|
287
|
+ List<String> keywordAnalyzerResult = getAnalyzerTokens(keyword, searchAnalyzer);
|
|
|
288
|
+ List<String> sortedFieldDescNames = groupEntry.getValue().stream().map(fieldDesc -> fieldDesc.field).collect(Collectors.toList());
|
|
|
289
|
+ boolean isMatch = true;
|
|
|
290
|
+ List<FieldMatch> fieldMatchList = new ArrayList<>();
|
|
|
291
|
+ for (String token : keywordAnalyzerResult) {
|
|
|
292
|
+ boolean isTokenMatch = false;
|
|
|
293
|
+ for (String field : sortedSearchFields) {
|
|
|
294
|
+ // 检查field是否匹配token
|
|
|
295
|
+ if (!sortedFieldDescNames.contains(field)) {
|
|
|
296
|
+ continue;
|
|
|
297
|
+ }
|
|
|
298
|
+
|
|
|
299
|
+ String value = document.get(field) != null ? document.get(field).toString() : null;
|
|
|
300
|
+ FieldDesc fieldDesc = fieldDescMap.get(field);
|
|
|
301
|
+ Assert.notNull(fieldDesc, "for " + field);
|
|
|
302
|
+ List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
|
|
|
303
|
+ isTokenMatch = analyzerResult.contains(token);
|
|
|
304
|
+ if (returnType.equals(RETURN_TYPE_ALL) || isTokenMatch) {
|
|
|
305
|
+ fieldMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, Arrays.asList(token), isTokenMatch));
|
|
|
306
|
+ }
|
|
|
307
|
+
|
|
|
308
|
+ if (returnType.equals(RETURN_TYPE_MATCH_ONLY) && isTokenMatch) {
|
|
|
309
|
+ // 找到匹配的就返回
|
|
|
310
|
+ break;
|
|
|
311
|
+ }
|
|
|
312
|
+ }
|
|
|
313
|
+
|
|
|
314
|
+ isMatch = isMatch && isTokenMatch;
|
|
|
315
|
+ }
|
|
|
316
|
+
|
|
|
317
|
+ if (returnType.equals(RETURN_TYPE_ALL) || isMatch) {
|
|
|
318
|
+ groupMatchList.add(new GroupMatch("", searchAnalyzer, keywordAnalyzerResult, fieldMatchList, isMatch));
|
|
|
319
|
+ }
|
|
|
320
|
+ }
|
|
|
321
|
+
|
|
|
322
|
+ return groupMatchList;
|
|
|
323
|
+ }
|
|
|
324
|
+
|
|
|
325
|
+ public Map<String, Object> getTokens(String skn) throws Exception {
|
|
|
326
|
+ long start = System.currentTimeMillis();
|
|
|
327
|
+ logger.info("Begin to get tokens for product skn {}.", skn);
|
|
|
328
|
+
|
|
|
329
|
+ // 1. 根据skn获取es中的数据
|
|
|
330
|
+ Map<String, Object> document = getDocumentBySkn(skn);
|
|
|
331
|
+ logger.info("Get the document for the product. document: \n{}", document);
|
|
|
332
|
+
|
|
|
333
|
+ // 2. 解析mapping文件获取字段元数据
|
|
|
334
|
+ Map<String, FieldDesc> fieldDescMap = parseMapping();
|
|
|
335
|
+ logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
|
|
|
336
|
+
|
|
|
337
|
+ // 3. 处理多字段和copy_to的字段并赋值
|
|
|
338
|
+ processMultiFields(fieldDescMap, document);
|
|
|
339
|
+ processCopiedFields(fieldDescMap, document);
|
|
|
340
|
+ logger.info("Set the value of copied field succeeded.");
|
|
|
341
|
+
|
|
|
342
|
+ // 4. 分析每个multi-match的每个字段 根据权重优先级展示token列表
|
|
|
343
|
+ List<String> sortedSearchFields = getSortedSearchFields();
|
|
|
344
|
+ logger.info("Get the search fields. fields: {}", sortedSearchFields);
|
|
|
345
|
+ boolean isMatch = false;
|
|
|
346
|
+ List<String> tokens = new ArrayList<>();
|
|
|
347
|
+ for (String field : sortedSearchFields) {
|
|
|
348
|
+ String value = document.get(field) != null ? document.get(field).toString() : null;
|
|
|
349
|
+ FieldDesc fieldDesc = fieldDescMap.get(field);
|
|
|
350
|
+ Assert.notNull(fieldDesc, "no field " + field + " defined.");
|
|
|
351
|
+ List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
|
|
|
352
|
+ if (CollectionUtils.isNotEmpty(analyzerResult)) {
|
|
|
353
|
+ analyzerResult.removeAll(tokens);
|
|
|
354
|
+ tokens.addAll(analyzerResult);
|
|
|
355
|
+ }
|
|
|
356
|
+ }
|
|
|
357
|
+
|
|
|
358
|
+ Map<String, Object> map = new HashMap<>();
|
|
|
359
|
+ map.put("code", "200");
|
|
|
360
|
+ map.put("message", "product tokens.");
|
|
|
361
|
+ map.put("data", tokens);
|
|
|
362
|
+ logger.info("End to get tokens for product skn {}. cost: {}", skn, System.currentTimeMillis() - start);
|
|
|
363
|
+ return map;
|
|
|
364
|
+ }
|
|
|
365
|
+
|
|
|
366
|
+ private boolean judgeMatch(List<String> analyzerResult, List<String> keywordAnalyzerResult) {
|
|
|
367
|
+ // 对于best_fields必须包含所有token
|
|
|
368
|
+ return analyzerResult.containsAll(keywordAnalyzerResult);
|
|
|
369
|
+ }
|
|
|
370
|
+
|
|
|
371
|
+ private List<String> getKeyWordAnalyzerResult(FieldDesc fieldDesc, String keyword) throws ExecutionException {
|
|
|
372
|
+ if (fieldDesc.noNeedAnalyzer()) {
|
|
|
373
|
+ return Arrays.asList(keyword);
|
|
|
374
|
+ }
|
|
|
375
|
+
|
|
|
376
|
+ String searchAnalyzer = fieldDesc.search_analyzer;
|
|
|
377
|
+ if (StringUtils.isEmpty(searchAnalyzer)) {
|
|
|
378
|
+ searchAnalyzer = fieldDesc.analyzer;
|
|
|
379
|
+ }
|
|
|
380
|
+ if (StringUtils.isEmpty(searchAnalyzer)) {
|
|
|
381
|
+ searchAnalyzer = DEFAULT_ANALYZER;
|
|
|
382
|
+ }
|
|
|
383
|
+ return getAnalyzerTokens(keyword, searchAnalyzer);
|
|
|
384
|
+ }
|
|
|
385
|
+
|
|
|
386
|
+ private List<String> getAnalyzerResult(FieldDesc fieldDesc, String value) throws ExecutionException {
|
|
|
387
|
+ if (StringUtils.isEmpty(value)) {
|
|
|
388
|
+ return new ArrayList<>();
|
|
|
389
|
+ }
|
|
|
390
|
+
|
|
|
391
|
+ if (fieldDesc.noNeedAnalyzer()) {
|
|
|
392
|
+ return Arrays.asList(value);
|
|
|
393
|
+ }
|
|
|
394
|
+
|
|
|
395
|
+ String analyzer = fieldDesc.analyzer;
|
|
|
396
|
+ if (StringUtils.isEmpty(analyzer)) {
|
|
|
397
|
+ analyzer = DEFAULT_ANALYZER;
|
|
|
398
|
+ }
|
|
|
399
|
+
|
|
|
400
|
+ return getAnalyzerTokens(value, analyzer);
|
|
|
401
|
+ }
|
|
|
402
|
+
|
|
|
403
|
+ private List<String> getAnalyzerTokens(String text, String analyzer) throws ExecutionException {
|
|
|
404
|
+ // 使用guava的本地缓存
|
|
|
405
|
+ return CacheBuilder.newBuilder()
|
|
|
406
|
+ .maximumSize(1000)
|
|
|
407
|
+ .expireAfterWrite(3, TimeUnit.MINUTES)
|
|
|
408
|
+ .build(
|
|
|
409
|
+ new CacheLoader<String, List<String>>() {
|
|
|
410
|
+ public List<String> load(String key) {
|
|
|
411
|
+ String tempAnalyzer = key.split("@")[0];
|
|
|
412
|
+ String tempText = key.split("@")[1];
|
|
|
413
|
+
|
|
|
414
|
+ if ("keyword".equals(analyzer)) {
|
|
|
415
|
+ // keyword 分词就不去调用ES
|
|
|
416
|
+ return Arrays.asList(tempText);
|
|
|
417
|
+ }
|
|
|
418
|
+
|
|
|
419
|
+ IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
|
|
|
420
|
+ List<AnalyzeResponse.AnalyzeToken> list = client.getAnalyzeResponse(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, text, analyzer).getTokens();
|
|
|
421
|
+ return list.stream().map(item -> item.getTerm()).collect(Collectors.toList());
|
|
|
422
|
+ }
|
|
|
423
|
+ }).get(analyzer + "@" + text);
|
|
|
424
|
+ }
|
|
|
425
|
+
|
|
|
426
|
+
|
|
|
427
|
+ private Map<String, Object> getDocumentBySkn(String skn) {
|
|
|
428
|
+ IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
|
|
|
429
|
+ if (!client.indexExists(ISearchConstants.INDEX_NAME_PRODUCT_INDEX)) {
|
|
|
430
|
+ throw new RuntimeException("index not exist");
|
|
|
431
|
+ }
|
|
|
432
|
+
|
|
|
433
|
+ SearchParam searchParam = new SearchParam();
|
|
|
434
|
+ searchParam.setQuery(QueryBuilders.termQuery("productSkn", skn));
|
|
|
435
|
+ SearchResult searchResult = client.search(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, ISearchConstants.INDEX_NAME_PRODUCT_INDEX, searchParam);
|
|
|
436
|
+ if (searchResult == null || searchResult.getResultList() == null || searchResult.getResultList().isEmpty()) {
|
|
|
437
|
+ throw new RuntimeException("product skn not exist");
|
|
|
438
|
+ }
|
|
|
439
|
+
|
|
|
440
|
+ return searchResult.getResultList().get(0);
|
|
|
441
|
+ }
|
|
|
442
|
+
|
|
|
443
|
+ private Map<String, FieldDesc> parseMapping() {
|
|
|
444
|
+ if (localFieldDescMap != null) {
|
|
|
445
|
+ return localFieldDescMap;
|
|
|
446
|
+ }
|
|
|
447
|
+
|
|
|
448
|
+ synchronized (this) {
|
|
|
449
|
+ if (localFieldDescMap != null) {
|
|
|
450
|
+ // double check
|
|
|
451
|
+ return localFieldDescMap;
|
|
|
452
|
+ }
|
|
|
453
|
+
|
|
|
454
|
+ // 从ES获取mapping
|
|
|
455
|
+ IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
|
|
|
456
|
+ GetMappingsResponse response = client.getMapping(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
|
|
|
457
|
+ Assert.notNull(response, ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
|
|
|
458
|
+ ImmutableOpenMap<String, MappingMetaData> mapping = response.getMappings().get(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
|
|
|
459
|
+ Iterator<ObjectObjectCursor<String, MappingMetaData>> iterator = mapping.iterator();
|
|
|
460
|
+ while(iterator.hasNext())
|
|
|
461
|
+ {
|
|
|
462
|
+ ObjectObjectCursor<String, MappingMetaData> object = iterator.next();
|
|
|
463
|
+ //object.value.getSourceAsMap();
|
|
|
464
|
+ }
|
|
|
465
|
+
|
|
|
466
|
+// String mappingContent = yohoIndexService.getIndex(ISearchConstants.INDEX_NAME_PRODUCT_INDEX).getMappingContent();
|
|
|
467
|
+// JSONObject jsonObject = JSON.parseObject(mappingContent);
|
|
|
468
|
+// JSONObject mapping = jsonObject.getJSONObject(ISearchConstants.INDEX_NAME_PRODUCT_INDEX).getJSONObject("properties");
|
|
|
469
|
+// Map<String, FieldDesc> fieldDescMap = new HashMap<>();
|
|
|
470
|
+// for (String field : mapping.keySet()) {
|
|
|
471
|
+// parseToFieldDesc(fieldDescMap, field, mapping.getJSONObject(field));
|
|
|
472
|
+// }
|
|
|
473
|
+
|
|
|
474
|
+// localFieldDescMap = fieldDescMap;
|
|
|
475
|
+ return localFieldDescMap;
|
|
|
476
|
+ }
|
|
|
477
|
+ }
|
|
|
478
|
+
|
|
|
479
|
+ private void parseToFieldDesc(Map<String, FieldDesc> fieldDescMap, String field, JSONObject jsonObject) {
|
|
|
480
|
+ if (jsonObject.containsKey("fields")) {
|
|
|
481
|
+ // 多字段类型
|
|
|
482
|
+ JSONObject innerJsonObject = jsonObject.getJSONObject("fields");
|
|
|
483
|
+ for (String innerField : innerJsonObject.keySet()) {
|
|
|
484
|
+ String realFieldName = innerField;
|
|
|
485
|
+ if (!innerField.equals(field)) {
|
|
|
486
|
+ realFieldName = field + "." + innerField;
|
|
|
487
|
+ }
|
|
|
488
|
+
|
|
|
489
|
+ parseSingle(fieldDescMap, realFieldName, innerJsonObject.getJSONObject(innerField));
|
|
|
490
|
+ }
|
|
|
491
|
+ } else if (jsonObject.containsKey("properties")) {
|
|
|
492
|
+ // nested object类型 搜索暂不支持查询内嵌对象的字段
|
|
|
493
|
+ } else {
|
|
|
494
|
+ parseSingle(fieldDescMap, field, jsonObject);
|
|
|
495
|
+ }
|
|
|
496
|
+ }
|
|
|
497
|
+
|
|
|
498
|
+ private void parseSingle(Map<String, FieldDesc> fieldDescMap, String field, JSONObject jsonObject) {
|
|
|
499
|
+ FieldDesc fieldDesc = new FieldDesc();
|
|
|
500
|
+ fieldDesc.field = field;
|
|
|
501
|
+ fieldDesc.type = jsonObject.getString("type");
|
|
|
502
|
+ fieldDesc.index = jsonObject.getString("index");
|
|
|
503
|
+ fieldDesc.analyzer = jsonObject.getString("analyzer");
|
|
|
504
|
+ fieldDesc.search_analyzer = jsonObject.getString("search_analyzer");
|
|
|
505
|
+ Object copyTo = jsonObject.get("copy_to");
|
|
|
506
|
+ if (copyTo != null && copyTo instanceof JSONArray) {
|
|
|
507
|
+ JSONArray jsonArray = (JSONArray) copyTo;
|
|
|
508
|
+ for (int i = 0; i < jsonArray.size(); i++) {
|
|
|
509
|
+ fieldDesc.addCopyTo(jsonArray.getString(i));
|
|
|
510
|
+ }
|
|
|
511
|
+ } else if (copyTo != null && copyTo instanceof JSONObject) {
|
|
|
512
|
+ fieldDesc.addCopyTo((String) copyTo);
|
|
|
513
|
+ }
|
|
|
514
|
+
|
|
|
515
|
+ fieldDescMap.put(field, fieldDesc);
|
|
|
516
|
+ }
|
|
|
517
|
+
|
|
|
518
|
+ private void processMultiFields(Map<String, FieldDesc> fieldDescMap, Map<String, Object> document) {
|
|
|
519
|
+ for (String field : fieldDescMap.keySet()) {
|
|
|
520
|
+ String[] parts = field.split("\\.");
|
|
|
521
|
+ if (parts != null && parts.length == 2) {
|
|
|
522
|
+ document.put(field, document.get(parts[0]));
|
|
|
523
|
+ }
|
|
|
524
|
+ }
|
|
|
525
|
+ }
|
|
|
526
|
+
|
|
|
527
|
+ private void processCopiedFields(Map<String, FieldDesc> fieldDescMap, Map<String, Object> document) {
|
|
|
528
|
+ Map<String, List<String>> copiedFieldMap = getCopiedFieldMap(fieldDescMap);
|
|
|
529
|
+ for (Map.Entry<String, List<String>> entry : copiedFieldMap.entrySet()) {
|
|
|
530
|
+ String copiedField = entry.getKey();
|
|
|
531
|
+ StringBuffer sb = new StringBuffer();
|
|
|
532
|
+ for (String field : entry.getValue()) {
|
|
|
533
|
+ if (document.get(field) != null) {
|
|
|
534
|
+ sb.append(document.get(field)).append(" ");
|
|
|
535
|
+ }
|
|
|
536
|
+ }
|
|
|
537
|
+
|
|
|
538
|
+ document.put(copiedField, sb.toString());
|
|
|
539
|
+ }
|
|
|
540
|
+ }
|
|
|
541
|
+
|
|
|
542
|
+ private Map<String, List<String>> getCopiedFieldMap(Map<String, FieldDesc> fieldDescMap) {
|
|
|
543
|
+ if (localCopiedFieldMap != null) {
|
|
|
544
|
+ return localCopiedFieldMap;
|
|
|
545
|
+ }
|
|
|
546
|
+
|
|
|
547
|
+ synchronized (this) {
|
|
|
548
|
+ if (localCopiedFieldMap != null) {
|
|
|
549
|
+ // double check
|
|
|
550
|
+ return localCopiedFieldMap;
|
|
|
551
|
+ }
|
|
|
552
|
+
|
|
|
553
|
+ Map<String, List<String>> copiedFieldMap = new HashMap<>();
|
|
|
554
|
+ for (Map.Entry<String, FieldDesc> entry : fieldDescMap.entrySet()) {
|
|
|
555
|
+ for (String dest : entry.getValue().copy_to) {
|
|
|
556
|
+ List<String> list = copiedFieldMap.get(dest);
|
|
|
557
|
+ if (list == null) {
|
|
|
558
|
+ list = new ArrayList<>();
|
|
|
559
|
+ copiedFieldMap.put(dest, list);
|
|
|
560
|
+ }
|
|
|
561
|
+
|
|
|
562
|
+ list.add(entry.getKey());
|
|
|
563
|
+ }
|
|
|
564
|
+ }
|
|
|
565
|
+
|
|
|
566
|
+ localCopiedFieldMap = copiedFieldMap;
|
|
|
567
|
+ return localCopiedFieldMap;
|
|
|
568
|
+ }
|
|
|
569
|
+ }
|
|
|
570
|
+
|
|
|
571
|
+ private List<String> getSortedSearchFields() {
|
|
|
572
|
+ if (localMutilFields != null) {
|
|
|
573
|
+ return localMutilFields;
|
|
|
574
|
+ }
|
|
|
575
|
+
|
|
|
576
|
+ synchronized (this) {
|
|
|
577
|
+ if (localMutilFields != null) {
|
|
|
578
|
+ // double check
|
|
|
579
|
+ return localMutilFields;
|
|
|
580
|
+ }
|
|
|
581
|
+
|
|
|
582
|
+ List<FieldWithBoost> list = new ArrayList<>();
|
|
|
583
|
+ List<String> fields = ISearchConstants.SEARCH_DEFAULT_FIELD;
|
|
|
584
|
+ for (String field : fields) {
|
|
|
585
|
+ String[] fieldBoost = field.split("\\^");
|
|
|
586
|
+ if (fieldBoost.length == 2) {
|
|
|
587
|
+ list.add(new FieldWithBoost(fieldBoost[0], Integer.valueOf(fieldBoost[1])));
|
|
|
588
|
+ } else if (fieldBoost.length == 1) {
|
|
|
589
|
+ list.add(new FieldWithBoost(fieldBoost[0], 1));
|
|
|
590
|
+ }
|
|
|
591
|
+ }
|
|
|
592
|
+
|
|
|
593
|
+ Collections.sort(list);
|
|
|
594
|
+ List<String> result = new ArrayList<>();
|
|
|
595
|
+ for (FieldWithBoost item : list) {
|
|
|
596
|
+ result.add(item.fieldName);
|
|
|
597
|
+ }
|
|
|
598
|
+
|
|
|
599
|
+ localMutilFields = result;
|
|
|
600
|
+ return localMutilFields;
|
|
|
601
|
+ }
|
|
|
602
|
+ }
|
|
|
603
|
+
|
|
|
604
|
+ static class FieldWithBoost implements Comparable {
|
|
|
605
|
+ int boost;
|
|
|
606
|
+ String fieldName;
|
|
|
607
|
+
|
|
|
608
|
+ public FieldWithBoost(String fieldName, int boost) {
|
|
|
609
|
+ this.boost = boost;
|
|
|
610
|
+ this.fieldName = fieldName;
|
|
|
611
|
+ }
|
|
|
612
|
+
|
|
|
613
|
+ @Override
|
|
|
614
|
+ public int compareTo(Object o) {
|
|
|
615
|
+ return ((FieldWithBoost) o).boost - boost;
|
|
|
616
|
+ }
|
|
|
617
|
+ }
|
|
|
618
|
+
|
|
|
619
|
+ static class FieldDesc {
|
|
|
620
|
+ String field;
|
|
|
621
|
+
|
|
|
622
|
+ String type;
|
|
|
623
|
+
|
|
|
624
|
+ String index;
|
|
|
625
|
+
|
|
|
626
|
+ String analyzer;
|
|
|
627
|
+
|
|
|
628
|
+ String search_analyzer;
|
|
|
629
|
+
|
|
|
630
|
+ Set<String> copy_to = new HashSet<>();
|
|
|
631
|
+
|
|
|
632
|
+ public void addCopyTo(String item) {
|
|
|
633
|
+ copy_to.add(item);
|
|
|
634
|
+ }
|
|
|
635
|
+
|
|
|
636
|
+ public boolean noNeedAnalyzer() {
|
|
|
637
|
+ // 声明不分词的类型 或者 其他非string类型
|
|
|
638
|
+ return "not_analyzed".equals(index) || (type != null && !"string".equals(type));
|
|
|
639
|
+ }
|
|
|
640
|
+
|
|
|
641
|
+ @Override
|
|
|
642
|
+ public String toString() {
|
|
|
643
|
+ return "FieldDesc{" +
|
|
|
644
|
+ "type='" + type + '\'' +
|
|
|
645
|
+ ", index='" + index + '\'' +
|
|
|
646
|
+ ", analyzer='" + analyzer + '\'' +
|
|
|
647
|
+ ", search_analyzer='" + search_analyzer + '\'' +
|
|
|
648
|
+ '}';
|
|
|
649
|
+ }
|
|
|
650
|
+ }
|
|
|
651
|
+} |