Authored by zhaojun2

修改 skn分词

... ... @@ -84,4 +84,16 @@ public class SearchExplainerController {
return map;
}
/**
 * Endpoint mapped to {@code /tools/multiFieldAnalyzeList}.
 *
 * <p>Converts the request parameters with
 * {@code HttpServletRequestUtils.transParamType} and delegates to
 * {@code searchExplainerService.multiFieldAnalyzeList}. Anything thrown by
 * the service call is logged and turned into an {@code errorResult} built
 * from the throwable's message.
 *
 * @param request the incoming HTTP request
 * @return the service result, or the error result when the service call fails
 */
@RequestMapping(value = "/tools/multiFieldAnalyzeList")
@ResponseBody
public Map<String, Object> multiFieldAnalyzeList(HttpServletRequest request) {
    // Parameter conversion stays outside the try block, exactly as before.
    final Map<String, String> params = HttpServletRequestUtils.transParamType(request);
    Map<String, Object> response;
    try {
        response = searchExplainerService.multiFieldAnalyzeList(params);
    } catch (Throwable failure) {
        logger.error(failure.getMessage(), failure);
        response = errorResult(failure.getMessage());
    }
    return response;
}
}
... ...
... ... @@ -753,4 +753,106 @@ public class SearchExplainerService {
}
}
/**
 * Builds the per-field token report for one product (skn) against an
 * optional query keyword.
 *
 * <p>Reads {@code skn} and {@code query} from {@code paramMap}. When
 * {@code skn} is empty a map with {@code code=400} is returned immediately.
 * Otherwise the document is loaded, the mapping metadata is parsed, multi
 * fields and copied fields are populated, and every search field that has a
 * non-blank value in the document is handed to
 * {@code getSearchFieldResult}.
 *
 * @param paramMap request parameters; the keys {@code skn} and {@code query} are read
 * @return an insertion-ordered map holding {@code code}, {@code message} and,
 *         on success, {@code data} (the list of per-field results)
 * @throws Exception propagated from the helper methods called here
 */
public Map<String, Object> multiFieldAnalyzeList(Map<String, String> paramMap) throws Exception {
    final Map<String, Object> response = new LinkedHashMap<>();
    final String skn = paramMap.get("skn");
    if (StringUtils.isEmpty(skn)) {
        response.put("code", "400");
        response.put("message", "skn is null");
        return response;
    }

    final String keyword = paramMap.get("query");
    final long startedAt = System.currentTimeMillis();
    logger.info("Begin to show skn tokens. skn={}, keyword={}.", skn, keyword);

    // 1. Fetch the document.
    final Map<String, Object> document = getDocumentBySkn(skn);
    logger.info("Get the document for the product. document: \n{}", document);

    // 2. Parse the mapping file to obtain the field metadata.
    final Map<String, FieldDesc> fieldDescMap = parseMapping();
    logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);

    // 3. Handle multi fields and copy_to fields and assign their values.
    processMultiFields(fieldDescMap, document);
    processCopiedFields(fieldDescMap, document);
    logger.info("Set the value of copied field succeeded.");

    // 4. Analyze every field of the multi-match query.
    final List<String> sortedSearchFields = getSortedSearchFields();
    logger.info("Get the search fields. fields: {}", sortedSearchFields);

    // Only search fields that carry a non-blank value in the document are analyzed.
    final Map<String, FieldDesc> multiMatchFieldMap = new HashMap<>();
    for (String fieldName : sortedSearchFields) {
        if (document.get(fieldName) != null && StringUtils.isNotBlank(document.get(fieldName).toString())) {
            multiMatchFieldMap.put(fieldName, fieldDescMap.get(fieldName));
        }
    }

    final List<String> searchAnalyzers = multiMatchFieldMap.values()
            .stream()
            .map(desc -> desc.search_analyzer)
            .distinct()
            .collect(Collectors.toList());
    final Map<String, List<FieldDesc>> fieldsByIndexAnalyzer = multiMatchFieldMap.values()
            .stream()
            .collect(Collectors.groupingBy(desc -> desc.analyzer));

    final List<SearchFieldResult> resultList =
            getSearchFieldResult(keyword, searchAnalyzers, fieldsByIndexAnalyzer, document);

    response.put("code", "200");
    response.put("message", "show skn tokens");
    response.put("data", resultList);
    logger.info("End to show skn tokens. skn={}, keyword={}, cost={}.", skn, keyword, System.currentTimeMillis() - startedAt);
    return response;
}
/**
 * Position jump between two consecutive tokens that is treated as the
 * boundary between two field values analyzed in one request.
 * NOTE(review): presumably this mirrors the analyzers' position increment
 * gap (Elasticsearch's default is 100) - confirm against the index settings.
 */
private static final int FIELD_VALUE_POSITION_GAP = 100;

/**
 * Builds one {@link SearchFieldResult} per search field, reporting the
 * tokens produced for the field value and which keyword tokens match them.
 *
 * <p>The keyword is analyzed once per distinct search analyzer. Fields are
 * processed in groups that share an index analyzer: all values of a group
 * are analyzed in a single call and the returned tokens are split back into
 * per-field lists by {@link #groupTokensByField(List, int)}.
 *
 * @param keyword                          the query keyword to analyze
 * @param multiMatchFieldSearchAnalyzes    distinct search analyzer names of the fields
 * @param multiMatchFieldAnalyzeFieldsMap  fields grouped by index analyzer name
 * @param document                         field name to value; every listed field is
 *                                         expected to have a non-null value here
 * @return the per-field results, in iteration order of the analyzer groups
 * @throws Exception kept for signature compatibility with existing callers
 */
private List<SearchFieldResult> getSearchFieldResult(String keyword, List<String> multiMatchFieldSearchAnalyzes, Map<String, List<FieldDesc>> multiMatchFieldAnalyzeFieldsMap, Map<String, Object> document) throws Exception {
    // Analyze the keyword once per distinct search analyzer.
    Map<String, List<AnalyzeResponse.AnalyzeToken>> keywordSearchAnalyzesMap = new HashMap<>();
    for (String searchAnalyzer : multiMatchFieldSearchAnalyzes) {
        keywordSearchAnalyzesMap.put(searchAnalyzer, getAnalyzerResultWithMultiValues(searchAnalyzer, keyword));
    }
    Map<String, FieldWithBoost> localMutilFieldWithBoostMap = localMutilFieldWithBoostList.stream().collect(Collectors.toMap(e -> e.fieldName, p -> p));

    List<SearchFieldResult> searchFieldResultList = new ArrayList<>();
    for (Map.Entry<String, List<FieldDesc>> analyzerGroup : multiMatchFieldAnalyzeFieldsMap.entrySet()) {
        List<FieldDesc> analyzeFields = analyzerGroup.getValue();

        // Analyze all values of this analyzer group in one request.
        List<String> fieldValueList = analyzeFields.stream().map(f -> document.get(f.field).toString()).collect(Collectors.toList());
        List<AnalyzeResponse.AnalyzeToken> analyzeResponse = getAnalyzerResultWithMultiValues(analyzerGroup.getKey(), fieldValueList.toArray(new String[0]));
        List<List<String>> tokensGroupList = groupTokensByField(analyzeResponse, analyzeFields.size());

        for (int i = 0; i < analyzeFields.size(); i++) {
            FieldDesc fieldDesc = analyzeFields.get(i);
            List<String> tokens = tokensGroupList.get(i);

            SearchFieldResult searchFieldResult = new SearchFieldResult();
            searchFieldResult.setFieldName(fieldDesc.field);
            searchFieldResult.setBoost(localMutilFieldWithBoostMap.get(fieldDesc.field).boost);
            searchFieldResult.setIndexAnalyzer(fieldDesc.analyzer);
            searchFieldResult.setSearchAnalyzer(fieldDesc.search_analyzer);
            searchFieldResult.setFieldValue(document.get(fieldDesc.field).toString());
            searchFieldResult.setTokens(Arrays.asList(StringUtils.join(tokens, ";")));

            // Compare by term text: the field tokens are Strings, so testing an
            // AnalyzeToken object against them can never match.
            List<String> matchTokens = keywordSearchAnalyzesMap.get(fieldDesc.search_analyzer).stream()
                    .map(AnalyzeResponse.AnalyzeToken::getTerm)
                    .filter(tokens::contains)
                    .collect(Collectors.toList());
            searchFieldResult.addAllMatchToken(matchTokens);
            searchFieldResult.setMatchResult(CollectionUtils.isNotEmpty(matchTokens));
            searchFieldResultList.add(searchFieldResult);
        }
    }
    return searchFieldResultList;
}

/**
 * Splits the tokens of a multi-value analyze response into one term list
 * per field, cutting wherever the position jumps by at least
 * {@link #FIELD_VALUE_POSITION_GAP} between consecutive tokens.
 *
 * <p>The read cursor only moves forward, so once all tokens are consumed the
 * remaining fields receive empty lists instead of re-reading earlier tokens.
 * NOTE(review): a value that yields no tokens cannot be told apart from its
 * neighbour by this heuristic; the fields after it would shift by one.
 *
 * @param analyzedTokens tokens of all field values, in response order
 * @param fieldCount     number of field values that were analyzed
 * @return exactly {@code fieldCount} term lists, in field order
 */
private List<List<String>> groupTokensByField(List<AnalyzeResponse.AnalyzeToken> analyzedTokens, int fieldCount) {
    List<List<String>> tokensGroupList = new ArrayList<>();
    int cursor = 0;
    int lastPosition = 0;
    for (int i = 0; i < fieldCount; i++) {
        List<String> tokenList = new ArrayList<>();
        tokensGroupList.add(tokenList);
        while (cursor < analyzedTokens.size()) {
            AnalyzeResponse.AnalyzeToken token = analyzedTokens.get(cursor);
            int gap = token.getPosition() - lastPosition;
            lastPosition = token.getPosition();
            if (gap >= FIELD_VALUE_POSITION_GAP) {
                // This token opens the next field value; leave the cursor on it.
                break;
            }
            tokenList.add(token.getTerm());
            cursor++;
        }
    }
    return tokensGroupList;
}
/**
 * Runs the given analyzer over one or more values against the product index
 * and returns the tokens of the response.
 *
 * @param analyzer name of the analyzer to apply
 * @param value    the text value(s) to analyze
 * @return the tokens reported by the analyze response
 */
private List<AnalyzeResponse.AnalyzeToken> getAnalyzerResultWithMultiValues(String analyzer, String... value) {
    final IElasticsearchClient productIndexClient = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
    return productIndexClient
            .getAnalyzeResponseWithMultiValues(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, analyzer, value)
            .getTokens();
}
}
... ...
... ... @@ -6,7 +6,7 @@ import java.util.List;
/**
* Created by ginozhang on 2017/2/13.
*/
public class SearchFieldResult {
public class SearchFieldResult implements Comparable<SearchFieldResult>{
private String fieldName;
... ... @@ -24,10 +24,19 @@ public class SearchFieldResult {
private boolean matchResult;
/**
 * Orders results by boost, highest boost first.
 *
 * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, which
 * can overflow and flip the sign when the two boosts are far apart.
 *
 * @param o the result to compare against
 * @return a negative value when this result has the higher boost, zero when
 *         the boosts are equal, a positive value otherwise
 */
@Override
public int compareTo(SearchFieldResult o) {
    return Integer.compare(o.boost, boost);
}
/**
 * Appends a single token to the list of matched tokens.
 *
 * @param searchToken the token to record
 */
public void addMatchToken(String searchToken) {
    this.matchTokens.add(searchToken);
}
/**
 * Appends every token of the given list to the list of matched tokens.
 *
 * @param searchTokenList the tokens to record
 */
public void addAllMatchToken(List<String> searchTokenList) {
    this.matchTokens.addAll(searchTokenList);
}
/**
 * Returns the list of matched tokens held by this result.
 *
 * @return the internal matched-token list (not a copy)
 */
public List<String> getMatchTokens() {
    return this.matchTokens;
}
... ...