Authored by zhaojun2

修改 skn分词

@@ -84,4 +84,16 @@ public class SearchExplainerController { @@ -84,4 +84,16 @@ public class SearchExplainerController {
84 return map; 84 return map;
85 } 85 }
86 86
  87 + @RequestMapping(value = "/tools/multiFieldAnalyzeList")
  88 + @ResponseBody
  89 + public Map<String, Object> multiFieldAnalyzeList(HttpServletRequest request) {
  90 + Map<String, String> paramMap = HttpServletRequestUtils.transParamType(request);
  91 + try {
  92 + return searchExplainerService.multiFieldAnalyzeList(paramMap);
  93 + } catch (Throwable t) {
  94 + logger.error(t.getMessage(), t);
  95 + return errorResult(t.getMessage());
  96 + }
  97 + }
  98 +
87 } 99 }
@@ -753,4 +753,106 @@ public class SearchExplainerService { @@ -753,4 +753,106 @@ public class SearchExplainerService {
753 } 753 }
754 } 754 }
755 755
  756 + public Map<String, Object> multiFieldAnalyzeList(Map<String, String> paramMap) throws Exception {
  757 + Map<String, Object> map = new LinkedHashMap<>();
  758 + String skn = paramMap.get("skn");
  759 + if (StringUtils.isEmpty(skn)) {
  760 + map.put("code", "400");
  761 + map.put("message", "skn is null");
  762 + return map;
  763 + }
  764 +
  765 + String keyword = paramMap.get("query");
  766 + long start = System.currentTimeMillis();
  767 + logger.info("Begin to show skn tokens. skn={}, keyword={}.", skn, keyword);
  768 +
  769 + // 1. 获取document
  770 + Map<String, Object> document = getDocumentBySkn(skn);
  771 + logger.info("Get the document for the product. document: \n{}", document);
  772 +
  773 + // 2. 解析mapping文件获取字段元数据
  774 + Map<String, FieldDesc> fieldDescMap = parseMapping();
  775 + logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
  776 +
  777 + // 3. 处理多字段和copy_to的字段并赋值
  778 + processMultiFields(fieldDescMap, document);
  779 + processCopiedFields(fieldDescMap, document);
  780 + logger.info("Set the value of copied field succeeded.");
  781 +
  782 + // 4. 分析每个multi-match的每个字段
  783 + List<String> sortedSearchFields = getSortedSearchFields();
  784 + logger.info("Get the search fields. fields: {}", sortedSearchFields);
  785 +
  786 + Map<String, FieldDesc> multiMatchFieldMap = new HashMap<>();
  787 + sortedSearchFields.forEach(e -> {
  788 + if (document.get(e) != null && StringUtils.isNotBlank(document.get(e).toString())) {
  789 + multiMatchFieldMap.put(e, fieldDescMap.get(e));
  790 + }
  791 + });
  792 + List<String> multiMatchFieldSearchAnalyzes = multiMatchFieldMap.values().stream().map(e -> e.search_analyzer).distinct().collect(Collectors.toList());
  793 + Map<String, List<FieldDesc>> multiMatchFieldAnalyzeFieldsMap = multiMatchFieldMap.values().stream().collect(Collectors.groupingBy(e -> e.analyzer));
  794 + List<SearchFieldResult> resultList = getSearchFieldResult(keyword, multiMatchFieldSearchAnalyzes, multiMatchFieldAnalyzeFieldsMap, document);
  795 +
  796 + map.put("code", "200");
  797 + map.put("message", "show skn tokens");
  798 + map.put("data", resultList);
  799 + logger.info("End to show skn tokens. skn={}, keyword={}, cost={}.", skn, keyword, System.currentTimeMillis() - start);
  800 + return map;
  801 + }
  802 +
  803 +
  804 + private List<SearchFieldResult> getSearchFieldResult(String keyword, List<String> multiMatchFieldSearchAnalyzes, Map<String, List<FieldDesc>> multiMatchFieldAnalyzeFieldsMap, Map<String, Object> document) throws Exception {
  805 + Map<String, List<AnalyzeResponse.AnalyzeToken>> keywordSearchAnalyzesMap = new HashMap<>();
  806 + multiMatchFieldSearchAnalyzes.forEach(e -> keywordSearchAnalyzesMap.put(e, getAnalyzerResultWithMultiValues(e, keyword)));
  807 +
  808 + List<SearchFieldResult> searchFieldResultList = new ArrayList<>();
  809 + Map<String, FieldWithBoost> localMutilFieldWithBoostMap = localMutilFieldWithBoostList.stream().collect(Collectors.toMap((e -> e.fieldName), p -> p));
  810 +
  811 + multiMatchFieldAnalyzeFieldsMap.entrySet().forEach(e -> {
  812 + List<FieldDesc> analyzeFields = e.getValue();
  813 + List<String> fieldValueList = analyzeFields.stream().map(f -> document.get(f.field).toString()).collect(Collectors.toList());
  814 + List<AnalyzeResponse.AnalyzeToken> analyzeResponse = getAnalyzerResultWithMultiValues(e.getKey(), fieldValueList.toArray(new String[fieldValueList.size()]));
  815 + List<List<String>> tokensGroupList = new ArrayList<>();
  816 + int position = 0;
  817 + int analyzeResponseIndex = 0;
  818 + for (int i = 0; i < analyzeFields.size(); i++) {
  819 + List<String> tokenList = new ArrayList<>();
  820 + tokensGroupList.add(tokenList);
  821 + for (int j = analyzeResponseIndex; j < analyzeResponse.size(); j++) {
  822 + if (analyzeResponse.get(j).getPosition() - position < 100) {
  823 + tokenList.add(analyzeResponse.get(j).getTerm());
  824 + position = analyzeResponse.get(j).getPosition();
  825 + } else {
  826 + analyzeResponseIndex = j;
  827 + position = analyzeResponse.get(j).getPosition();
  828 + break;
  829 + }
  830 + }
  831 + }
  832 + for (int i = 0; i < analyzeFields.size(); i++) {
  833 + SearchFieldResult searchFieldResult = new SearchFieldResult();
  834 + searchFieldResult.setFieldName(analyzeFields.get(i).field);
  835 + searchFieldResult.setBoost(localMutilFieldWithBoostMap.get(analyzeFields.get(i).field).boost);
  836 + searchFieldResult.setIndexAnalyzer(analyzeFields.get(i).analyzer);
  837 + searchFieldResult.setSearchAnalyzer(analyzeFields.get(i).search_analyzer);
  838 + searchFieldResult.setFieldValue(document.get(analyzeFields.get(i).field).toString());
  839 + List<String> tokens = tokensGroupList.get(i);
  840 + searchFieldResult.setTokens(Arrays.asList(StringUtils.join(tokens, ";")));
  841 + List<String> matchTokens = keywordSearchAnalyzesMap.get(analyzeFields.get(i).search_analyzer).stream().filter(kt -> tokens.contains(kt)).map(AnalyzeResponse.AnalyzeToken::getTerm).collect(Collectors.toList());
  842 + searchFieldResult.addAllMatchToken(matchTokens);
  843 + searchFieldResult.setMatchResult(CollectionUtils.isNotEmpty(matchTokens));
  844 + searchFieldResultList.add(searchFieldResult);
  845 + }
  846 +
  847 + });
  848 + return searchFieldResultList;
  849 + }
  850 +
  851 + private List<AnalyzeResponse.AnalyzeToken> getAnalyzerResultWithMultiValues(String analyzer, String... value) {
  852 + IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  853 + List<AnalyzeResponse.AnalyzeToken> analyzeResponse = client.getAnalyzeResponseWithMultiValues(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, analyzer, value).getTokens();
  854 + return analyzeResponse;
  855 + }
  856 +
  857 +
756 } 858 }
@@ -6,7 +6,7 @@ import java.util.List; @@ -6,7 +6,7 @@ import java.util.List;
6 /** 6 /**
7 * Created by ginozhang on 2017/2/13. 7 * Created by ginozhang on 2017/2/13.
8 */ 8 */
9 -public class SearchFieldResult { 9 +public class SearchFieldResult implements Comparable<SearchFieldResult>{
10 10
11 private String fieldName; 11 private String fieldName;
12 12
@@ -24,10 +24,19 @@ public class SearchFieldResult { @@ -24,10 +24,19 @@ public class SearchFieldResult {
24 24
25 private boolean matchResult; 25 private boolean matchResult;
26 26
  27 + @Override
  28 + public int compareTo(SearchFieldResult o) {
  29 + return o.boost - boost;
  30 + }
  31 +
27 public void addMatchToken(String searchToken) { 32 public void addMatchToken(String searchToken) {
28 matchTokens.add(searchToken); 33 matchTokens.add(searchToken);
29 } 34 }
30 35
  36 + public void addAllMatchToken(List<String> searchTokenList) {
  37 + matchTokens.addAll(searchTokenList);
  38 + }
  39 +
31 public List<String> getMatchTokens() { 40 public List<String> getMatchTokens() {
32 return matchTokens; 41 return matchTokens;
33 } 42 }