Authored by Gino Zhang

将searchExplainer移到service

  1 +package com.yoho.search.service.searchexplainer;
  2 +
/**
 * Simple bean pairing an index field name with a field-type label.
 *
 * Created by wangnan on 2016/11/17.
 */
public class EsField {

    /** Name of the field. */
    private String fieldName;

    /** Type label associated with the field. */
    private String fieldType;

    /** Creates an instance whose properties are both {@code null}. */
    public EsField() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param fieldName name of the field
     * @param fieldType type label of the field
     */
    public EsField(String fieldName, String fieldType) {
        this.fieldType = fieldType;
        this.fieldName = fieldName;
    }

    /** @return the field name, or {@code null} when unset */
    public String getFieldName() {
        return this.fieldName;
    }

    /** @param fieldName the new field name */
    public void setFieldName(String fieldName) {
        this.fieldName = fieldName;
    }

    /** @return the type label, or {@code null} when unset */
    public String getFieldType() {
        return this.fieldType;
    }

    /** @param fieldType the new type label */
    public void setFieldType(String fieldType) {
        this.fieldType = fieldType;
    }
}
  1 +package com.yoho.search.service.searchexplainer;
  2 +
  3 +import java.util.List;
  4 +
  5 +/**
  6 + * Created by ginozhang on 2016/11/16.
  7 + */
  8 +public class FieldMatch extends SearchMatch {
  9 + private String field;
  10 +
  11 + private String value;
  12 +
  13 + private List<String> indexTokens;
  14 +
  15 + public FieldMatch() {
  16 +
  17 + }
  18 +
  19 + public FieldMatch(String field, String value, String indexAnalyzer, List<String> indexTokens, String searchAnalyzer, List<String> searchTokens, boolean isMatch) {
  20 + super(isMatch, indexAnalyzer, searchAnalyzer, searchTokens);
  21 + this.field = field;
  22 + this.value = value;
  23 + this.indexTokens = indexTokens;
  24 + }
  25 +
  26 + public String getField() {
  27 + return field;
  28 + }
  29 +
  30 + public void setField(String field) {
  31 + this.field = field;
  32 + }
  33 +
  34 + public String getValue() {
  35 + return value;
  36 + }
  37 +
  38 + public void setValue(String value) {
  39 + this.value = value;
  40 + }
  41 +
  42 + public List<String> getIndexTokens() {
  43 + return indexTokens;
  44 + }
  45 +
  46 + public void setIndexTokens(List<String> indexTokens) {
  47 + this.indexTokens = indexTokens;
  48 + }
  49 +}
  1 +package com.yoho.search.service.searchexplainer;
  2 +
/**
 * Outcome of comparing one request parameter with the value stored for the
 * corresponding field.
 *
 * Created by wangnan on 2016/11/16.
 */
public class FilterMatch {

    /** Name of the parameter/field that was compared. */
    private String field;

    /** Value supplied by the caller. */
    private String inputValue;

    /** Value found in the index document. */
    private String indexValue;

    /** Label describing the comparison that was applied. */
    private String matchType;

    /** Whether the comparison succeeded; defaults to {@code false}. */
    private boolean isMatch = false;

    /** Creates an empty, non-matching instance. */
    public FilterMatch() {
    }

    /**
     * Creates a populated instance.
     *
     * @param field      name of the compared parameter/field
     * @param inputValue value supplied by the caller
     * @param indexValue value found in the index document
     * @param matchType  label of the comparison applied
     * @param isMatch    whether the comparison succeeded
     */
    public FilterMatch(String field, String inputValue, String indexValue, String matchType, boolean isMatch) {
        this.field = field;
        this.inputValue = inputValue;
        this.indexValue = indexValue;
        this.matchType = matchType;
        this.isMatch = isMatch;
    }

    public String getField() {
        return field;
    }

    public void setField(String field) {
        this.field = field;
    }

    public String getInputValue() {
        return inputValue;
    }

    public void setInputValue(String inputValue) {
        this.inputValue = inputValue;
    }

    public String getIndexValue() {
        return indexValue;
    }

    public void setIndexValue(String indexValue) {
        this.indexValue = indexValue;
    }

    public boolean isMatch() {
        return isMatch;
    }

    public void setMatch(boolean match) {
        isMatch = match;
    }

    public String getMatchType() {
        return matchType;
    }

    public void setMatchType(String matchType) {
        this.matchType = matchType;
    }

    /**
     * Renders every property; each string property is wrapped in a balanced
     * pair of single quotes.
     */
    @Override
    public String toString() {
        return "FilterMatch{" +
                "field='" + field + '\'' +
                ", inputValue='" + inputValue + '\'' +
                ", indexValue='" + indexValue + '\'' +
                ", matchType='" + matchType + '\'' +
                ", isMatch=" + isMatch +
                '}';
    }
}
  1 +package com.yoho.search.service.searchexplainer;
  2 +
  3 +import java.util.List;
  4 +
  5 +/**
  6 + * Created by ginozhang on 2016/12/22.
  7 + */
  8 +public class GroupMatch extends SearchMatch {
  9 +
  10 + private List<FieldMatch> fieldMatchList;
  11 +
  12 + public GroupMatch() {
  13 + }
  14 +
  15 + public GroupMatch(String indexAnalyzer, String searchAnalyzer, List<String> searchTokens, List<FieldMatch> fieldMatchList, boolean isMatch) {
  16 + super(isMatch, indexAnalyzer, searchAnalyzer, searchTokens);
  17 + this.fieldMatchList = fieldMatchList;
  18 + }
  19 +
  20 + public List<FieldMatch> getFieldMatchList() {
  21 + return fieldMatchList;
  22 + }
  23 +
  24 + public void setFieldMatchList(List<FieldMatch> fieldMatchList) {
  25 + this.fieldMatchList = fieldMatchList;
  26 + }
  27 +}
  1 +package com.yoho.search.service.searchexplainer;
  2 +
  3 +import com.yoho.search.base.utils.HttpServletRequestUtils;
  4 +import org.slf4j.Logger;
  5 +import org.slf4j.LoggerFactory;
  6 +import org.springframework.beans.factory.annotation.Autowired;
  7 +import org.springframework.stereotype.Controller;
  8 +import org.springframework.web.bind.annotation.RequestMapping;
  9 +import org.springframework.web.bind.annotation.RequestParam;
  10 +import org.springframework.web.bind.annotation.ResponseBody;
  11 +
  12 +import javax.servlet.http.HttpServletRequest;
  13 +import java.util.HashMap;
  14 +import java.util.Map;
  15 +
  16 +/**
  17 + * Created by ginozhang on 2016/11/16.
  18 + */
  19 +@Controller
  20 +public class SearchExplainerController {
  21 +
  22 + private static final Logger logger = LoggerFactory.getLogger(SearchExplainerController.class);
  23 +
  24 + @Autowired
  25 + private SearchExplainerService searchExplainerService;
  26 +
  27 + @RequestMapping(value = "/tools/explain")
  28 + @ResponseBody
  29 + public Map<String, Object> explain(HttpServletRequest request) {
  30 + Map<String, String> paramMap = HttpServletRequestUtils.transParamType(request);
  31 + try {
  32 + return searchExplainerService.explain(paramMap);
  33 + } catch (Throwable t) {
  34 + logger.error(t.getMessage(), t);
  35 + return errorResult(t.getMessage());
  36 + }
  37 + }
  38 +
  39 + @RequestMapping(value = "/tools/tokens")
  40 + @ResponseBody
  41 + public Map<String, Object> getTokens(@RequestParam String skn) {
  42 + try {
  43 + return searchExplainerService.getTokens(skn);
  44 + } catch (Throwable t) {
  45 + logger.error(t.getMessage(), t);
  46 + return errorResult(t.getMessage());
  47 + }
  48 + }
  49 +
  50 + private Map<String, Object> errorResult(String message) {
  51 + Map<String, Object> map = new HashMap<>();
  52 + map.put("code", "400");
  53 + map.put("message", message);
  54 + return map;
  55 + }
  56 +
  57 +}
  1 +package com.yoho.search.service.searchexplainer;
  2 +
  3 +import com.alibaba.fastjson.JSONArray;
  4 +import com.alibaba.fastjson.JSONObject;
  5 +import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
  6 +import com.google.common.cache.CacheBuilder;
  7 +import com.google.common.cache.CacheLoader;
  8 +import com.yoho.search.base.utils.ISearchConstants;
  9 +import com.yoho.search.core.es.IElasticsearchClient;
  10 +import com.yoho.search.core.es.model.SearchParam;
  11 +import com.yoho.search.core.es.model.SearchResult;
  12 +import com.yoho.search.service.service.ESClientMgr;
  13 +import org.apache.commons.collections.CollectionUtils;
  14 +import org.apache.commons.lang3.StringUtils;
  15 +import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
  16 +import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
  17 +import org.elasticsearch.cluster.metadata.MappingMetaData;
  18 +import org.elasticsearch.common.collect.ImmutableOpenMap;
  19 +import org.elasticsearch.index.query.QueryBuilders;
  20 +import org.slf4j.Logger;
  21 +import org.slf4j.LoggerFactory;
  22 +import org.springframework.beans.factory.annotation.Autowired;
  23 +import org.springframework.stereotype.Service;
  24 +import org.springframework.util.Assert;
  25 +
  26 +import java.util.*;
  27 +import java.util.concurrent.ExecutionException;
  28 +import java.util.concurrent.TimeUnit;
  29 +import java.util.stream.Collectors;
  30 +
  31 +/**
  32 + * Created by ginozhang on 2016/11/16.
  33 + */
  34 +@Service
  35 +public class SearchExplainerService {
  36 +
  37 + private static final Logger logger = LoggerFactory.getLogger(SearchExplainerService.class);
  38 +
  39 + private static final String DEFAULT_ANALYZER = "keyword";
  40 +
  41 + private static final String MATCH_TYPE_STRING = "String Match";
  42 + private static final String MATCH_TYPE_RANGE = "Range Match";
  43 +
  44 + private static final String RETURN_TYPE_ALL = "all";
  45 + private static final String RETURN_TYPE_MATCH_ONLY = "match_only";
  46 +
  47 + // 拷贝search-service的search.default.field配置参数
  48 + // private static final String fieldsToSearch = Configuration.getString("search.default.field");
  49 +
  50 + @Autowired
  51 + private ESClientMgr esClientMgr;
  52 +
  53 + private volatile Map<String, FieldDesc> localFieldDescMap = null;
  54 +
  55 + private volatile Map<String, List<String>> localCopiedFieldMap = null;
  56 +
  57 + private volatile List<String> localMutilFields = null;
  58 +
  59 + //入参和索引中字段名的映射
  60 + public final static Map<String, EsField> fieldMap = new HashMap<String, EsField>() {{
  61 + //字符类
  62 + put("brand", new EsField("brandId", MATCH_TYPE_STRING));
  63 + put("shop", new EsField("shopId", MATCH_TYPE_STRING));
  64 + put("msort", new EsField("maxSortId", MATCH_TYPE_STRING));
  65 + put("misort", new EsField("middleSortId", MATCH_TYPE_STRING));
  66 + put("sort", new EsField("smallSortId", MATCH_TYPE_STRING));
  67 + put("color", new EsField("colorIds", MATCH_TYPE_STRING));
  68 + put("size", new EsField("sizeIds", MATCH_TYPE_STRING));
  69 + put("price", new EsField("salesPrice", MATCH_TYPE_STRING));
  70 + put("gender", new EsField("gender", MATCH_TYPE_STRING));
  71 + put("specialoffer", new EsField("specialoffer", MATCH_TYPE_STRING));
  72 + put("isdiscount", new EsField("isDiscount", MATCH_TYPE_STRING));
  73 + put("promotion", new EsField("ispromotion", MATCH_TYPE_STRING));
  74 + put("vdt", new EsField("vipDiscountType", MATCH_TYPE_STRING));
  75 + put("attribute", new EsField("attribute", MATCH_TYPE_STRING));
  76 + put("limited", new EsField("islimited", MATCH_TYPE_STRING));
  77 + put("new", new EsField("isnew", MATCH_TYPE_STRING));
  78 + put("outlets", new EsField("isOutlets", MATCH_TYPE_STRING));
  79 + put("status", new EsField("status", MATCH_TYPE_STRING));
  80 + put("style", new EsField("styleIds", MATCH_TYPE_STRING));
  81 + put("sell_channels", new EsField("sellChannels", MATCH_TYPE_STRING));
  82 + put("folder_id", new EsField("folder_id", MATCH_TYPE_STRING));
  83 + put("series_id", new EsField("series_id", MATCH_TYPE_STRING));
  84 + put("day", new EsField("shelveDay", MATCH_TYPE_STRING));
  85 + put("brand", new EsField("salesPrice", MATCH_TYPE_STRING));
  86 + put("shop", new EsField("salesPrice", MATCH_TYPE_STRING));
  87 + put("brand", new EsField("brandId", MATCH_TYPE_STRING));
  88 + put("shop", new EsField("shop", MATCH_TYPE_STRING));
  89 + put("brand", new EsField("brandId", MATCH_TYPE_STRING));
  90 + put("breaking", new EsField("breakingRate", MATCH_TYPE_STRING));
  91 + put("ageLevel", new EsField("ageLevel", MATCH_TYPE_STRING));
  92 + put("product_skn", new EsField("productSkn", MATCH_TYPE_STRING));
  93 + //范围类
  94 + put("stocknumber", new EsField("storageNum", MATCH_TYPE_RANGE));
  95 + put("p_d", new EsField("promotionDiscountInt", MATCH_TYPE_RANGE));
  96 + put("p_d_int", new EsField("promotionDiscountInt", MATCH_TYPE_RANGE));
  97 + put("first_shelve_time", new EsField("firstShelveTime", MATCH_TYPE_RANGE));
  98 + put("shelve_time", new EsField("shelveTime", MATCH_TYPE_RANGE));
  99 + }};
  100 +
  101 + public String fieldConvert(String paraField) {
  102 + EsField esField = fieldMap.get(paraField);
  103 + if (esField == null) {
  104 + return null;
  105 + }
  106 + return esField.getFieldName();
  107 + }
  108 +
  109 + public Map<String, Object> explain(Map<String, String> paramMap) throws Exception {
  110 + long start = System.currentTimeMillis();
  111 + logger.info("Begin to explain search. start: {}, paramMap: {}.", start, paramMap);
  112 + String skn = paramMap.get("skn");
  113 + if (StringUtils.isEmpty(skn)) {
  114 + Map<String, Object> map = new LinkedHashMap<>();
  115 + map.put("code", "400");
  116 + map.put("message", "skn is null");
  117 + return map;
  118 + }
  119 +
  120 + String returnType = paramMap.get("returnType");
  121 + if (returnType == null) {
  122 + returnType = RETURN_TYPE_MATCH_ONLY;
  123 + }
  124 +
  125 + String multiSearchType = paramMap.get("multiSearchType");
  126 + if (multiSearchType == null) {
  127 + multiSearchType = ISearchConstants.SEARCH_MULTIMATCHQUERY_TYPE;
  128 + }
  129 +
  130 + String keyword = paramMap.get("query");
  131 + logger.info("Begin to explain search keyword [{}] for product skn {}.", keyword, skn);
  132 +
  133 + // 1. 根据skn获取es中的数据
  134 + Map<String, Object> document = getDocumentBySkn(skn);
  135 + logger.info("Get the document for the product. document: \n{}", document);
  136 +
  137 + // 2.分析过滤的参数
  138 + Map<String, Object> explainResult = explainFilter(paramMap, document);
  139 +
  140 + if ((Boolean) explainResult.get("filter_match") && StringUtils.isNotEmpty(keyword)) {
  141 + List<SearchMatch> queryMatchList = explainMultiMatchQuery(document, keyword, returnType, multiSearchType);
  142 + explainResult.put("query_match", queryMatchList.stream().anyMatch(SearchMatch::isMatch));
  143 + explainResult.put("query_list", queryMatchList);
  144 + }
  145 +
  146 + logger.info("End to explain search keyword [{}] for product skn {}. cost: {}", keyword, skn, System.currentTimeMillis() - start);
  147 + return explainResult;
  148 + }
  149 +
  150 + private Map<String, Object> explainFilter(Map<String, String> paramMap, Map<String, Object> document) {
  151 + Map<String, Object> map = new LinkedHashMap<>();
  152 + List<String> warns = new ArrayList<>();
  153 + boolean isFilterMatch = true;
  154 + List<FilterMatch> filterMatchList = new ArrayList<>();
  155 + for (Map.Entry<String, String> entry : paramMap.entrySet()) {
  156 + String key = entry.getKey();
  157 + if (key.equals("query") || key.equals("skn") || key.equals("returnType")) {
  158 + continue;
  159 + }
  160 +
  161 + if (!fieldMap.containsKey(key)) {
  162 + warns.add("parameter " + key + " is invalid\n");
  163 + continue;
  164 + }
  165 +
  166 + Object value = document.get(fieldConvert(key));
  167 + if (value != null) {
  168 + String paraValue = value.toString();
  169 + EsField esField = fieldMap.get(key);
  170 + //将入参和字段值都转化为字符数组
  171 + String[] paraArray = entry.getValue().split(",");
  172 + String[] indexArray = paraValue.split(",");
  173 + Set<String> indexSet = new HashSet<String>();
  174 + for (String i : indexArray) {
  175 + indexSet.add(i);
  176 + }
  177 + //字符匹配比较
  178 + if (esField.getFieldType().equals(MATCH_TYPE_STRING)) {
  179 + for (String p : paraArray) {
  180 + if (!indexSet.contains(p)) {
  181 + filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
  182 + isFilterMatch = false;
  183 + }
  184 + }
  185 + }
  186 + //取值范围比较
  187 + if (esField.getFieldType().equals(MATCH_TYPE_RANGE)) {
  188 + if (key.equals("stocknumber")) {
  189 + if (Integer.valueOf(value.toString()) < 1) {
  190 + filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
  191 + isFilterMatch = false;
  192 + }
  193 + } else if (paraArray.length == 2 && indexArray.length == 1) {
  194 + Integer paraRangeStart = Integer.valueOf(paraArray[0]);
  195 + Integer paraRangeEnd = Integer.valueOf(paraArray[1]);
  196 + Integer indexValue = Integer.valueOf(indexArray[0]);
  197 + if (indexValue < paraRangeStart || indexValue > paraRangeEnd) {
  198 + filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
  199 + isFilterMatch = false;
  200 + }
  201 + }
  202 + }
  203 + }
  204 + }
  205 +
  206 + map.put("code", "200");
  207 + map.put("message", "search explainer");
  208 + if (CollectionUtils.isNotEmpty(warns)) {
  209 + map.put("warn", warns);
  210 + }
  211 + map.put("filter_match", isFilterMatch);
  212 + map.put("filter_list", filterMatchList);
  213 +
  214 + return map;
  215 + }
  216 +
  217 + private List<SearchMatch> explainMultiMatchQuery(Map<String, Object> document, String keyword, String returnType, String multiSearchType) throws Exception {
  218 + // 3. 解析mapping文件获取字段元数据
  219 + Map<String, FieldDesc> fieldDescMap = parseMapping();
  220 + logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
  221 +
  222 + // 4. 处理多字段和copy_to的字段并赋值
  223 + processMultiFields(fieldDescMap, document);
  224 + processCopiedFields(fieldDescMap, document);
  225 + logger.info("Set the value of copied field succeeded.");
  226 +
  227 + // 5. 分析每个multi-match的每个字段 根据分词结果判定是否匹配
  228 + List<String> sortedSearchFields = getSortedSearchFields();
  229 + logger.info("Get the search fields. fields: {}", sortedSearchFields);
  230 +
  231 + String searchType = ISearchConstants.SEARCH_MULTIMATCHQUERY_TYPE;
  232 + if ("cross_fields".equalsIgnoreCase(multiSearchType)) {
  233 + return explainCrossFieldsQuery(document, keyword, returnType, fieldDescMap, sortedSearchFields);
  234 + } else {
  235 + return explainBestFieldsQuery(document, keyword, returnType, fieldDescMap, sortedSearchFields);
  236 + }
  237 + }
  238 +
  239 + private List explainBestFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap, List<String> sortedSearchFields) throws Exception {
  240 + boolean isQueryMatch = false;
  241 + boolean isFieldQueryMatch = false;
  242 + List<FieldMatch> queryMatchList = new ArrayList<>();
  243 + for (String field : sortedSearchFields) {
  244 + String value = document.get(field) != null ? document.get(field).toString() : null;
  245 + FieldDesc fieldDesc = fieldDescMap.get(field);
  246 + Assert.notNull(fieldDesc, "no field " + field + " defined.");
  247 + List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
  248 + List<String> keywordAnalyzerResult = getKeyWordAnalyzerResult(fieldDesc, keyword);
  249 + isFieldQueryMatch = judgeMatch(analyzerResult, keywordAnalyzerResult);
  250 + isQueryMatch = isFieldQueryMatch || isQueryMatch;
  251 + //如果是匹配模式则只返回匹配的字段,如果是ALL模式则返回索引字段
  252 + if (returnType.equals(RETURN_TYPE_ALL) || isFieldQueryMatch) {
  253 + queryMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, keywordAnalyzerResult, isFieldQueryMatch));
  254 + }
  255 +
  256 + if (returnType.equals(RETURN_TYPE_MATCH_ONLY) && isFieldQueryMatch) {
  257 + // 找到匹配的就返回
  258 + break;
  259 + }
  260 + }
  261 +
  262 + return queryMatchList;
  263 + }
  264 +
  265 + private List explainCrossFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap, List<String> sortedSearchFields) throws Exception {
  266 + List<GroupMatch> groupMatchList = new ArrayList<>();
  267 + List<FieldDesc> searchfieldDescList = new ArrayList<>();
  268 + fieldDescMap.forEach((field, fieldDesc) -> {
  269 + if (sortedSearchFields.contains(field)) {
  270 + if (StringUtils.isEmpty(fieldDesc.analyzer)) {
  271 + fieldDesc.analyzer = DEFAULT_ANALYZER;
  272 + }
  273 + if (StringUtils.isEmpty(fieldDesc.search_analyzer)) {
  274 + fieldDesc.search_analyzer = fieldDesc.analyzer;
  275 + }
  276 +
  277 + searchfieldDescList.add(fieldDesc);
  278 + }
  279 + });
  280 +
  281 +
  282 + // 根据search_analyzer进行分组
  283 + Map<String, List<FieldDesc>> groupMap = searchfieldDescList.stream().collect(Collectors.groupingBy(fieldDesc -> fieldDesc.search_analyzer));
  284 + for (Map.Entry<String, List<FieldDesc>> groupEntry : groupMap.entrySet()) {
  285 + // 对于cross_field来说 必须所有输入的token在同一组内
  286 + String searchAnalyzer = groupEntry.getValue().get(0).search_analyzer;
  287 + List<String> keywordAnalyzerResult = getAnalyzerTokens(keyword, searchAnalyzer);
  288 + List<String> sortedFieldDescNames = groupEntry.getValue().stream().map(fieldDesc -> fieldDesc.field).collect(Collectors.toList());
  289 + boolean isMatch = true;
  290 + List<FieldMatch> fieldMatchList = new ArrayList<>();
  291 + for (String token : keywordAnalyzerResult) {
  292 + boolean isTokenMatch = false;
  293 + for (String field : sortedSearchFields) {
  294 + // 检查field是否匹配token
  295 + if (!sortedFieldDescNames.contains(field)) {
  296 + continue;
  297 + }
  298 +
  299 + String value = document.get(field) != null ? document.get(field).toString() : null;
  300 + FieldDesc fieldDesc = fieldDescMap.get(field);
  301 + Assert.notNull(fieldDesc, "for " + field);
  302 + List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
  303 + isTokenMatch = analyzerResult.contains(token);
  304 + if (returnType.equals(RETURN_TYPE_ALL) || isTokenMatch) {
  305 + fieldMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, Arrays.asList(token), isTokenMatch));
  306 + }
  307 +
  308 + if (returnType.equals(RETURN_TYPE_MATCH_ONLY) && isTokenMatch) {
  309 + // 找到匹配的就返回
  310 + break;
  311 + }
  312 + }
  313 +
  314 + isMatch = isMatch && isTokenMatch;
  315 + }
  316 +
  317 + if (returnType.equals(RETURN_TYPE_ALL) || isMatch) {
  318 + groupMatchList.add(new GroupMatch("", searchAnalyzer, keywordAnalyzerResult, fieldMatchList, isMatch));
  319 + }
  320 + }
  321 +
  322 + return groupMatchList;
  323 + }
  324 +
  325 + public Map<String, Object> getTokens(String skn) throws Exception {
  326 + long start = System.currentTimeMillis();
  327 + logger.info("Begin to get tokens for product skn {}.", skn);
  328 +
  329 + // 1. 根据skn获取es中的数据
  330 + Map<String, Object> document = getDocumentBySkn(skn);
  331 + logger.info("Get the document for the product. document: \n{}", document);
  332 +
  333 + // 2. 解析mapping文件获取字段元数据
  334 + Map<String, FieldDesc> fieldDescMap = parseMapping();
  335 + logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
  336 +
  337 + // 3. 处理多字段和copy_to的字段并赋值
  338 + processMultiFields(fieldDescMap, document);
  339 + processCopiedFields(fieldDescMap, document);
  340 + logger.info("Set the value of copied field succeeded.");
  341 +
  342 + // 4. 分析每个multi-match的每个字段 根据权重优先级展示token列表
  343 + List<String> sortedSearchFields = getSortedSearchFields();
  344 + logger.info("Get the search fields. fields: {}", sortedSearchFields);
  345 + boolean isMatch = false;
  346 + List<String> tokens = new ArrayList<>();
  347 + for (String field : sortedSearchFields) {
  348 + String value = document.get(field) != null ? document.get(field).toString() : null;
  349 + FieldDesc fieldDesc = fieldDescMap.get(field);
  350 + Assert.notNull(fieldDesc, "no field " + field + " defined.");
  351 + List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
  352 + if (CollectionUtils.isNotEmpty(analyzerResult)) {
  353 + analyzerResult.removeAll(tokens);
  354 + tokens.addAll(analyzerResult);
  355 + }
  356 + }
  357 +
  358 + Map<String, Object> map = new HashMap<>();
  359 + map.put("code", "200");
  360 + map.put("message", "product tokens.");
  361 + map.put("data", tokens);
  362 + logger.info("End to get tokens for product skn {}. cost: {}", skn, System.currentTimeMillis() - start);
  363 + return map;
  364 + }
  365 +
  366 + private boolean judgeMatch(List<String> analyzerResult, List<String> keywordAnalyzerResult) {
  367 + // 对于best_fields必须包含所有token
  368 + return analyzerResult.containsAll(keywordAnalyzerResult);
  369 + }
  370 +
  371 + private List<String> getKeyWordAnalyzerResult(FieldDesc fieldDesc, String keyword) throws ExecutionException {
  372 + if (fieldDesc.noNeedAnalyzer()) {
  373 + return Arrays.asList(keyword);
  374 + }
  375 +
  376 + String searchAnalyzer = fieldDesc.search_analyzer;
  377 + if (StringUtils.isEmpty(searchAnalyzer)) {
  378 + searchAnalyzer = fieldDesc.analyzer;
  379 + }
  380 + if (StringUtils.isEmpty(searchAnalyzer)) {
  381 + searchAnalyzer = DEFAULT_ANALYZER;
  382 + }
  383 + return getAnalyzerTokens(keyword, searchAnalyzer);
  384 + }
  385 +
  386 + private List<String> getAnalyzerResult(FieldDesc fieldDesc, String value) throws ExecutionException {
  387 + if (StringUtils.isEmpty(value)) {
  388 + return new ArrayList<>();
  389 + }
  390 +
  391 + if (fieldDesc.noNeedAnalyzer()) {
  392 + return Arrays.asList(value);
  393 + }
  394 +
  395 + String analyzer = fieldDesc.analyzer;
  396 + if (StringUtils.isEmpty(analyzer)) {
  397 + analyzer = DEFAULT_ANALYZER;
  398 + }
  399 +
  400 + return getAnalyzerTokens(value, analyzer);
  401 + }
  402 +
  403 + private List<String> getAnalyzerTokens(String text, String analyzer) throws ExecutionException {
  404 + // 使用guava的本地缓存
  405 + return CacheBuilder.newBuilder()
  406 + .maximumSize(1000)
  407 + .expireAfterWrite(3, TimeUnit.MINUTES)
  408 + .build(
  409 + new CacheLoader<String, List<String>>() {
  410 + public List<String> load(String key) {
  411 + String tempAnalyzer = key.split("@")[0];
  412 + String tempText = key.split("@")[1];
  413 +
  414 + if ("keyword".equals(analyzer)) {
  415 + // keyword 分词就不去调用ES
  416 + return Arrays.asList(tempText);
  417 + }
  418 +
  419 + IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  420 + List<AnalyzeResponse.AnalyzeToken> list = client.getAnalyzeResponse(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, text, analyzer).getTokens();
  421 + return list.stream().map(item -> item.getTerm()).collect(Collectors.toList());
  422 + }
  423 + }).get(analyzer + "@" + text);
  424 + }
  425 +
  426 +
  427 + private Map<String, Object> getDocumentBySkn(String skn) {
  428 + IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  429 + if (!client.indexExists(ISearchConstants.INDEX_NAME_PRODUCT_INDEX)) {
  430 + throw new RuntimeException("index not exist");
  431 + }
  432 +
  433 + SearchParam searchParam = new SearchParam();
  434 + searchParam.setQuery(QueryBuilders.termQuery("productSkn", skn));
  435 + SearchResult searchResult = client.search(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, ISearchConstants.INDEX_NAME_PRODUCT_INDEX, searchParam);
  436 + if (searchResult == null || searchResult.getResultList() == null || searchResult.getResultList().isEmpty()) {
  437 + throw new RuntimeException("product skn not exist");
  438 + }
  439 +
  440 + return searchResult.getResultList().get(0);
  441 + }
  442 +
  443 + private Map<String, FieldDesc> parseMapping() {
  444 + if (localFieldDescMap != null) {
  445 + return localFieldDescMap;
  446 + }
  447 +
  448 + synchronized (this) {
  449 + if (localFieldDescMap != null) {
  450 + // double check
  451 + return localFieldDescMap;
  452 + }
  453 +
  454 + // 从ES获取mapping
  455 + IElasticsearchClient client = esClientMgr.getClient(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  456 + GetMappingsResponse response = client.getMapping(ISearchConstants.INDEX_NAME_PRODUCT_INDEX, ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  457 + Assert.notNull(response, ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  458 + ImmutableOpenMap<String, MappingMetaData> mapping = response.getMappings().get(ISearchConstants.INDEX_NAME_PRODUCT_INDEX);
  459 + Iterator<ObjectObjectCursor<String, MappingMetaData>> iterator = mapping.iterator();
  460 + while(iterator.hasNext())
  461 + {
  462 + ObjectObjectCursor<String, MappingMetaData> object = iterator.next();
  463 + //object.value.getSourceAsMap();
  464 + }
  465 +
  466 +// String mappingContent = yohoIndexService.getIndex(ISearchConstants.INDEX_NAME_PRODUCT_INDEX).getMappingContent();
  467 +// JSONObject jsonObject = JSON.parseObject(mappingContent);
  468 +// JSONObject mapping = jsonObject.getJSONObject(ISearchConstants.INDEX_NAME_PRODUCT_INDEX).getJSONObject("properties");
  469 +// Map<String, FieldDesc> fieldDescMap = new HashMap<>();
  470 +// for (String field : mapping.keySet()) {
  471 +// parseToFieldDesc(fieldDescMap, field, mapping.getJSONObject(field));
  472 +// }
  473 +
  474 +// localFieldDescMap = fieldDescMap;
  475 + return localFieldDescMap;
  476 + }
  477 + }
  478 +
  479 + private void parseToFieldDesc(Map<String, FieldDesc> fieldDescMap, String field, JSONObject jsonObject) {
  480 + if (jsonObject.containsKey("fields")) {
  481 + // 多字段类型
  482 + JSONObject innerJsonObject = jsonObject.getJSONObject("fields");
  483 + for (String innerField : innerJsonObject.keySet()) {
  484 + String realFieldName = innerField;
  485 + if (!innerField.equals(field)) {
  486 + realFieldName = field + "." + innerField;
  487 + }
  488 +
  489 + parseSingle(fieldDescMap, realFieldName, innerJsonObject.getJSONObject(innerField));
  490 + }
  491 + } else if (jsonObject.containsKey("properties")) {
  492 + // nested object类型 搜索暂不支持查询内嵌对象的字段
  493 + } else {
  494 + parseSingle(fieldDescMap, field, jsonObject);
  495 + }
  496 + }
  497 +
  498 + private void parseSingle(Map<String, FieldDesc> fieldDescMap, String field, JSONObject jsonObject) {
  499 + FieldDesc fieldDesc = new FieldDesc();
  500 + fieldDesc.field = field;
  501 + fieldDesc.type = jsonObject.getString("type");
  502 + fieldDesc.index = jsonObject.getString("index");
  503 + fieldDesc.analyzer = jsonObject.getString("analyzer");
  504 + fieldDesc.search_analyzer = jsonObject.getString("search_analyzer");
  505 + Object copyTo = jsonObject.get("copy_to");
  506 + if (copyTo != null && copyTo instanceof JSONArray) {
  507 + JSONArray jsonArray = (JSONArray) copyTo;
  508 + for (int i = 0; i < jsonArray.size(); i++) {
  509 + fieldDesc.addCopyTo(jsonArray.getString(i));
  510 + }
  511 + } else if (copyTo != null && copyTo instanceof JSONObject) {
  512 + fieldDesc.addCopyTo((String) copyTo);
  513 + }
  514 +
  515 + fieldDescMap.put(field, fieldDesc);
  516 + }
  517 +
  518 + private void processMultiFields(Map<String, FieldDesc> fieldDescMap, Map<String, Object> document) {
  519 + for (String field : fieldDescMap.keySet()) {
  520 + String[] parts = field.split("\\.");
  521 + if (parts != null && parts.length == 2) {
  522 + document.put(field, document.get(parts[0]));
  523 + }
  524 + }
  525 + }
  526 +
  527 + private void processCopiedFields(Map<String, FieldDesc> fieldDescMap, Map<String, Object> document) {
  528 + Map<String, List<String>> copiedFieldMap = getCopiedFieldMap(fieldDescMap);
  529 + for (Map.Entry<String, List<String>> entry : copiedFieldMap.entrySet()) {
  530 + String copiedField = entry.getKey();
  531 + StringBuffer sb = new StringBuffer();
  532 + for (String field : entry.getValue()) {
  533 + if (document.get(field) != null) {
  534 + sb.append(document.get(field)).append(" ");
  535 + }
  536 + }
  537 +
  538 + document.put(copiedField, sb.toString());
  539 + }
  540 + }
  541 +
  542 + private Map<String, List<String>> getCopiedFieldMap(Map<String, FieldDesc> fieldDescMap) {
  543 + if (localCopiedFieldMap != null) {
  544 + return localCopiedFieldMap;
  545 + }
  546 +
  547 + synchronized (this) {
  548 + if (localCopiedFieldMap != null) {
  549 + // double check
  550 + return localCopiedFieldMap;
  551 + }
  552 +
  553 + Map<String, List<String>> copiedFieldMap = new HashMap<>();
  554 + for (Map.Entry<String, FieldDesc> entry : fieldDescMap.entrySet()) {
  555 + for (String dest : entry.getValue().copy_to) {
  556 + List<String> list = copiedFieldMap.get(dest);
  557 + if (list == null) {
  558 + list = new ArrayList<>();
  559 + copiedFieldMap.put(dest, list);
  560 + }
  561 +
  562 + list.add(entry.getKey());
  563 + }
  564 + }
  565 +
  566 + localCopiedFieldMap = copiedFieldMap;
  567 + return localCopiedFieldMap;
  568 + }
  569 + }
  570 +
  571 + private List<String> getSortedSearchFields() {
  572 + if (localMutilFields != null) {
  573 + return localMutilFields;
  574 + }
  575 +
  576 + synchronized (this) {
  577 + if (localMutilFields != null) {
  578 + // double check
  579 + return localMutilFields;
  580 + }
  581 +
  582 + List<FieldWithBoost> list = new ArrayList<>();
  583 + List<String> fields = ISearchConstants.SEARCH_DEFAULT_FIELD;
  584 + for (String field : fields) {
  585 + String[] fieldBoost = field.split("\\^");
  586 + if (fieldBoost.length == 2) {
  587 + list.add(new FieldWithBoost(fieldBoost[0], Integer.valueOf(fieldBoost[1])));
  588 + } else if (fieldBoost.length == 1) {
  589 + list.add(new FieldWithBoost(fieldBoost[0], 1));
  590 + }
  591 + }
  592 +
  593 + Collections.sort(list);
  594 + List<String> result = new ArrayList<>();
  595 + for (FieldWithBoost item : list) {
  596 + result.add(item.fieldName);
  597 + }
  598 +
  599 + localMutilFields = result;
  600 + return localMutilFields;
  601 + }
  602 + }
  603 +
  604 + static class FieldWithBoost implements Comparable {
  605 + int boost;
  606 + String fieldName;
  607 +
  608 + public FieldWithBoost(String fieldName, int boost) {
  609 + this.boost = boost;
  610 + this.fieldName = fieldName;
  611 + }
  612 +
  613 + @Override
  614 + public int compareTo(Object o) {
  615 + return ((FieldWithBoost) o).boost - boost;
  616 + }
  617 + }
  618 +
  619 + static class FieldDesc {
  620 + String field;
  621 +
  622 + String type;
  623 +
  624 + String index;
  625 +
  626 + String analyzer;
  627 +
  628 + String search_analyzer;
  629 +
  630 + Set<String> copy_to = new HashSet<>();
  631 +
  632 + public void addCopyTo(String item) {
  633 + copy_to.add(item);
  634 + }
  635 +
  636 + public boolean noNeedAnalyzer() {
  637 + // 声明不分词的类型 或者 其他非string类型
  638 + return "not_analyzed".equals(index) || (type != null && !"string".equals(type));
  639 + }
  640 +
  641 + @Override
  642 + public String toString() {
  643 + return "FieldDesc{" +
  644 + "type='" + type + '\'' +
  645 + ", index='" + index + '\'' +
  646 + ", analyzer='" + analyzer + '\'' +
  647 + ", search_analyzer='" + search_analyzer + '\'' +
  648 + '}';
  649 + }
  650 + }
  651 +}
  1 +package com.yoho.search.service.searchexplainer;
  2 +
  3 +import java.util.List;
  4 +
  5 +/**
  6 + * Created by ginozhang on 2016/12/22.
  7 + */
  8 +public class SearchMatch {
  9 +
  10 + private boolean isMatch = false;
  11 +
  12 + private String indexAnalyzer;
  13 +
  14 + private String searchAnalyzer;
  15 +
  16 + private List<String> searchTokens;
  17 +
  18 + public SearchMatch() {
  19 + }
  20 +
  21 + public SearchMatch(boolean isMatch, String indexAnalyzer, String searchAnalyzer, List<String> searchTokens) {
  22 + this.isMatch = isMatch;
  23 + this.indexAnalyzer = indexAnalyzer;
  24 + this.searchAnalyzer = searchAnalyzer;
  25 + this.searchTokens = searchTokens;
  26 + }
  27 +
  28 + public String getIndexAnalyzer() {
  29 + return indexAnalyzer;
  30 + }
  31 +
  32 + public void setIndexAnalyzer(String indexAnalyzer) {
  33 + this.indexAnalyzer = indexAnalyzer;
  34 + }
  35 +
  36 + public String getSearchAnalyzer() {
  37 + return searchAnalyzer;
  38 + }
  39 +
  40 + public void setSearchAnalyzer(String searchAnalyzer) {
  41 + this.searchAnalyzer = searchAnalyzer;
  42 + }
  43 +
  44 + public boolean isMatch() {
  45 + return isMatch;
  46 + }
  47 +
  48 + public void setMatch(boolean match) {
  49 + isMatch = match;
  50 + }
  51 +
  52 + public List<String> getSearchTokens() {
  53 + return searchTokens;
  54 + }
  55 +
  56 + public void setSearchTokens(List<String> searchTokens) {
  57 + this.searchTokens = searchTokens;
  58 + }
  59 +}