...
|
...
|
@@ -6,12 +6,10 @@ import com.google.common.cache.CacheLoader; |
|
|
import com.yoho.search.base.utils.ISearchConstants;
|
|
|
import com.yoho.search.base.utils.ProductIndexEsField;
|
|
|
import com.yoho.search.common.ESClientMgr;
|
|
|
import com.yoho.search.common.SearchServiceConfiger;
|
|
|
import com.yoho.search.core.es.IElasticsearchClient;
|
|
|
import com.yoho.search.core.es.model.SearchField;
|
|
|
import com.yoho.search.core.es.model.SearchParam;
|
|
|
import com.yoho.search.core.es.model.SearchResult;
|
|
|
import com.yoho.search.service.helper.SearchQueryHelper;
|
|
|
import com.yoho.search.service.index.SearchFieldBoostConfigService;
|
|
|
import org.apache.commons.collections.CollectionUtils;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
...
|
...
|
@@ -39,30 +37,19 @@ public class SearchExplainerService { |
|
|
|
|
|
// Class-wide SLF4J logger.
private static final Logger logger = LoggerFactory.getLogger(SearchExplainerService.class);
|
|
|
|
|
|
|
|
|
// Injected collaborator; not referenced in the visible part of this class.
@Autowired
|
|
|
private ESClientMgr esClientMgr;
|
|
|
// Supplies the default multi-match query type when a request omits "multiSearchType".
@Autowired
|
|
|
private SearchServiceConfiger searchServiceConfiger;
|
|
|
// Injected collaborator; presumably the source of search-field boosts — confirm against getSortedSearchFields().
@Autowired
|
|
|
private SearchFieldBoostConfigService searchFieldBoostConfigService;
|
|
|
|
|
|
// Analyzer name used as a fallback when a field description carries no analyzer.
private static final String DEFAULT_ANALYZER = "keyword";
|
|
|
|
|
|
// Field-type labels compared against EsField.getFieldType() to pick the filter comparison strategy.
private static final String MATCH_TYPE_STRING = "String Match";
|
|
|
private static final String MATCH_TYPE_RANGE = "Range Match";
|
|
|
|
|
|
// Values of the "returnType" request parameter: report every search field, or only the matching ones (the default).
private static final String RETURN_TYPE_ALL = "all";
|
|
|
private static final String RETURN_TYPE_MATCH_ONLY = "match_only";
|
|
|
|
|
|
|
|
|
// Locally cached field descriptions; reset to null by clearCachedData().
private volatile Map<String, FieldDesc> localFieldDescMap = null;
|
|
|
|
|
|
// Locally cached copied-field data; reset to null by clearCachedData(). Presumably keyed by copy_to target — confirm.
private volatile Map<String, List<String>> localCopiedFieldMap = null;
|
|
|
|
|
|
// private volatile List<String> localMutilFields = null;
|
|
|
//
|
|
|
// private volatile List<FieldWithBoost> localMutilFieldWithBoostList = null;
|
|
|
|
|
|
|
|
|
// Mapping from request parameter names to field names in the index
|
|
|
@SuppressWarnings("serial")
|
...
|
...
|
@@ -108,21 +95,6 @@ public class SearchExplainerService { |
|
|
}
|
|
|
};
|
|
|
|
|
|
public String fieldConvert(String paraField) {
|
|
|
EsField esField = fieldMap.get(paraField);
|
|
|
if (esField == null) {
|
|
|
return null;
|
|
|
}
|
|
|
return esField.getFieldName();
|
|
|
}
|
|
|
|
|
|
public void clearCachedData() {
|
|
|
// this.localMutilFields = null;
|
|
|
// this.localMutilFieldWithBoostList = null;
|
|
|
this.localCopiedFieldMap = null;
|
|
|
this.localFieldDescMap = null;
|
|
|
}
|
|
|
|
|
|
public Map<String, Object> show(Map<String, String> paramMap) throws Exception {
|
|
|
Map<String, Object> map = new LinkedHashMap<>();
|
|
|
String skn = paramMap.get("skn");
|
...
|
...
|
@@ -214,272 +186,10 @@ public class SearchExplainerService { |
|
|
return searchFieldResult;
|
|
|
}
|
|
|
|
|
|
public Map<String, Object> explain(Map<String, String> paramMap) throws Exception {
|
|
|
long start = System.currentTimeMillis();
|
|
|
logger.info("Begin to explain search. start: {}, paramMap: {}.", start, paramMap);
|
|
|
String skn = paramMap.get("skn");
|
|
|
if (StringUtils.isEmpty(skn)) {
|
|
|
Map<String, Object> map = new LinkedHashMap<>();
|
|
|
map.put("code", "400");
|
|
|
map.put("message", "skn is null");
|
|
|
return map;
|
|
|
}
|
|
|
|
|
|
String returnType = paramMap.get("returnType");
|
|
|
if (returnType == null) {
|
|
|
returnType = RETURN_TYPE_MATCH_ONLY;
|
|
|
}
|
|
|
|
|
|
String multiSearchType = paramMap.get("multiSearchType");
|
|
|
if (multiSearchType == null) {
|
|
|
multiSearchType = searchServiceConfiger.getSearchMultiMatchQueryType();
|
|
|
}
|
|
|
|
|
|
String keyword = paramMap.get("query");
|
|
|
logger.info("Begin to explain search keyword [{}] for product skn {}.", keyword, skn);
|
|
|
|
|
|
// 1. 根据skn获取es中的数据
|
|
|
Map<String, Object> document = getDocumentBySkn(skn);
|
|
|
logger.info("Get the document for the product. document: \n{}", document);
|
|
|
|
|
|
// 2.分析过滤的参数
|
|
|
Map<String, Object> explainResult = explainFilter(paramMap, document);
|
|
|
|
|
|
if ((Boolean) explainResult.get("filter_match") && StringUtils.isNotEmpty(keyword)) {
|
|
|
List<SearchMatch> queryMatchList = explainMultiMatchQuery(document, keyword, returnType, multiSearchType);
|
|
|
explainResult.put("query_match", queryMatchList.stream().anyMatch(SearchMatch::isMatch));
|
|
|
explainResult.put("query_list", queryMatchList);
|
|
|
}
|
|
|
|
|
|
logger.info("End to explain search keyword [{}] for product skn {}. cost: {}", keyword, skn, System.currentTimeMillis() - start);
|
|
|
return explainResult;
|
|
|
}
|
|
|
|
|
|
private Map<String, Object> explainFilter(Map<String, String> paramMap, Map<String, Object> document) {
|
|
|
Map<String, Object> map = new LinkedHashMap<>();
|
|
|
List<String> warns = new ArrayList<>();
|
|
|
boolean isFilterMatch = true;
|
|
|
List<FilterMatch> filterMatchList = new ArrayList<>();
|
|
|
for (Map.Entry<String, String> entry : paramMap.entrySet()) {
|
|
|
String key = entry.getKey();
|
|
|
if (key.equals("query") || key.equals("skn") || key.equals("returnType")) {
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
if (!fieldMap.containsKey(key)) {
|
|
|
warns.add("parameter " + key + " is invalid\n");
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
Object value = document.get(fieldConvert(key));
|
|
|
if (value != null) {
|
|
|
String paraValue = value.toString();
|
|
|
EsField esField = fieldMap.get(key);
|
|
|
// 将入参和字段值都转化为字符数组
|
|
|
String[] paraArray = entry.getValue().split(",");
|
|
|
String[] indexArray = paraValue.split(",");
|
|
|
Set<String> indexSet = new HashSet<String>(indexArray.length);
|
|
|
for (String i : indexArray) {
|
|
|
indexSet.add(i);
|
|
|
}
|
|
|
// 字符匹配比较
|
|
|
if (MATCH_TYPE_STRING.equals(esField.getFieldType())) {
|
|
|
for (String p : paraArray) {
|
|
|
if (!indexSet.contains(p)) {
|
|
|
filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
|
|
|
isFilterMatch = false;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
// 取值范围比较
|
|
|
if (MATCH_TYPE_RANGE.equals(esField.getFieldType())) {
|
|
|
if (key.equals("stocknumber")) {
|
|
|
if (Integer.valueOf(value.toString()) < 1) {
|
|
|
filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
|
|
|
isFilterMatch = false;
|
|
|
}
|
|
|
} else if (paraArray.length == 2 && indexArray.length == 1) {
|
|
|
Integer paraRangeStart = Integer.valueOf(paraArray[0]);
|
|
|
Integer paraRangeEnd = Integer.valueOf(paraArray[1]);
|
|
|
Integer indexValue = Integer.valueOf(indexArray[0]);
|
|
|
if (indexValue < paraRangeStart || indexValue > paraRangeEnd) {
|
|
|
filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
|
|
|
isFilterMatch = false;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
map.put("code", "200");
|
|
|
map.put("message", "search explainer");
|
|
|
if (CollectionUtils.isNotEmpty(warns)) {
|
|
|
map.put("warn", warns);
|
|
|
}
|
|
|
map.put("filter_match", isFilterMatch);
|
|
|
map.put("filter_list", filterMatchList);
|
|
|
|
|
|
return map;
|
|
|
}
|
|
|
|
|
|
private List<SearchMatch> explainMultiMatchQuery(Map<String, Object> document, String keyword, String returnType, String multiSearchType) throws Exception {
|
|
|
// 3. 解析mapping文件获取字段元数据
|
|
|
Map<String, FieldDesc> fieldDescMap = parseMapping();
|
|
|
logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
|
|
|
|
|
|
// 4. 处理多字段和copy_to的字段并赋值
|
|
|
processMultiFields(fieldDescMap, document);
|
|
|
processCopiedFields(fieldDescMap, document);
|
|
|
logger.info("Set the value of copied field succeeded.");
|
|
|
|
|
|
// 5. 分析每个multi-match的每个字段 根据分词结果判定是否匹配
|
|
|
List<String> sortedSearchFields = getSortedSearchFields();
|
|
|
logger.info("Get the search fields. fields: {}", sortedSearchFields);
|
|
|
|
|
|
if ("cross_fields".equalsIgnoreCase(multiSearchType)) {
|
|
|
return explainCrossFieldsQuery(document, keyword, returnType, fieldDescMap, sortedSearchFields);
|
|
|
} else {
|
|
|
return explainBestFieldsQuery(document, keyword, returnType, fieldDescMap, sortedSearchFields);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
private List<SearchMatch> explainBestFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap,
|
|
|
List<String> sortedSearchFields) throws Exception {
|
|
|
boolean isQueryMatch = false;
|
|
|
boolean isFieldQueryMatch = false;
|
|
|
List<SearchMatch> queryMatchList = new ArrayList<>();
|
|
|
for (String field : sortedSearchFields) {
|
|
|
String value = document.get(field) != null ? document.get(field).toString() : null;
|
|
|
FieldDesc fieldDesc = fieldDescMap.get(field);
|
|
|
Assert.notNull(fieldDesc, "no field " + field + " defined.");
|
|
|
List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
|
|
|
List<String> keywordAnalyzerResult = getKeyWordAnalyzerResult(fieldDesc, keyword);
|
|
|
isFieldQueryMatch = judgeMatch(analyzerResult, keywordAnalyzerResult);
|
|
|
isQueryMatch = isFieldQueryMatch || isQueryMatch;
|
|
|
// 如果是匹配模式则只返回匹配的字段,如果是ALL模式则返回索引字段
|
|
|
if (isFieldQueryMatch || RETURN_TYPE_ALL.equals(returnType)) {
|
|
|
queryMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, keywordAnalyzerResult, isFieldQueryMatch));
|
|
|
}
|
|
|
if (isFieldQueryMatch && RETURN_TYPE_MATCH_ONLY.equals(returnType)) {
|
|
|
// 找到匹配的就返回
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
return queryMatchList;
|
|
|
}
|
|
|
|
|
|
private List<SearchMatch> explainCrossFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap,
|
|
|
List<String> sortedSearchFields) throws Exception {
|
|
|
List<SearchMatch> groupMatchList = new ArrayList<>();
|
|
|
List<FieldDesc> searchFieldDescList = new ArrayList<>();
|
|
|
fieldDescMap.forEach((field, fieldDesc) -> {
|
|
|
if (sortedSearchFields.contains(field)) {
|
|
|
if (StringUtils.isEmpty(fieldDesc.analyzer)) {
|
|
|
fieldDesc.analyzer = DEFAULT_ANALYZER;
|
|
|
}
|
|
|
if (StringUtils.isEmpty(fieldDesc.search_analyzer)) {
|
|
|
fieldDesc.search_analyzer = fieldDesc.analyzer;
|
|
|
}
|
|
|
|
|
|
searchFieldDescList.add(fieldDesc);
|
|
|
}
|
|
|
});
|
|
|
|
|
|
// 根据search_analyzer进行分组
|
|
|
Map<String, List<FieldDesc>> groupMap = searchFieldDescList.stream().collect(Collectors.groupingBy(fieldDesc -> fieldDesc.search_analyzer));
|
|
|
for (Map.Entry<String, List<FieldDesc>> groupEntry : groupMap.entrySet()) {
|
|
|
// 对于cross_field来说 必须所有输入的token在同一组内
|
|
|
String searchAnalyzer = groupEntry.getValue().get(0).search_analyzer;
|
|
|
List<String> keywordAnalyzerResult = getAnalyzerTokens(keyword, searchAnalyzer);
|
|
|
List<String> sortedFieldDescNames = groupEntry.getValue().stream().map(fieldDesc -> fieldDesc.field).collect(Collectors.toList());
|
|
|
boolean isMatch = true;
|
|
|
List<FieldMatch> fieldMatchList = new ArrayList<>();
|
|
|
for (String token : keywordAnalyzerResult) {
|
|
|
boolean isTokenMatch = false;
|
|
|
for (String field : sortedSearchFields) {
|
|
|
// 检查field是否匹配token
|
|
|
if (!sortedFieldDescNames.contains(field)) {
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
String value = document.get(field) != null ? document.get(field).toString() : null;
|
|
|
FieldDesc fieldDesc = fieldDescMap.get(field);
|
|
|
Assert.notNull(fieldDesc, "for " + field);
|
|
|
List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
|
|
|
boolean isTokenMatchInThisField = analyzerResult.contains(token);
|
|
|
isTokenMatch = isTokenMatch || isTokenMatchInThisField;
|
|
|
if (isTokenMatchInThisField || RETURN_TYPE_ALL.equals(returnType)) {
|
|
|
fieldMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, Arrays.asList(token),
|
|
|
isTokenMatchInThisField));
|
|
|
}
|
|
|
|
|
|
if (isTokenMatchInThisField && RETURN_TYPE_MATCH_ONLY.equals(returnType)) {
|
|
|
// 找到匹配的就返回
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
isMatch = isMatch && isTokenMatch;
|
|
|
}
|
|
|
|
|
|
if (isMatch || RETURN_TYPE_ALL.equals(returnType)) {
|
|
|
groupMatchList.add(new GroupMatch("", searchAnalyzer, keywordAnalyzerResult, fieldMatchList, isMatch));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
return groupMatchList;
|
|
|
}
|
|
|
|
|
|
public Map<String, Object> getTokens(String skn) throws Exception {
|
|
|
long start = System.currentTimeMillis();
|
|
|
logger.info("Begin to get tokens for product skn {}.", skn);
|
|
|
|
|
|
// 1. 根据skn获取es中的数据
|
|
|
Map<String, Object> document = getDocumentBySkn(skn);
|
|
|
logger.info("Get the document for the product. document: \n{}", document);
|
|
|
|
|
|
// 2. 解析mapping文件获取字段元数据
|
|
|
Map<String, FieldDesc> fieldDescMap = parseMapping();
|
|
|
logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
|
|
|
|
|
|
// 3. 处理多字段和copy_to的字段并赋值
|
|
|
processMultiFields(fieldDescMap, document);
|
|
|
processCopiedFields(fieldDescMap, document);
|
|
|
logger.info("Set the value of copied field succeeded.");
|
|
|
|
|
|
// 4. 分析每个multi-match的每个字段 根据权重优先级展示token列表
|
|
|
List<String> sortedSearchFields = getSortedSearchFields();
|
|
|
logger.info("Get the search fields. fields: {}", sortedSearchFields);
|
|
|
// boolean isMatch = false;
|
|
|
List<String> tokens = new ArrayList<>();
|
|
|
for (String field : sortedSearchFields) {
|
|
|
String value = document.get(field) != null ? document.get(field).toString() : null;
|
|
|
FieldDesc fieldDesc = fieldDescMap.get(field);
|
|
|
Assert.notNull(fieldDesc, "no field " + field + " defined.");
|
|
|
List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
|
|
|
if (CollectionUtils.isNotEmpty(analyzerResult)) {
|
|
|
analyzerResult.removeAll(tokens);
|
|
|
tokens.addAll(analyzerResult);
|
|
|
}
|
|
|
}
|
|
|
Map<String, Object> map = new HashMap<>();
|
|
|
map.put("code", "200");
|
|
|
map.put("message", "product tokens.");
|
|
|
map.put("data", tokens);
|
|
|
logger.info("End to get tokens for product skn {}. cost: {}", skn, System.currentTimeMillis() - start);
|
|
|
return map;
|
|
|
}
|
|
|
|
|
|
private boolean judgeMatch(List<String> analyzerResult, List<String> keywordAnalyzerResult) {
|
|
|
// 对于best_fields必须包含所有token
|
|
|
return analyzerResult.containsAll(keywordAnalyzerResult);
|
|
|
}
|
|
|
|
|
|
private List<String> getKeyWordAnalyzerResult(FieldDesc fieldDesc, String keyword) throws ExecutionException {
|
|
|
if (fieldDesc.noNeedAnalyzer()) {
|
|
|
return Arrays.asList(keyword);
|
|
|
}
|
|
|
|
|
|
String searchAnalyzer = fieldDesc.search_analyzer;
|
|
|
if (StringUtils.isEmpty(searchAnalyzer)) {
|
|
|
searchAnalyzer = fieldDesc.analyzer;
|
...
|
...
|
@@ -494,11 +204,9 @@ public class SearchExplainerService { |
|
|
if (StringUtils.isEmpty(value)) {
|
|
|
return new ArrayList<>();
|
|
|
}
|
|
|
|
|
|
if (fieldDesc.noNeedAnalyzer()) {
|
|
|
return Arrays.asList(value);
|
|
|
}
|
|
|
|
|
|
String analyzer = fieldDesc.analyzer;
|
|
|
if (StringUtils.isEmpty(analyzer)) {
|
|
|
analyzer = DEFAULT_ANALYZER;
|
...
|
...
|
|