Authored by hugufei

工具解释类优化

package com.yoho.search.restapi.tools;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import com.yoho.search.base.utils.HttpServletRequestUtils;
import com.yoho.search.service.scene.others.explain.SearchExplainerService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import com.yoho.search.base.utils.HttpServletRequestUtils;
import com.yoho.search.service.scene.others.explain.SearchExplainerService;
import javax.servlet.http.HttpServletRequest;
import java.util.HashMap;
import java.util.Map;
/**
* Created by ginozhang on 2016/11/16.
*/
@Controller
@RestController
public class SearchExplainerController {
private static final Logger logger = LoggerFactory.getLogger(SearchExplainerController.class);
... ... @@ -27,20 +20,8 @@ public class SearchExplainerController {
@Autowired
private SearchExplainerService searchExplainerService;
/**
 * Endpoint {@code /tools/explain}: converts the request parameters with
 * {@code HttpServletRequestUtils.transParamType} and hands them to
 * {@code searchExplainerService.explain}.
 *
 * @param request the incoming HTTP request
 * @return the map produced by the service, or the map built by
 *         {@code errorResult} when the service call throws
 */
@RequestMapping(value = "/tools/explain")
@ResponseBody
public Map<String, Object> explain(HttpServletRequest request) {
    final Map<String, String> requestParams = HttpServletRequestUtils.transParamType(request);
    try {
        return searchExplainerService.explain(requestParams);
    } catch (Throwable failure) {
        // Any failure is logged and reported in the response body instead of propagating.
        final String reason = failure.getMessage();
        logger.error(reason, failure);
        return errorResult(reason);
    }
}
//显示SKN分词结果
@RequestMapping(value = "/tools/show")
@ResponseBody
public Map<String, Object> show(HttpServletRequest request) {
Map<String, String> paramMap = HttpServletRequestUtils.transParamType(request);
try {
... ... @@ -51,26 +32,12 @@ public class SearchExplainerController {
}
}
/**
 * Endpoint {@code /tools/clearExplainCachedData}: asks the explainer service
 * to clear its cached data.
 *
 * @return a map with code "200" and message "succeed" on success, or the map
 *         built by {@code errorResult} when clearing throws
 */
@RequestMapping(value = "/tools/clearExplainCachedData")
@ResponseBody
public Map<String, Object> clearExplainCachedData() {
    try {
        searchExplainerService.clearCachedData();
        final Map<String, Object> success = new HashMap<>();
        success.put("code", "200");
        success.put("message", "succeed");
        return success;
    } catch (Throwable failure) {
        final String reason = failure.getMessage();
        logger.error(reason, failure);
        return errorResult(reason);
    }
}
@RequestMapping(value = "/tools/tokens")
@ResponseBody
public Map<String, Object> getTokens(@RequestParam String skn) {
//显示SKN和商品的匹配结果
@RequestMapping(value = "/tools/multiFieldAnalyzeList")
public Map<String, Object> multiFieldAnalyzeList(HttpServletRequest request) {
Map<String, String> paramMap = HttpServletRequestUtils.transParamType(request);
try {
return searchExplainerService.getTokens(skn);
return searchExplainerService.multiFieldAnalyzeList(paramMap);
} catch (Throwable t) {
logger.error(t.getMessage(), t);
return errorResult(t.getMessage());
... ... @@ -79,21 +46,9 @@ public class SearchExplainerController {
/**
 * Builds the error payload returned by this controller's endpoints when a
 * service call throws.
 *
 * @param message failure description stored under the "message" key; callers
 *                pass {@code Throwable.getMessage()}, so it may be {@code null}
 * @return a new mutable map with "code" = "500" and "message" = {@code message}
 */
private Map<String, Object> errorResult(String message) {
    Map<String, Object> map = new HashMap<>();
    // Exactly one status entry: a second put on the same key would only overwrite the first.
    map.put("code", "500");
    map.put("message", message);
    return map;
}
/**
 * Endpoint {@code /tools/multiFieldAnalyzeList}: converts the request
 * parameters with {@code HttpServletRequestUtils.transParamType} and hands
 * them to {@code searchExplainerService.multiFieldAnalyzeList}.
 *
 * @param request the incoming HTTP request
 * @return the map produced by the service, or the map built by
 *         {@code errorResult} when the service call throws
 */
@RequestMapping(value = "/tools/multiFieldAnalyzeList")
@ResponseBody
public Map<String, Object> multiFieldAnalyzeList(HttpServletRequest request) {
    final Map<String, String> requestParams = HttpServletRequestUtils.transParamType(request);
    try {
        return searchExplainerService.multiFieldAnalyzeList(requestParams);
    } catch (Throwable failure) {
        final String reason = failure.getMessage();
        logger.error(reason, failure);
        return errorResult(reason);
    }
}
}
... ...
... ... @@ -6,12 +6,10 @@ import com.google.common.cache.CacheLoader;
import com.yoho.search.base.utils.ISearchConstants;
import com.yoho.search.base.utils.ProductIndexEsField;
import com.yoho.search.common.ESClientMgr;
import com.yoho.search.common.SearchServiceConfiger;
import com.yoho.search.core.es.IElasticsearchClient;
import com.yoho.search.core.es.model.SearchField;
import com.yoho.search.core.es.model.SearchParam;
import com.yoho.search.core.es.model.SearchResult;
import com.yoho.search.service.helper.SearchQueryHelper;
import com.yoho.search.service.index.SearchFieldBoostConfigService;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
... ... @@ -39,30 +37,19 @@ public class SearchExplainerService {
// Class-wide logger.
private static final Logger logger = LoggerFactory.getLogger(SearchExplainerService.class);
// Injected collaborator; its use is not visible in this part of the file.
@Autowired
private ESClientMgr esClientMgr;
// Supplies the default multi-match query type when a request carries no "multiSearchType".
@Autowired
private SearchServiceConfiger searchServiceConfiger;
// Injected collaborator; its use is not visible in this part of the file.
@Autowired
private SearchFieldBoostConfigService searchFieldBoostConfigService;
// Analyzer name used when a field description declares no analyzer.
private static final String DEFAULT_ANALYZER = "keyword";
// Field-type labels that explainFilter compares against EsField.getFieldType().
private static final String MATCH_TYPE_STRING = "String Match";
private static final String MATCH_TYPE_RANGE = "Range Match";
// Accepted values of the "returnType" request parameter; "match_only" is applied when it is absent.
private static final String RETURN_TYPE_ALL = "all";
private static final String RETURN_TYPE_MATCH_ONLY = "match_only";
// Cached metadata, reset to null by clearCachedData().
// NOTE(review): presumably rebuilt lazily on next use - confirm where these are populated.
private volatile Map<String, FieldDesc> localFieldDescMap = null;
private volatile Map<String, List<String>> localCopiedFieldMap = null;
// private volatile List<String> localMutilFields = null;
//
// private volatile List<FieldWithBoost> localMutilFieldWithBoostList = null;
// 入参和索引中字段名的映射
@SuppressWarnings("serial")
... ... @@ -108,21 +95,6 @@ public class SearchExplainerService {
}
};
/**
 * Translates a request parameter name into the corresponding index field name.
 *
 * @param paraField request parameter name looked up in {@code fieldMap}
 * @return the field name of the mapped {@code EsField}, or {@code null} when
 *         {@code fieldMap} has no entry for the parameter
 */
public String fieldConvert(String paraField) {
    final EsField mapped = fieldMap.get(paraField);
    return mapped != null ? mapped.getFieldName() : null;
}
/**
 * Discards the locally cached metadata by resetting
 * {@code localCopiedFieldMap} and {@code localFieldDescMap} to {@code null}.
 */
public void clearCachedData() {
    localCopiedFieldMap = null;
    localFieldDescMap = null;
}
public Map<String, Object> show(Map<String, String> paramMap) throws Exception {
Map<String, Object> map = new LinkedHashMap<>();
String skn = paramMap.get("skn");
... ... @@ -214,272 +186,10 @@ public class SearchExplainerService {
return searchFieldResult;
}
/**
 * Explains whether the product identified by the "skn" parameter matches the
 * filters and the keyword of a search request.
 *
 * <p>Parameters read from {@code paramMap}: "skn" (required), "returnType"
 * (defaults to {@code RETURN_TYPE_MATCH_ONLY}), "multiSearchType" (defaults
 * to the configured multi-match query type) and "query" (the keyword). The
 * keyword is only analysed when the filters match and the keyword is not
 * empty.
 *
 * @param paramMap request parameters
 * @return the filter explanation, extended with "query_match" and
 *         "query_list" when the keyword was analysed; a map with code "400"
 *         when "skn" is empty
 * @throws Exception propagated from the document lookup or the query analysis
 */
public Map<String, Object> explain(Map<String, String> paramMap) throws Exception {
    final long begin = System.currentTimeMillis();
    logger.info("Begin to explain search. start: {}, paramMap: {}.", begin, paramMap);

    final String skn = paramMap.get("skn");
    if (StringUtils.isEmpty(skn)) {
        final Map<String, Object> rejected = new LinkedHashMap<>();
        rejected.put("code", "400");
        rejected.put("message", "skn is null");
        return rejected;
    }

    final String requestedReturnType = paramMap.get("returnType");
    final String returnType = requestedReturnType != null ? requestedReturnType : RETURN_TYPE_MATCH_ONLY;

    final String requestedSearchType = paramMap.get("multiSearchType");
    final String multiSearchType = requestedSearchType != null
            ? requestedSearchType
            : searchServiceConfiger.getSearchMultiMatchQueryType();

    final String keyword = paramMap.get("query");
    logger.info("Begin to explain search keyword [{}] for product skn {}.", keyword, skn);

    // Step 1: load the indexed document of the product.
    final Map<String, Object> document = getDocumentBySkn(skn);
    logger.info("Get the document for the product. document: \n{}", document);

    // Step 2: evaluate the filter parameters against the document.
    final Map<String, Object> explainResult = explainFilter(paramMap, document);

    // Step 3: analyse the keyword only for products that passed the filters.
    final boolean filterPassed = (Boolean) explainResult.get("filter_match");
    if (filterPassed && StringUtils.isNotEmpty(keyword)) {
        final List<SearchMatch> queryMatchList = explainMultiMatchQuery(document, keyword, returnType, multiSearchType);
        boolean anyQueryMatch = false;
        for (SearchMatch candidate : queryMatchList) {
            if (candidate.isMatch()) {
                anyQueryMatch = true;
                break;
            }
        }
        explainResult.put("query_match", anyQueryMatch);
        explainResult.put("query_list", queryMatchList);
    }

    logger.info("End to explain search keyword [{}] for product skn {}. cost: {}", keyword, skn, System.currentTimeMillis() - begin);
    return explainResult;
}
/**
 * Checks every filter parameter of the request against the indexed document.
 *
 * <p>The control parameters "query", "skn", "returnType" and
 * "multiSearchType" are not filters and are skipped. A parameter without an
 * entry in {@code fieldMap} is reported under "warn". A parameter whose
 * document value is {@code null} is not checked.
 *
 * <ul>
 * <li>String match: every comma-separated request value must be present in
 * the comma-separated document value; one failed {@code FilterMatch} is
 * recorded per missing value.</li>
 * <li>Range match: "stocknumber" requires a document value of at least 1;
 * other fields are checked only when the request gives exactly two bounds and
 * the document holds a single value, which must lie in the inclusive range.</li>
 * </ul>
 *
 * @param paramMap request parameters
 * @param document indexed document of the product
 * @return ordered map with "code", "message", optional "warn",
 *         "filter_match" (Boolean) and "filter_list" (failed matches)
 * @throws NumberFormatException when a range value is not an integer
 */
private Map<String, Object> explainFilter(Map<String, String> paramMap, Map<String, Object> document) {
    Map<String, Object> map = new LinkedHashMap<>();
    List<String> warns = new ArrayList<>();
    boolean isFilterMatch = true;
    List<FilterMatch> filterMatchList = new ArrayList<>();
    for (Map.Entry<String, String> entry : paramMap.entrySet()) {
        String key = entry.getKey();
        // "multiSearchType" is read by explain() as a control parameter, so it must not be
        // treated as a filter field (it used to be reported as an invalid parameter).
        if (key.equals("query") || key.equals("skn") || key.equals("returnType") || key.equals("multiSearchType")) {
            continue;
        }
        if (!fieldMap.containsKey(key)) {
            warns.add("parameter " + key + " is invalid\n");
            continue;
        }
        Object value = document.get(fieldConvert(key));
        if (value == null) {
            continue;
        }
        String paraValue = value.toString();
        EsField esField = fieldMap.get(key);
        // Split both the request value and the document value into their comma-separated parts.
        String[] paraArray = entry.getValue().split(",");
        String[] indexArray = paraValue.split(",");
        Set<String> indexSet = new HashSet<>(Arrays.asList(indexArray));
        // String comparison.
        if (MATCH_TYPE_STRING.equals(esField.getFieldType())) {
            for (String p : paraArray) {
                if (!indexSet.contains(p)) {
                    filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
                    isFilterMatch = false;
                }
            }
        }
        // Range comparison.
        if (MATCH_TYPE_RANGE.equals(esField.getFieldType())) {
            if (key.equals("stocknumber")) {
                if (Integer.parseInt(paraValue) < 1) {
                    filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
                    isFilterMatch = false;
                }
            } else if (paraArray.length == 2 && indexArray.length == 1) {
                int paraRangeStart = Integer.parseInt(paraArray[0]);
                int paraRangeEnd = Integer.parseInt(paraArray[1]);
                int indexValue = Integer.parseInt(indexArray[0]);
                if (indexValue < paraRangeStart || indexValue > paraRangeEnd) {
                    filterMatchList.add(new FilterMatch(key, entry.getValue(), paraValue, esField.getFieldType(), false));
                    isFilterMatch = false;
                }
            }
        }
    }
    map.put("code", "200");
    map.put("message", "search explainer");
    if (CollectionUtils.isNotEmpty(warns)) {
        map.put("warn", warns);
    }
    map.put("filter_match", isFilterMatch);
    map.put("filter_list", filterMatchList);
    return map;
}
/**
 * Prepares the document for analysis and dispatches to the explanation
 * routine for the requested multi-match type.
 *
 * @param document        indexed document of the product; passed to
 *                        {@code processMultiFields} and {@code processCopiedFields}
 * @param keyword         search keyword
 * @param returnType      return-type value forwarded to the explanation routine
 * @param multiSearchType "cross_fields" (case-insensitive) selects the
 *                        cross-fields explanation; any other value selects best-fields
 * @return the matches produced by the selected explanation routine
 * @throws Exception propagated from mapping parsing or analysis
 */
private List<SearchMatch> explainMultiMatchQuery(Map<String, Object> document, String keyword, String returnType, String multiSearchType) throws Exception {
    // Field metadata parsed from the index mapping.
    final Map<String, FieldDesc> descriptors = parseMapping();
    logger.info("Get the field description by mapping. fieldDescMap: \n{}", descriptors);

    // Handle multi-fields and copy_to fields on the document.
    processMultiFields(descriptors, document);
    processCopiedFields(descriptors, document);
    logger.info("Set the value of copied field succeeded.");

    // Fields taking part in the multi-match query.
    final List<String> orderedFields = getSortedSearchFields();
    logger.info("Get the search fields. fields: {}", orderedFields);

    final boolean crossFields = "cross_fields".equalsIgnoreCase(multiSearchType);
    return crossFields
            ? explainCrossFieldsQuery(document, keyword, returnType, descriptors, orderedFields)
            : explainBestFieldsQuery(document, keyword, returnType, descriptors, orderedFields);
}
/**
 * Explains a best-fields query: a field matches when its analysed value
 * contains every token of the analysed keyword.
 *
 * <p>With {@code RETURN_TYPE_ALL} every search field is reported; otherwise
 * only matching fields are. With {@code RETURN_TYPE_MATCH_ONLY} the scan
 * stops at the first matching field.
 *
 * @param document           indexed document of the product
 * @param keyword            search keyword
 * @param returnType         return-type value controlling what is reported
 * @param fieldDescMap       field metadata keyed by field name
 * @param sortedSearchFields search fields in the order they are examined
 * @return one {@code FieldMatch} per reported field
 * @throws Exception propagated from the analyzers; an assertion failure is
 *                   raised when a search field has no metadata
 */
private List<SearchMatch> explainBestFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap,
        List<String> sortedSearchFields) throws Exception {
    final boolean reportEveryField = RETURN_TYPE_ALL.equals(returnType);
    final boolean stopAtFirstMatch = RETURN_TYPE_MATCH_ONLY.equals(returnType);
    final List<SearchMatch> reported = new ArrayList<>();
    for (String field : sortedSearchFields) {
        final Object rawValue = document.get(field);
        final String value = rawValue == null ? null : rawValue.toString();
        final FieldDesc fieldDesc = fieldDescMap.get(field);
        Assert.notNull(fieldDesc, "no field " + field + " defined.");
        final List<String> fieldTokens = getAnalyzerResult(fieldDesc, value);
        final List<String> keywordTokens = getKeyWordAnalyzerResult(fieldDesc, keyword);
        final boolean matched = judgeMatch(fieldTokens, keywordTokens);
        if (matched || reportEveryField) {
            reported.add(new FieldMatch(field, value, fieldDesc.analyzer, fieldTokens, fieldDesc.search_analyzer, keywordTokens, matched));
        }
        if (matched && stopAtFirstMatch) {
            break;
        }
    }
    return reported;
}
/**
 * Explains a cross-fields query. Search fields are grouped by their
 * search_analyzer; a group matches when every token of the keyword (analysed
 * with the group's search_analyzer) is found in the analysed value of at
 * least one field of that group.
 *
 * <p>Side effect: for every search field, an empty analyzer is set to
 * DEFAULT_ANALYZER and an empty search_analyzer is set to the analyzer,
 * directly on the FieldDesc objects held by fieldDescMap.
 *
 * @param document           indexed document of the product
 * @param keyword            search keyword
 * @param returnType         RETURN_TYPE_ALL reports every group and field;
 *                           otherwise only matching ones are reported, and
 *                           RETURN_TYPE_MATCH_ONLY stops the per-token field
 *                           scan at the first matching field
 * @param fieldDescMap       field metadata keyed by field name
 * @param sortedSearchFields search fields in the order they are examined
 * @return one GroupMatch per reported search_analyzer group
 * @throws Exception propagated from the analyzers
 */
private List<SearchMatch> explainCrossFieldsQuery(Map<String, Object> document, String keyword, String returnType, Map<String, FieldDesc> fieldDescMap,
List<String> sortedSearchFields) throws Exception {
List<SearchMatch> groupMatchList = new ArrayList<>();
List<FieldDesc> searchFieldDescList = new ArrayList<>();
// Collect the metadata of the search fields, filling in analyzer defaults on the way.
fieldDescMap.forEach((field, fieldDesc) -> {
if (sortedSearchFields.contains(field)) {
if (StringUtils.isEmpty(fieldDesc.analyzer)) {
fieldDesc.analyzer = DEFAULT_ANALYZER;
}
if (StringUtils.isEmpty(fieldDesc.search_analyzer)) {
fieldDesc.search_analyzer = fieldDesc.analyzer;
}
searchFieldDescList.add(fieldDesc);
}
});
// Group the fields by search_analyzer
Map<String, List<FieldDesc>> groupMap = searchFieldDescList.stream().collect(Collectors.groupingBy(fieldDesc -> fieldDesc.search_analyzer));
for (Map.Entry<String, List<FieldDesc>> groupEntry : groupMap.entrySet()) {
// For cross_fields, all input tokens must be matched within the same group
String searchAnalyzer = groupEntry.getValue().get(0).search_analyzer;
List<String> keywordAnalyzerResult = getAnalyzerTokens(keyword, searchAnalyzer);
List<String> sortedFieldDescNames = groupEntry.getValue().stream().map(fieldDesc -> fieldDesc.field).collect(Collectors.toList());
// The group matches only if every keyword token matches; stays true when there are no tokens.
boolean isMatch = true;
List<FieldMatch> fieldMatchList = new ArrayList<>();
for (String token : keywordAnalyzerResult) {
boolean isTokenMatch = false;
for (String field : sortedSearchFields) {
// Check whether the field matches the token; fields outside this group are skipped
if (!sortedFieldDescNames.contains(field)) {
continue;
}
String value = document.get(field) != null ? document.get(field).toString() : null;
FieldDesc fieldDesc = fieldDescMap.get(field);
Assert.notNull(fieldDesc, "for " + field);
List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
boolean isTokenMatchInThisField = analyzerResult.contains(token);
isTokenMatch = isTokenMatch || isTokenMatchInThisField;
if (isTokenMatchInThisField || RETURN_TYPE_ALL.equals(returnType)) {
fieldMatchList.add(new FieldMatch(field, value, fieldDesc.analyzer, analyzerResult, fieldDesc.search_analyzer, Arrays.asList(token),
isTokenMatchInThisField));
}
if (isTokenMatchInThisField && RETURN_TYPE_MATCH_ONLY.equals(returnType)) {
// Stop scanning fields for this token as soon as one matches
break;
}
}
isMatch = isMatch && isTokenMatch;
}
if (isMatch || RETURN_TYPE_ALL.equals(returnType)) {
groupMatchList.add(new GroupMatch("", searchAnalyzer, keywordAnalyzerResult, fieldMatchList, isMatch));
}
}
return groupMatchList;
}
/**
 * Lists the tokens of a product's search fields, in search-field order.
 *
 * <p>Tokens already collected from an earlier field are left out when a
 * later field yields them again.
 *
 * @param skn product identifier used to load the indexed document
 * @return map with "code" = "200", "message" = "product tokens." and
 *         "data" = the collected token list
 * @throws Exception propagated from the document lookup, mapping parsing or
 *                   analysis; an assertion failure is raised when a search
 *                   field has no metadata
 */
public Map<String, Object> getTokens(String skn) throws Exception {
    long start = System.currentTimeMillis();
    logger.info("Begin to get tokens for product skn {}.", skn);
    // 1. Load the indexed document of the product.
    Map<String, Object> document = getDocumentBySkn(skn);
    logger.info("Get the document for the product. document: \n{}", document);
    // 2. Parse the mapping to obtain the field metadata.
    Map<String, FieldDesc> fieldDescMap = parseMapping();
    logger.info("Get the field description by mapping. fieldDescMap: \n{}", fieldDescMap);
    // 3. Handle multi-fields and copy_to fields on the document.
    processMultiFields(fieldDescMap, document);
    processCopiedFields(fieldDescMap, document);
    logger.info("Set the value of copied field succeeded.");
    // 4. Analyse each search field and collect its tokens in field order.
    List<String> sortedSearchFields = getSortedSearchFields();
    logger.info("Get the search fields. fields: {}", sortedSearchFields);
    List<String> tokens = new ArrayList<>();
    for (String field : sortedSearchFields) {
        Object rawValue = document.get(field);
        String value = rawValue != null ? rawValue.toString() : null;
        FieldDesc fieldDesc = fieldDescMap.get(field);
        Assert.notNull(fieldDesc, "no field " + field + " defined.");
        List<String> analyzerResult = getAnalyzerResult(fieldDesc, value);
        if (CollectionUtils.isNotEmpty(analyzerResult)) {
            // Work on a private copy: the analyzer result may be a fixed-size list
            // (Arrays.asList), on which removeAll throws UnsupportedOperationException,
            // and modifying the returned list in place would also alter it for its owner.
            List<String> unseen = new ArrayList<>(analyzerResult);
            unseen.removeAll(tokens);
            tokens.addAll(unseen);
        }
    }
    Map<String, Object> map = new HashMap<>();
    map.put("code", "200");
    map.put("message", "product tokens.");
    map.put("data", tokens);
    logger.info("End to get tokens for product skn {}. cost: {}", skn, System.currentTimeMillis() - start);
    return map;
}
/**
 * Decides whether a field matches the keyword under best_fields semantics:
 * the field's tokens must contain every keyword token.
 *
 * @param analyzerResult        tokens of the field value
 * @param keywordAnalyzerResult tokens of the keyword
 * @return {@code true} when every keyword token occurs among the field tokens
 *         (also when there are no keyword tokens)
 */
private boolean judgeMatch(List<String> analyzerResult, List<String> keywordAnalyzerResult) {
    return keywordAnalyzerResult.stream().allMatch(analyzerResult::contains);
}
private List<String> getKeyWordAnalyzerResult(FieldDesc fieldDesc, String keyword) throws ExecutionException {
if (fieldDesc.noNeedAnalyzer()) {
return Arrays.asList(keyword);
}
String searchAnalyzer = fieldDesc.search_analyzer;
if (StringUtils.isEmpty(searchAnalyzer)) {
searchAnalyzer = fieldDesc.analyzer;
... ... @@ -494,11 +204,9 @@ public class SearchExplainerService {
if (StringUtils.isEmpty(value)) {
return new ArrayList<>();
}
if (fieldDesc.noNeedAnalyzer()) {
return Arrays.asList(value);
}
String analyzer = fieldDesc.analyzer;
if (StringUtils.isEmpty(analyzer)) {
analyzer = DEFAULT_ANALYZER;
... ...