|
|
package com.yoho.search.service.personalized;
|
|
|
|
|
|
import com.yoho.search.base.utils.HttpServletRequestUtils;
|
|
|
import com.yoho.search.service.service.SearchDynamicConfigService;
|
|
|
import java.util.HashMap;
|
|
|
import java.util.Map;
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
|
...
|
...
|
@@ -13,141 +13,163 @@ import org.slf4j.LoggerFactory; |
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
import java.util.HashMap;
|
|
|
import java.util.Map;
|
|
|
|
|
|
import com.yoho.search.base.utils.HttpServletRequestUtils;
|
|
|
import com.yoho.search.service.service.SearchDynamicConfigService;
|
|
|
|
|
|
@Service
|
|
|
public class PersonalVectorFeatureSearch {
|
|
|
|
|
|
private static final Logger PERSONALIZED = LoggerFactory.getLogger("PERSONALIZED");
|
|
|
|
|
|
private static final Double BASE_CONSTANT = 1.0D;
|
|
|
|
|
|
private static final Double FACTOR_CONSTANT = 1.0D;
|
|
|
|
|
|
@Autowired
|
|
|
private SearchDynamicConfigService searchDynamicConfigService;
|
|
|
|
|
|
@Autowired
|
|
|
private PersonalizedRedisService personalizedRedisService;
|
|
|
|
|
|
public void addPersonalizedScriptScore(FunctionScoreQueryBuilder functionScoreQueryBuilder, Map<String,String> paramMap) {
|
|
|
// 1. 获取特征向量版本(即生成时间,该时间需要与skn的生成时间一致才有意义)
|
|
|
String vectorFeatureVersion = searchDynamicConfigService.personalizedSearchVersion();
|
|
|
if (StringUtils.isEmpty(vectorFeatureVersion) || "-1".equals(vectorFeatureVersion)) {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
String uid = paramMap.get("uid");
|
|
|
|
|
|
// 2. 获取用户的特征向量
|
|
|
String userVectorFeature = personalizedRedisService.getUserVectorFeature(uid, vectorFeatureVersion);
|
|
|
if (StringUtils.isEmpty(userVectorFeature)) {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
PERSONALIZED.info("do personal search , paramString is [{}]", HttpServletRequestUtils.genParamString(paramMap));
|
|
|
|
|
|
// 3. 传入参数调用脚本
|
|
|
// field -> productindex索引种保存skn特征向量的字段名
|
|
|
// userFeatureFactors -> 用户特征向量值,多个值之间用逗号分隔
|
|
|
// vectorFeatureVersion -> 用户特征向量版本,当与skn的版本一致才计算相关性
|
|
|
// baseConstant,factorConstant -> 相关性常量和系数,计算规则为 baseConstant + factorConstant * cos(Vuser, Vskn)
|
|
|
Map<String, Object> scriptParams = new HashMap<>();
|
|
|
scriptParams.put("field", "productFeatureFactor");
|
|
|
scriptParams.put("userFeatureFactors", userVectorFeature);
|
|
|
scriptParams.put("vectorFeatureVersion", vectorFeatureVersion);
|
|
|
scriptParams.put("baseConstant", BASE_CONSTANT);
|
|
|
scriptParams.put("factorConstant", FACTOR_CONSTANT);
|
|
|
Script script = new Script("feature_factor_vector_score", ScriptService.ScriptType.INLINE, "native", scriptParams);
|
|
|
functionScoreQueryBuilder.add(ScoreFunctionBuilders.scriptFunction(script));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 根据UID和SKN列表获取特征相关性评分,该方法用于测试和问题定位
|
|
|
*
|
|
|
* @param uid 用户标识
|
|
|
* @param productVectorFeatureMap 商品特征map
|
|
|
* @return 相关性计算结果
|
|
|
*/
|
|
|
public Map<String, Object> calVectorFeature(String uid, String version, Map<String, String> productVectorFeatureMap) {
|
|
|
Map<String, Object> scoreMap = new HashMap<>();
|
|
|
if (productVectorFeatureMap == null || productVectorFeatureMap.isEmpty()) {
|
|
|
scoreMap.put("productVectorFeatureMap", "empty");
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
|
|
|
String vectorFeatureVersion = StringUtils.isNotEmpty(version) ? version : searchDynamicConfigService.personalizedSearchVersion();
|
|
|
scoreMap.put("vectorFeatureVersion", vectorFeatureVersion);
|
|
|
if (StringUtils.isEmpty(vectorFeatureVersion) || "-1".equals(vectorFeatureVersion)) {
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
// 2. 获取用户的特征向量
|
|
|
String userVectorFeature = personalizedRedisService.getUserVectorFeature(uid, vectorFeatureVersion);
|
|
|
scoreMap.put("userVectorFeature", userVectorFeature);
|
|
|
if (StringUtils.isEmpty(userVectorFeature)) {
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
String[] userFeatureFactorArr = userVectorFeature.split(",");
|
|
|
double tempUserFeatureVectorNorm = 0.0D;
|
|
|
int dimensionOfFactors = userFeatureFactorArr.length;
|
|
|
double[] userFeatureFactors = new double[dimensionOfFactors];
|
|
|
double temp;
|
|
|
for (int index = 0; index < dimensionOfFactors; index++) {
|
|
|
temp = Double.parseDouble(userFeatureFactorArr[index].trim());
|
|
|
userFeatureFactors[index] = temp;
|
|
|
tempUserFeatureVectorNorm += temp * temp;
|
|
|
}
|
|
|
|
|
|
final double userFeatureVectorNorm = tempUserFeatureVectorNorm;
|
|
|
scoreMap.put("userFeatureVectorNorm", userFeatureVectorNorm);
|
|
|
scoreMap.put("dimensionOfFactors", dimensionOfFactors);
|
|
|
|
|
|
// 3. 计算相关性得分
|
|
|
productVectorFeatureMap.forEach((skn, vector) -> {
|
|
|
Map<String, Object> content = new HashMap<String, Object>();
|
|
|
content.put("vector", vector);
|
|
|
content.put("score", calculateScore(vector, vectorFeatureVersion, userFeatureFactors, userFeatureVectorNorm));
|
|
|
scoreMap.put(skn, content);
|
|
|
});
|
|
|
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
public double calculateScore(String productFeatureFactor, String vectorFeatureVersion, double[] userFeatureFactors, double userFeatureVectorNorm) {
|
|
|
if (productFeatureFactor == null || productFeatureFactor.trim().isEmpty()) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
String versionPrefix = vectorFeatureVersion + "|";
|
|
|
if (!productFeatureFactor.trim().startsWith(versionPrefix)) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
String[] productFeatureFactorArr = productFeatureFactor.trim().substring(versionPrefix.length()).split(",");
|
|
|
if (productFeatureFactorArr == null || productFeatureFactorArr.length != userFeatureFactors.length) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
double prodFeatureVectorNorm = 0.0D;
|
|
|
double productiveSum = 0.0D;
|
|
|
double tempProdFactor;
|
|
|
for (int i = 0; i < userFeatureFactors.length; i++) {
|
|
|
tempProdFactor = Double.parseDouble(productFeatureFactorArr[i].trim());
|
|
|
productiveSum += tempProdFactor * userFeatureFactors[i];
|
|
|
prodFeatureVectorNorm += tempProdFactor * tempProdFactor;
|
|
|
}
|
|
|
|
|
|
if (prodFeatureVectorNorm == 0) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
double cosScore = productiveSum / (Math.sqrt(prodFeatureVectorNorm) * Math.sqrt(userFeatureVectorNorm));
|
|
|
double finalScore = BASE_CONSTANT + FACTOR_CONSTANT * cosScore;
|
|
|
return finalScore;
|
|
|
}
|
|
|
|
|
|
private static final Logger PERSONALIZED = LoggerFactory.getLogger("PERSONALIZED");
|
|
|
|
|
|
private static final Double BASE_CONSTANT = 1.0D;
|
|
|
|
|
|
private static final Double FACTOR_CONSTANT = 1.0D;
|
|
|
|
|
|
@Autowired
|
|
|
private SearchDynamicConfigService searchDynamicConfigService;
|
|
|
|
|
|
@Autowired
|
|
|
private PersonalizedRedisService personalizedRedisService;
|
|
|
|
|
|
public void addPersonalizedScriptScore(FunctionScoreQueryBuilder functionScoreQueryBuilder, Map<String, String> paramMap) {
|
|
|
// 1. 获取特征向量版本(即生成时间,该时间需要与skn的生成时间一致才有意义)
|
|
|
String vectorFeatureVersion = searchDynamicConfigService.personalizedSearchVersion();
|
|
|
if (StringUtils.isEmpty(vectorFeatureVersion) || "-1".equals(vectorFeatureVersion)) {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
String uid = paramMap.get("uid");
|
|
|
|
|
|
// 2. 获取用户的特征向量
|
|
|
String userVectorFeature = personalizedRedisService.getUserVectorFeature(uid, vectorFeatureVersion);
|
|
|
if (StringUtils.isEmpty(userVectorFeature)) {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
PERSONALIZED.info("do personal search , paramString is [{}]", HttpServletRequestUtils.genParamString(paramMap));
|
|
|
|
|
|
// 3. 传入参数调用脚本
|
|
|
// field -> productindex索引种保存skn特征向量的字段名
|
|
|
// userFeatureFactors -> 用户特征向量值,多个值之间用逗号分隔
|
|
|
// vectorFeatureVersion -> 用户特征向量版本,当与skn的版本一致才计算相关性
|
|
|
// baseConstant,factorConstant -> 相关性常量和系数,计算规则为 baseConstant +
|
|
|
// factorConstant * cos(Vuser, Vskn)
|
|
|
Map<String, Object> scriptParams = new HashMap<>();
|
|
|
scriptParams.put("field", "productFeatureFactor");
|
|
|
scriptParams.put("userFeatureFactors", userVectorFeature);
|
|
|
scriptParams.put("vectorFeatureVersion", vectorFeatureVersion);
|
|
|
scriptParams.put("baseConstant", BASE_CONSTANT);
|
|
|
scriptParams.put("factorConstant", FACTOR_CONSTANT);
|
|
|
Script script = new Script("feature_factor_vector_score", ScriptService.ScriptType.INLINE, "native", scriptParams);
|
|
|
functionScoreQueryBuilder.add(ScoreFunctionBuilders.scriptFunction(script));
|
|
|
}
|
|
|
|
|
|
public void addPersonalizedScriptScoreUserProductFeature(FunctionScoreQueryBuilder functionScoreQueryBuilder, String productVectorFeature) {
|
|
|
// 1. 获取特征向量版本(即生成时间,该时间需要与skn的生成时间一致才有意义)
|
|
|
String vectorFeatureVersion = searchDynamicConfigService.personalizedSearchVersion();
|
|
|
if (StringUtils.isEmpty(vectorFeatureVersion) || "-1".equals(vectorFeatureVersion)) {
|
|
|
return;
|
|
|
}
|
|
|
// 2. 获取商品特征向量
|
|
|
if(StringUtils.isBlank(productVectorFeature)){
|
|
|
return;
|
|
|
}
|
|
|
// 3. 传入参数调用脚本,以商品特征代替用户特征
|
|
|
Map<String, Object> scriptParams = new HashMap<>();
|
|
|
scriptParams.put("field", "productFeatureFactor");
|
|
|
scriptParams.put("userFeatureFactors", productVectorFeature);
|
|
|
scriptParams.put("vectorFeatureVersion", vectorFeatureVersion);
|
|
|
scriptParams.put("baseConstant", BASE_CONSTANT);
|
|
|
scriptParams.put("factorConstant", FACTOR_CONSTANT);
|
|
|
Script script = new Script("feature_factor_vector_score", ScriptService.ScriptType.INLINE, "native", scriptParams);
|
|
|
functionScoreQueryBuilder.add(ScoreFunctionBuilders.scriptFunction(script));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 根据UID和SKN列表获取特征相关性评分,该方法用于测试和问题定位
|
|
|
*
|
|
|
* @param uid
|
|
|
* 用户标识
|
|
|
* @param productVectorFeatureMap
|
|
|
* 商品特征map
|
|
|
* @return 相关性计算结果
|
|
|
*/
|
|
|
public Map<String, Object> calVectorFeature(String uid, String version, Map<String, String> productVectorFeatureMap) {
|
|
|
Map<String, Object> scoreMap = new HashMap<>();
|
|
|
if (productVectorFeatureMap == null || productVectorFeatureMap.isEmpty()) {
|
|
|
scoreMap.put("productVectorFeatureMap", "empty");
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
String vectorFeatureVersion = StringUtils.isNotEmpty(version) ? version : searchDynamicConfigService.personalizedSearchVersion();
|
|
|
scoreMap.put("vectorFeatureVersion", vectorFeatureVersion);
|
|
|
if (StringUtils.isEmpty(vectorFeatureVersion) || "-1".equals(vectorFeatureVersion)) {
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
// 2. 获取用户的特征向量
|
|
|
String userVectorFeature = personalizedRedisService.getUserVectorFeature(uid, vectorFeatureVersion);
|
|
|
scoreMap.put("userVectorFeature", userVectorFeature);
|
|
|
if (StringUtils.isEmpty(userVectorFeature)) {
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
String[] userFeatureFactorArr = userVectorFeature.split(",");
|
|
|
double tempUserFeatureVectorNorm = 0.0D;
|
|
|
int dimensionOfFactors = userFeatureFactorArr.length;
|
|
|
double[] userFeatureFactors = new double[dimensionOfFactors];
|
|
|
double temp;
|
|
|
for (int index = 0; index < dimensionOfFactors; index++) {
|
|
|
temp = Double.parseDouble(userFeatureFactorArr[index].trim());
|
|
|
userFeatureFactors[index] = temp;
|
|
|
tempUserFeatureVectorNorm += temp * temp;
|
|
|
}
|
|
|
|
|
|
final double userFeatureVectorNorm = tempUserFeatureVectorNorm;
|
|
|
scoreMap.put("userFeatureVectorNorm", userFeatureVectorNorm);
|
|
|
scoreMap.put("dimensionOfFactors", dimensionOfFactors);
|
|
|
|
|
|
// 3. 计算相关性得分
|
|
|
productVectorFeatureMap.forEach((skn, vector) -> {
|
|
|
Map<String, Object> content = new HashMap<String, Object>();
|
|
|
content.put("vector", vector);
|
|
|
content.put("score", calculateScore(vector, vectorFeatureVersion, userFeatureFactors, userFeatureVectorNorm));
|
|
|
scoreMap.put(skn, content);
|
|
|
});
|
|
|
|
|
|
return scoreMap;
|
|
|
}
|
|
|
|
|
|
public double calculateScore(String productFeatureFactor, String vectorFeatureVersion, double[] userFeatureFactors, double userFeatureVectorNorm) {
|
|
|
if (productFeatureFactor == null || productFeatureFactor.trim().isEmpty()) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
String versionPrefix = vectorFeatureVersion + "|";
|
|
|
if (!productFeatureFactor.trim().startsWith(versionPrefix)) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
String[] productFeatureFactorArr = productFeatureFactor.trim().substring(versionPrefix.length()).split(",");
|
|
|
if (productFeatureFactorArr == null || productFeatureFactorArr.length != userFeatureFactors.length) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
double prodFeatureVectorNorm = 0.0D;
|
|
|
double productiveSum = 0.0D;
|
|
|
double tempProdFactor;
|
|
|
for (int i = 0; i < userFeatureFactors.length; i++) {
|
|
|
tempProdFactor = Double.parseDouble(productFeatureFactorArr[i].trim());
|
|
|
productiveSum += tempProdFactor * userFeatureFactors[i];
|
|
|
prodFeatureVectorNorm += tempProdFactor * tempProdFactor;
|
|
|
}
|
|
|
|
|
|
if (prodFeatureVectorNorm == 0) {
|
|
|
return BASE_CONSTANT;
|
|
|
}
|
|
|
|
|
|
double cosScore = productiveSum / (Math.sqrt(prodFeatureVectorNorm) * Math.sqrt(userFeatureVectorNorm));
|
|
|
double finalScore = BASE_CONSTANT + FACTOR_CONSTANT * cosScore;
|
|
|
return finalScore;
|
|
|
}
|
|
|
} |
...
|
...
|
|