Authored by WN\wangnan

fix

Showing 20 changed files with 91 additions and 563 deletions
package com.yoho.search.consumer.dynwords;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.util.Assert;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import javax.servlet.http.HttpServletRequest;
import java.util.*;
@Controller
public class DictToolsController {
private static Logger logger = LoggerFactory.getLogger(DictToolsController.class);
@Autowired
private EsWordDefLogicService esWordDefLogicService;
private static Map<String, Integer> allDictFiles = new LinkedHashMap<String, Integer>();
static {
// 词的类型 1为主词典的词 2为停用词 3为姓氏 4为量词 5为后缀词 6为介词
allDictFiles.put("main.dic", Integer.valueOf(1)); // IK主词典
allDictFiles.put("mydict.dic", Integer.valueOf(1)); // IK扩展词典
allDictFiles.put("single_word_full.dic", Integer.valueOf(1)); // IK单字主词典
allDictFiles.put("words.dic", Integer.valueOf(1)); // MMSEG主词典
allDictFiles.put("words-yoho.dic", Integer.valueOf(1)); // MMSEG扩展词典
// allDictFiles.put("single_word_low_freq.dic", Integer.valueOf(1));
// //IK低频单字主词典 被包括在single_word_full.dic中
allDictFiles.put("stopword.dic", Integer.valueOf(2)); // IK停用词词典
allDictFiles.put("ext_stopword.dic", Integer.valueOf(2)); // IK扩展停用词词典
//allDictFiles.put("surname.dic", Integer.valueOf(3)); // IK姓氏词典 暂不支持
allDictFiles.put("quantifier.dic", Integer.valueOf(4)); // IK量词词典
//allDictFiles.put("suffix.dic", Integer.valueOf(5)); // IK后缀词词典 暂不支持
//allDictFiles.put("preposition.dic", Integer.valueOf(6)); // IK介词词典 暂不支持
allDictFiles.put("synonyms.dic", Integer.valueOf(7)); // 同义词规则
}
/**
* 批量插入字典文件中的词到数据库表
*
* @return 处理结果
*/
@RequestMapping(value = "/dictTools/loadWordFile")
@ResponseBody
public Map<String, Object> loadWordFile(HttpServletRequest request) {
Map<String, Object> rtnMap = new HashMap<String, Object>();
long begin = System.currentTimeMillis();
logger.info("[DynSegWords]DynSegWordsToolsController.loadWordFile start");
String wordFile = "main.dic";
int wordType = 1;
if (request.getParameter("wordFile") != null && request.getParameter("wordFile").trim().length() > 0) {
wordFile = request.getParameter("wordFile").trim();
}
if (request.getParameter("wordType") != null && request.getParameter("wordType").trim().length() > 0) {
wordType = Integer.parseInt(request.getParameter("wordType").trim());
}
Set<String> wordsToAdd = esWordDefLogicService.addWords(wordFile, wordType);
rtnMap.put("code", "200");
rtnMap.put("msg", "Load word file succeed.");
rtnMap.put("count", wordsToAdd == null ? 0 : wordsToAdd.size());
logger.info("[DynSegWords]DynSegWordsToolsController.loadWordFile end. cost: {}",
System.currentTimeMillis() - begin);
return rtnMap;
}
/**
* 批量插入字典文件中的词到数据库表
*
* @return 处理结果
*/
@RequestMapping(value = "/dictTools/loadAllWordFile")
@ResponseBody
public Map<String, Object> loadAllWordFile(HttpServletRequest request) {
Map<String, Object> rtnMap = new HashMap<String, Object>();
long begin = System.currentTimeMillis();
logger.info("[DynSegWords]DynSegWordsToolsController.loadAllWordFile start");
StringBuffer sb = new StringBuffer(75);
Set<String> wordsToAdd;
for (Map.Entry<String, Integer> entry : allDictFiles.entrySet()) {
wordsToAdd = esWordDefLogicService.addWords(entry.getKey(), entry.getValue());
sb.append("file:").append(entry.getKey()).append(", wordType:").append(entry.getValue())
.append(", added selectCount:").append(wordsToAdd.size()).append('|');
}
esWordDefLogicService.deleteWordsFromFile("_delete.dic");
rtnMap.put("code", "200");
rtnMap.put("msg", "Load all word file succeed.");
rtnMap.put("detail", sb.toString());
logger.info("[DynSegWords]DynSegWordsToolsController.loadAllWordFile end. cost: {}",
System.currentTimeMillis() - begin);
return rtnMap;
}
@RequestMapping(value = "/dictTools/addNewWords")
@ResponseBody
public Map<String, Object> addNewWords(HttpServletRequest request) {
Map<String, Object> rtnMap = new HashMap<String, Object>();
long begin = System.currentTimeMillis();
logger.info("[DynSegWords]DynSegWordsToolsController.addNewWords start");
String words = "";
if (request.getParameter("words") != null && request.getParameter("words").trim().length() > 0) {
words = request.getParameter("words").trim();
}
Assert.isTrue(words != null && !words.trim().isEmpty(), "Words cannot be null.");
String wordType = "1";
if (request.getParameter("wordType") != null && request.getParameter("wordType").trim().length() > 0) {
wordType = request.getParameter("wordType").trim();
}
Set<String> wordSet = new HashSet<String>();
if ("7".equals(wordType)) {
wordSet.add(words.trim());
} else {
for (String word : words.split(",")) {
if (word != null && word.trim().length() > 0) {
wordSet.add(word.trim());
}
}
}
Set<String> addedWords = esWordDefLogicService.addNewWords(wordSet, Integer.parseInt(wordType));
rtnMap.put("code", "200");
rtnMap.put("msg", "Add new word succeed.");
rtnMap.put("detail", addedWords);
logger.info("[DynSegWords]DynSegWordsToolsController.addNewWords end. cost: {}",
System.currentTimeMillis() - begin);
return rtnMap;
}
@RequestMapping(value = "/dictTools/reactiveWords")
@ResponseBody
public Map<String, Object> reactiveWords(HttpServletRequest request) {
Map<String, Object> rtnMap = new HashMap<String, Object>();
long begin = System.currentTimeMillis();
logger.info("[DynSegWords]DynSegWordsToolsController.reactiveWords start");
String words = "";
if (request.getParameter("words") != null && request.getParameter("words").trim().length() > 0) {
words = request.getParameter("words").trim();
}
Assert.isTrue(words != null && !words.trim().isEmpty(), "Words cannot be null.");
String wordType = "1";
if (request.getParameter("wordType") != null && request.getParameter("wordType").trim().length() > 0) {
wordType = request.getParameter("wordType").trim();
}
Set<String> wordSet = new HashSet<String>();
if ("7".equals(wordType)) {
wordSet.add(words.trim());
} else {
for (String word : words.split(",")) {
if (word != null && word.trim().length() > 0) {
wordSet.add(word.trim());
}
}
}
esWordDefLogicService.reactiveWords(wordSet, Integer.parseInt(wordType));
rtnMap.put("code", "200");
rtnMap.put("msg", "reactive words succeed.");
logger.info("[DynSegWords]DynSegWordsToolsController.reactiveWords end. cost: {}",
System.currentTimeMillis() - begin);
return rtnMap;
}
@RequestMapping(value = "/dictTools/deleteWords")
@ResponseBody
public Map<String, Object> deleteWords(HttpServletRequest request) {
Map<String, Object> rtnMap = new HashMap<String, Object>();
long begin = System.currentTimeMillis();
logger.info("[DynSegWords]DynSegWordsToolsController.deleteWords start");
String words = "";
if (request.getParameter("words") != null && request.getParameter("words").trim().length() > 0) {
words = request.getParameter("words").trim();
}
Assert.isTrue(words != null && !words.trim().isEmpty(), "Words cannot be null.");
String wordType = "1";
if (request.getParameter("wordType") != null && request.getParameter("wordType").trim().length() > 0) {
wordType = request.getParameter("wordType").trim();
}
List<String> list = new ArrayList<String>();
if ("7".equals(wordType)) {
list.add(words.trim());
} else {
for (String word : words.split(",")) {
if (word != null && word.trim().length() > 0) {
list.add(word.trim());
}
}
}
esWordDefLogicService.deleteWords(list);
rtnMap.put("code", "200");
rtnMap.put("msg", "delete words succeed.");
logger.info("[DynSegWords]DynSegWordsToolsController.deleteWords end. cost: {}",
System.currentTimeMillis() - begin);
return rtnMap;
}
@RequestMapping(value = "/dictTools/deleteWordsFromFile")
@ResponseBody
public Map<String, Object> deleteWordsFromFile(HttpServletRequest request) {
Map<String, Object> rtnMap = new HashMap<String, Object>();
long begin = System.currentTimeMillis();
logger.info("[DynSegWords]DynSegWordsToolsController.deleteWords start");
String wordFile = "_delete.dic";
if (request.getParameter("wordFile") != null && request.getParameter("wordFile").trim().length() > 0) {
wordFile = request.getParameter("wordFile").trim();
}
esWordDefLogicService.deleteWordsFromFile(wordFile);
rtnMap.put("code", "200");
rtnMap.put("msg", "delete words succeed.");
logger.info("[DynSegWords]DynSegWordsToolsController.deleteWords end. cost: {}",
System.currentTimeMillis() - begin);
return rtnMap;
}
}
package com.yoho.search.consumer.dynwords;
import com.yoho.search.dal.model.EsWordDef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
@Service
public class EsWordDefLogicService {
private static final String LINE_SEPARATOR = System.getProperty("line.separator");
private static Logger logger = LoggerFactory.getLogger(EsWordDefLogicService.class);
private static final int BATCH_NUMBER = 5000;
private static final int ACTIVE_WORD_STATUS = 1;
private static final int DISABLED_WORD_STATUS = 0;
@Autowired
private EsWordDefService esWordDefService;
public Set<String> reactiveWords(Set<String> words, int wordType) {
logger.info("[DynSegWords]ESWordsService.reactiveWords start");
int size = words != null ? words.size() : 0;
logger.info("[DynSegWords]Size of words to be reactived is {}.", size);
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words list to be reactived: {}{}", LINE_SEPARATOR,words);
}
if (size == 0) {
logger.info("[DynSegWords]ESWordsService.reactiveWords finished for no words to be reactived.");
return words;
}
// 1. 先从数据库里查询出所有的
Set<String> wordsInDB = getESWordsFromDBByType(wordType);
logger.info("[DynSegWords]Size of words in the DB is {}.", wordsInDB.size());
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words list in the DB: {}{}" ,LINE_SEPARATOR,wordsInDB);
}
// 2. 获取所有增量的词
Set<String> toAddWords = new HashSet<String>();
List<String> toUpdWords = new ArrayList<String>();
for (String word : words) {
if (wordsInDB.contains(word)) {
toUpdWords.add(word);
} else {
toAddWords.add(word);
}
}
logger.info("[DynSegWords]Size of words need to add is {} and need to reactive is {}", toAddWords.size(),
toUpdWords.size());
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words need to add: {}{}" , LINE_SEPARATOR ,toAddWords);
logger.debug("[DynSegWords]The words need to reactive: {}{}" , LINE_SEPARATOR , toUpdWords);
}
// 3. 插入新的词
List<EsWordDef> wordList = new ArrayList<EsWordDef>();
for (String word : toAddWords) {
wordList.add(new EsWordDef(word, wordType));
}
if (!wordList.isEmpty()) {
esWordDefService.insertBatch(wordList);
logger.info("[DynSegWords]Batch insert words to DB. size: " + wordList.size());
}
if (!toUpdWords.isEmpty()) {
esWordDefService.updateStatusBatch(toUpdWords, ACTIVE_WORD_STATUS,
(int) (System.currentTimeMillis() / 1000));
logger.info("[DynSegWords]Batch active words in DB. size: " + toUpdWords.size());
}
logger.info("[DynSegWords]ESWordsService.reactiveWords finished.");
return words;
}
public Set<String> addNewWords(Set<String> words, int wordType) {
logger.info("[DynSegWords]ESWordsService.addNewWords start");
int size = words != null ? words.size() : 0;
logger.info("[DynSegWords]Size of words to be added is {}.", size);
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words list to be added: {}{}",LINE_SEPARATOR ,words);
}
if (size == 0) {
logger.info("[DynSegWords]ESWordsService.addNewWords finished for no words to be added.");
return words;
}
// 1. 先从数据库里查询出所有的
Set<String> wordsInDB = getESWordsFromDBByType(wordType);
logger.info("[DynSegWords]Size of words in the DB is {}.", wordsInDB.size());
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words list in the DB:{}{} " ,LINE_SEPARATOR,wordsInDB);
}
// 2. 获取所有增量的词
words.removeAll(wordsInDB);
logger.info("[DynSegWords]Size of words need to add is {}.", words.size());
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words need to add:{}{} " ,LINE_SEPARATOR , words);
}
if (words.isEmpty()) {
logger.info("[DynSegWords]ESWordsService.addNewWords finished for no words to add.");
return words;
}
// 3. 分批次插入
List<EsWordDef> wordList = new ArrayList<EsWordDef>();
for (String word : words) {
wordList.add(new EsWordDef(word, wordType));
if (wordList.size() == BATCH_NUMBER) {
esWordDefService.insertBatch(wordList);
logger.info("[DynSegWords]Batch insert words to DB. size: " + wordList.size());
wordList = new ArrayList<EsWordDef>();
}
}
if (wordList.size() > 0) {
esWordDefService.insertBatch(wordList);
logger.info("[DynSegWords]Batch insert words to DB. size: " + wordList.size());
}
logger.info("[DynSegWords]ESWordsService.addNewWords finished.");
return words;
}
public Set<String> addWords(String wordFile, int wordType) {
logger.info("[DynSegWords]ESWordsService.addWords start. wordFile: {}, wordType: {}", wordFile, wordType);
String classpath = this.getClass().getResource("/").getPath();
Set<String> words = readFile(classpath + "/dicts/" + wordFile);
words = addNewWords(words, wordType);
// 记录插入的文件
writeFile(classpath + "/dicts/" + wordFile + ".tmp", words);
logger.info("[DynSegWords]ESWordsService.addWords finished.");
return words;
}
private void writeFile(String path, Set<String> words) {
BufferedWriter writer = null;
try {
writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), "UTF-8"));
for (String word : words) {
writer.write(word);
writer.newLine();
}
writer.close();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (writer != null) {
try {
writer.close();
} catch (IOException e1) {
}
}
}
}
private Set<String> readFile(String path) {
BufferedReader reader = null;
Set<String> result = new TreeSet<String>();
try {
reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8));
String tempString = null;
while ((tempString = reader.readLine()) != null) {
result.add(tempString);
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e1) {
}
}
}
return result;
}
private Set<String> getESWordsFromDBByType(int wordType) {
Set<String> wordList = esWordDefService.selectByWordType(wordType);
return wordList != null ? wordList : new HashSet<String>();
}
public void deleteWordsFromFile(String wordFile) {
logger.info("[DynSegWords]ESWordsService.deleteWordsFromFile start. wordFile: {}", wordFile);
String classpath = this.getClass().getResource("/").getPath();
Set<String> words = readFile(classpath + "/dicts/" + wordFile);
int size = words != null ? words.size() : 0;
logger.info("[DynSegWords]Size of words in the file is {}.", size);
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words list in the file: {}{}" , LINE_SEPARATOR , words);
}
if (size == 0) {
logger.info("[DynSegWords]ESWordsService.deleteWordsFromFile finished for no words in file.");
return;
}
// 3. 分批次删除
List<String> wordList = new ArrayList<String>();
for (String word : words) {
if (word != null && word.trim().length() > 0) {
wordList.add(word);
}
if (wordList.size() == BATCH_NUMBER) {
esWordDefService.updateStatusBatch(wordList, DISABLED_WORD_STATUS,
(int) (System.currentTimeMillis() / 1000));
logger.info("[DynSegWords]Batch diabled words to DB. size: " + wordList.size());
wordList = new ArrayList<String>();
}
}
if (wordList.size() > 0) {
esWordDefService.updateStatusBatch(wordList, DISABLED_WORD_STATUS,
(int) (System.currentTimeMillis() / 1000));
logger.info("[DynSegWords]Batch diabled words to DB. size: " + wordList.size());
}
logger.info("[DynSegWords]ESWordsService.deleteWordsFromFile finished.");
}
public void deleteWords(List<String> wordsToDelete) {
logger.info("[DynSegWords]ESWordsService.deleteWords start");
int size = wordsToDelete != null ? wordsToDelete.size() : 0;
logger.info("[DynSegWords]Size of words to be delete is {}.", size);
if (logger.isDebugEnabled()) {
logger.debug("[DynSegWords]The words list to be deleted:{}{} " , LINE_SEPARATOR , wordsToDelete);
}
if (size == 0) {
logger.info("[DynSegWords]ESWordsService.deleteWords finished for no words to be deleted.");
return;
}
esWordDefService.updateStatusBatch(wordsToDelete, DISABLED_WORD_STATUS,
(int) (System.currentTimeMillis() / 1000));
logger.info("[DynSegWords]ESWordsService.deleteWords finished.");
}
}
... ... @@ -12,9 +12,7 @@ import com.yoho.search.core.es.model.ESBluk;
import org.elasticsearch.action.bulk.BulkResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.context.ApplicationEventPublisherAware;
import org.springframework.stereotype.Component;
... ... @@ -27,7 +25,7 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@Component
public class YohoIndexDataLoader implements IYohoIndexDataLoader, ApplicationEventPublisherAware, ApplicationContextAware {
public class YohoIndexDataLoader implements IYohoIndexDataLoader, ApplicationEventPublisherAware {
private final Logger INDEX_REBUILD_LOG = LoggerFactory.getLogger("INDEX_REBULDER");
... ... @@ -73,7 +71,7 @@ public class YohoIndexDataLoader implements IYohoIndexDataLoader, ApplicationEve
INDEX_REBUILD_LOG.info("[yohoIndexName=[{}]][builder={}][loadAllData][totalPageSize={}]", yohoIndexName, indexBuilder.getClass().getSimpleName(), totalPageSize);
// 3、分配任务、并包装异步执行结果
List<Future<Boolean>> futureResults = new ArrayList<Future<Boolean>>();
for (int pageNo = 1; pageNo <= totalPageSize; pageNo++) {
for (int pageNo = 1; pageNo <= totalPageSize; pageNo++) {
final int taskPageNo = pageNo;
Future<Boolean> futureResult = executorService.submit(new Callable<Boolean>() {
@Override
... ... @@ -106,7 +104,7 @@ public class YohoIndexDataLoader implements IYohoIndexDataLoader, ApplicationEve
}
private boolean doLoadDataWithRetry(final String yohoIndexName, final String tempIndexRealName, final IIndexBuilder indexBuilder, final IElasticsearchClient client,
int pageNo, int limit) throws Exception {
int pageNo, int limit) {
int tryCount = 1;
boolean result = false;
while (tryCount <= MAX_RETRY_TIMES && !result) {
... ... @@ -163,10 +161,6 @@ public class YohoIndexDataLoader implements IYohoIndexDataLoader, ApplicationEve
}
}
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
this.applicationContext = applicationContext;
}
@Override
public void setApplicationEventPublisher(ApplicationEventPublisher applicationEventPublisher) {
... ...
... ... @@ -28,7 +28,7 @@ public class TblProductIndexBuilder extends IIndexBuilder {
}
private int getCount() {
StringBuffer countUrl = new StringBuffer();
StringBuilder countUrl = new StringBuilder();
countUrl.append(ISearchConstants.TBL_PRODUCT_URL);
countUrl.append("getproductscountforsearch?client_type=iphone&client_secret=");
String md5 = MD5Util.string2MD5("client_type=iphone&private_key=" + ISearchConstants.PRIVATE_KEY);
... ... @@ -47,17 +47,23 @@ public class TblProductIndexBuilder extends IIndexBuilder {
@SuppressWarnings("rawtypes")
private List<Map> getPageList(int start, int limit) {
StringBuffer url = new StringBuffer();
url.append(ISearchConstants.TBL_PRODUCT_URL);
url.append("getproductslistforsearch?client_type=iphone");
url.append("&limit=" + limit);
url.append("&offset=" + start);
StringBuilder url = new StringBuilder();
url.append(ISearchConstants.TBL_PRODUCT_URL)
.append("getproductslistforsearch?client_type=iphone")
.append("&limit=")
.append(limit)
.append("&offset=")
.append(start);
StringBuffer param = new StringBuffer();
param.append("client_type=iphone");
param.append("&limit=" + limit);
param.append("&offset=" + start);
param.append("&private_key=" + ISearchConstants.PRIVATE_KEY);
StringBuilder param = new StringBuilder();
param.append("client_type=iphone")
.append("&limit=")
.append(limit)
.append("&offset=")
.append(start)
.append("&private_key=")
.append(ISearchConstants.PRIVATE_KEY);
String md5 = MD5Util.string2MD5(param.toString());
url.append("&client_secret=" + md5);
... ...
... ... @@ -57,8 +57,8 @@ public class GoodsBuilder {
JSONArray jsonArray = JSONArray.parseArray(tblProductSkc.getPics());
String defaultSknImages = getCover(jsonArray, true);
productIndexBO.setDefaultImages(defaultSknImages);
String colorNames = new String();
String colorIds = new String();
String colorNames = "";
String colorIds = "";
JSONArray goodsArray = new JSONArray();
for (TblProductSkc skc : tblProductSkcs) {
// 颜色名称
... ...
... ... @@ -22,7 +22,7 @@ public class ShowStatusBuilder {
showStatus = "N";
}
if (tblProduct.getSource().equals("1")) {
if (tblSite.getStatus().equals("1") && tblSite != null ) {
if (tblSite.getStatus().equals("1")) {
showStatus = "N";
}
}
... ...
... ... @@ -28,7 +28,7 @@ public class SizeBuilder {
productIndexBO.setSizeNames(sizeNames);
//赋值sizeIds(通过size名称获取size列表,如果列表中有sort_id等于本skn小分类id的size取这个,如果没有取sort_id=0的那个)
String sizeIds = new String();
String sizeIds = "";
for (TblProductSku tblProductSku : tblProductSkuList) {
List<Size> sizeList = sizeMap.get(tblProductSku.getSize());
if (CollectionUtils.isEmpty(sizeList)) {
... ...
package com.yoho.search.consumer.index.increment.bulks;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import javax.annotation.PostConstruct;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.context.ApplicationEventPublisherAware;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSON;
import com.yoho.error.event.SearchEvent;
import com.yoho.search.base.utils.EventReportEnum;
... ... @@ -27,6 +11,20 @@ import com.yoho.search.consumer.service.base.ProductIndexService;
import com.yoho.search.consumer.service.bo.ProductIndexBO;
import com.yoho.search.core.es.model.ESBluk;
import com.yoho.search.core.es.utils.IgnoreSomeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.context.ApplicationEventPublisherAware;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* Created by wangnan on 2016/12/23.
... ... @@ -87,14 +85,16 @@ public class GlobalIndexBulkService implements ApplicationEventPublisherAware {
}
}
});
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
updateExecutorService.shutdown();
deleteExecutorService.shutdown();
}));
}
/**
* 按id更新索引数据
*
* @param skn
* @param begin
* @param key
*/
public void updateGlobalIndex(Integer skn) {
try {
... ... @@ -110,9 +110,6 @@ public class GlobalIndexBulkService implements ApplicationEventPublisherAware {
/**
* 按id删除索引数据
*
* @param skn
* @param begin
* @param key
*/
public void deleteGlobalIndex(Integer id) {
try {
... ...
... ... @@ -4,7 +4,6 @@ import com.alibaba.fastjson.JSONObject;
import com.yoho.search.base.utils.ISearchConstants;
import com.yoho.search.consumer.index.common.IYohoIndexService;
import com.yoho.search.consumer.index.rebuild.RebuildFlagService;
import com.yoho.search.consumer.service.base.ProductIndexService;
import com.yoho.search.consumer.service.base.ProductService;
import com.yoho.search.consumer.service.base.ProductVectorFeatureService;
import com.yoho.search.consumer.service.logic.ProductVectorFeatureLogicService;
... ... @@ -40,8 +39,6 @@ public class ProductIndexFeatureVectorUpdateFlow implements RetryBusinessFlow {
@Autowired
private ProductService productService;
@Autowired
private ProductIndexService productIndexService;
@Autowired
private IYohoIndexService yohoIndexService;
@Autowired
private RebuildFlagService rebuildFlagService;
... ...
... ... @@ -45,7 +45,7 @@ public class ScoreSknRuleMqListener extends AbstractIndexMqListener {
@Override
protected void deleteData(String id) throws Exception {
int intId=Integer.valueOf(id);
int intId=Integer.parseInt(id);
ScoreSknRule scoreSknRule = scoreSknRuleService.selectById(intId);
if (scoreSknRule ==null) {
return;
... ... @@ -53,7 +53,7 @@ public class ScoreSknRuleMqListener extends AbstractIndexMqListener {
int result=scoreSknRuleService.deleteById(intId);
if (result > 0) {
// 获取product数据
Product product = productService.getBySkn(Integer.valueOf(scoreSknRule.getSkn()));
Product product = productService.getBySkn(scoreSknRule.getSkn());
if (product == null) {
logger.warn("[func=deleteData][indexName={}] can not find this productId(skn= {}) in table product", this.getIndexName(), scoreSknRule.getSkn());
return;
... ...
... ... @@ -35,8 +35,8 @@ public class TblProductMqListener extends AbstractIndexMqListener {
@Override
protected void deleteData(String id) throws Exception {
tblProductService.delete(Integer.valueOf(id));
Integer tblId = Integer.valueOf(id) * (-1);
tblProductService.delete(Integer.parseInt(id));
Integer tblId = Integer.parseInt(id) * (-1);
globalIndexBulkService.deleteGlobalIndex(tblId);
}
... ...
... ... @@ -40,7 +40,7 @@ public class ScoreSknRuleService {
StringBuilder result = new StringBuilder();
List<Integer> scoreSknRules = scoreSknRuleMapper.selectAllSkn();
for (Integer skn: scoreSknRules) {
result.append(",").append(skn);
result.append(',').append(skn);
}
if (result.length() < 1) {
return null;
... ...
... ... @@ -107,7 +107,7 @@ public class PromotionCond {
}
}
class CondDetail {
static class CondDetail {
private final Aggregator aggregator;
private final List<CondItem> condItemList;
... ... @@ -141,7 +141,7 @@ public class PromotionCond {
}
}
class CondItem {
static class CondItem {
private final ItemType itemType;
private final Operator operator;
... ...
... ... @@ -6,9 +6,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.nio.charset.StandardCharsets;
/**
* Created by gemingdan on 2017/6/19.
... ... @@ -49,9 +49,9 @@ public class ScoreSknRuleLogicService implements InitializingBean {
String path = CONFIG_ROOT_PATH + "/" + KEY;
if (this.client.checkExists().forPath(path) == null) {
this.client.create().forPath(path, value.getBytes("UTF-8"));
this.client.create().forPath(path, value.getBytes(StandardCharsets.UTF_8));
} else {
this.client.setData().forPath(path, value.getBytes("UTF-8"));
this.client.setData().forPath(path, value.getBytes(StandardCharsets.UTF_8));
}
} catch (Exception e) {
logger.error("publish function score rule " + KEY + " to zk failed!", e);
... ...
... ... @@ -133,14 +133,14 @@ public class ProductSizesLogicService {
return results;
}
<K, V> void increaseMapCount(K key, Map<K, Integer> map) {
Integer value = map.get(key);
if (value == null) {
value = 0;
}
value = value + 1;
map.put(key, value);
}
// <K, V> void increaseMapCount(K key, Map<K, Integer> map) {
// Integer value = map.get(key);
// if (value == null) {
// value = 0;
// }
// value = value + 1;
// map.put(key, value);
// }
private <K, V> void addValueToSet(K key, V value, Map<K, Set<V>> map) {
Set<V> values = map.get(key);
... ...
... ... @@ -37,11 +37,12 @@ public class ProductPoolDetailSknBuilder implements ViewBuilder {
private List<ProductPoolDetailSknBO> getProductPoolDetailSknBOs(List<Integer> sknList) {
List<ProductPoolDetailSknBO> productPoolDetailSknBOs = new ArrayList<>();
Set<Integer> poolIdSet = new HashSet<>();
for (Integer skn : sknList) {
//把每个skn对应的多个ProductPoolDetail的PoolId拼起来
List<ProductPoolDetail> productPoolDetails = productPoolDetailService.selectByProductSkn(skn);
//先根据poolId去重
Set<Integer> poolIdSet = new HashSet<>();
poolIdSet.clear();
List<ProductPoolDetail> productPoolDetailsTemp = new ArrayList<>();
for (ProductPoolDetail productPoolDetail : productPoolDetails) {
if (!poolIdSet.contains(productPoolDetail.getPoolId())) {
... ...
... ... @@ -38,7 +38,7 @@ public class ForbidenSortBrandLogicService {
@Autowired
private ForbiddenSortBrandMapper forbiddenSortBrandMapper;
private Map<String, List<Integer>> forbiddenSortBrandsMap;
private volatile Map<String, List<Integer>> forbiddenSortBrandsMap;
private String getKey(Integer maxSortId, Integer middleSortId, Integer smallSortId) {
return new StringBuilder().append(maxSortId).append("_").append(middleSortId).append("_").append(smallSortId).toString();
... ...
... ... @@ -54,27 +54,27 @@ public class SpiderBasedHttpRequest {
return null;
}
StringBuffer summary = new StringBuffer();
StringBuilder summary = new StringBuilder();
Object[] summaryNodes = tagNode.evaluateXPath("//div[@class='lemma-summary']");
if (summaryNodes != null && summaryNodes.length != 0) {
for (Object obja : summaryNodes) {
TagNode tna = (TagNode) obja;
summary.append(" ");
summary.append(' ');
summary.append(tna.getText().toString());
}
}
String content = "";
StringBuilder content = new StringBuilder();
Object[] contentNodes = tagNode.evaluateXPath("//div[@class='main-content']");
if (contentNodes != null && contentNodes.length != 0) {
for (Object obja : contentNodes) {
TagNode tna = (TagNode) obja;
content = content + " " + tna.getText().toString();
content.append(' ').append(tna.getText().toString());
}
}
title = title.replaceAll("_百度百科", "");
return new BaikeBO(url, title, summary.toString(), content);
return new BaikeBO(url, title, summary.toString(), content.toString());
}
... ... @@ -86,7 +86,7 @@ public class SpiderBasedHttpRequest {
* @return URL 所代表远程资源的响应结果
*/
private String sendGet(String url, String param) throws Exception {
StringBuffer result = new StringBuffer();
StringBuilder result = new StringBuilder();
BufferedReader in = null;
try {
String urlNameString = url + "?" + param;
... ...
package com.yoho.search.spider.increment;
import com.yoho.core.redis.YHRedisTemplate;
import com.yoho.core.redis.YHSetOperations;
import com.yoho.search.base.utils.RedisKeys;
import org.apache.commons.collections.CollectionUtils;
... ... @@ -19,6 +20,9 @@ public class BlackKeywordsMgr {
@Resource(name = "yhNoSyncSetOperations")
private YHSetOperations<String, String> yhNoSyncSetOperations;
@Resource(name = "yhRedisTemplate")
private YHRedisTemplate yhRedisTemplate;
public Set<String> getBlackKeywords() {
return yhNoSyncSetOperations.members(RedisKeys.YOHO_SEARCH_KEYWORDS_INVALID);
}
... ... @@ -30,4 +34,8 @@ public class BlackKeywordsMgr {
}
}
public void clearAll(){
yhRedisTemplate.delete(RedisKeys.YOHO_SEARCH_KEYWORDS_INVALID);
}
}
... ...
... ... @@ -5,6 +5,7 @@ import com.yoho.search.consumer.job.IndexRebuildJob;
import com.yoho.search.consumer.suggests.common.RetryBusinessFlowExecutor;
import com.yoho.search.spider.conversation.SuggestConversionUpdateFlow;
import com.yoho.search.spider.conversation.SuggestConvertorFlow;
import com.yoho.search.spider.increment.BlackKeywordsMgr;
import com.yoho.search.spider.increment.IncrementCrawlerFlow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
... ... @@ -37,9 +38,21 @@ public class SpiderJob {
@Autowired
private IndexRebuildJob indexRebuildJob;
@Autowired
private BlackKeywordsMgr blackKeywordsMgr;
// 避免连续触发
private volatile AtomicBoolean lockStatus = new AtomicBoolean(false);
@Scheduled(cron = "0 1 0 1 * ?")
public void clearAllBlackWords() {
long begin = System.currentTimeMillis();
LOGGER.info("[func=clearAllBlackWords.start][begin={}]", begin);
blackKeywordsMgr.clearAll();
LOGGER.info("[func=clearAllBlackWords.end][cost={}]", System.currentTimeMillis() - begin);
}
@Scheduled(cron = "0 30 0 * * ?")
public void crawleEmptySearchKeywords() {
// 分析前一天前1000个搜索无结果或者小于10个的关键词 到baidubaike爬虫获取内容
... ...