Authored by Gino Zhang

consumer支持对同义词的增删改操作

package com.yoho.search.consumer.dynwords;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.HttpServletRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.util.Assert;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import javax.servlet.http.HttpServletRequest;
import java.util.*;
/**
 * HTTP tool endpoints for maintaining the dynamic segmentation word dictionaries.
 *
 * <p>Every handler returns a map that carries a {@code code} and a {@code msg} entry and,
 * depending on the endpoint, a {@code count} or {@code detail} entry. The actual work is
 * delegated to {@link DynSegWordsService} and {@link ESWordsService}.
 *
 * <p>Word type {@code 7} denotes a synonym rule. For that type the {@code words} request
 * parameter is handed to the service as one single entry instead of being split on commas.
 */
@Controller
public class DynSegWordsToolsController {

    private static final Logger logger = LoggerFactory.getLogger(DynSegWordsToolsController.class);

    /** Word type under which synonym rules are registered (see the synonyms.dic entry below). */
    private static final String SYNONYM_WORD_TYPE = "7";

    /** Word type applied when the request does not specify one. */
    private static final String DEFAULT_WORD_TYPE = "1";

    /** Dictionary file read by the delete endpoints when no file is specified. */
    private static final String DEFAULT_DELETE_FILE = "_delete.dic";

    @Autowired
    private UpgradeDynSegWordsFlagService upgradeDynSegWordsFlagService;

    @Autowired
    private DynSegWordsService dynSegWordsService;

    @Autowired
    private ESWordsService esWordsService;

    /**
     * Dictionary file name mapped to its word type, in the order the files are loaded by
     * {@link #loadAllWordFile(HttpServletRequest)}.
     */
    private static final Map<String, Integer> allDictFiles = new LinkedHashMap<String, Integer>();

    static {
        // Word types: 1 = main dictionary word, 2 = stop word, 3 = surname, 4 = quantifier,
        // 5 = suffix word, 6 = preposition, 7 = synonym rule.
        allDictFiles.put("main.dic", 1); // IK main dictionary
        allDictFiles.put("mydict.dic", 1); // IK extension dictionary
        allDictFiles.put("single_word_full.dic", 1); // IK single-character main dictionary
        allDictFiles.put("words.dic", 1); // MMSEG main dictionary
        allDictFiles.put("words-yoho.dic", 1); // MMSEG extension dictionary
        // single_word_low_freq.dic (IK low-frequency single-character dictionary) is deliberately
        // not listed: its content is included in single_word_full.dic.
        allDictFiles.put("stopword.dic", 2); // IK stop word dictionary
        allDictFiles.put("ext_stopword.dic", 2); // IK extension stop word dictionary
        allDictFiles.put("surname.dic", 3); // IK surname dictionary
        allDictFiles.put("quantifier.dic", 4); // IK quantifier dictionary
        allDictFiles.put("suffix.dic", 5); // IK suffix word dictionary
        allDictFiles.put("preposition.dic", 6); // IK preposition dictionary
        allDictFiles.put("synonyms.dic", 7); // synonym rules
    }

    /**
     * Bulk-inserts the words of the dictionary file into the database table.
     *
     * <p>The request is refused with code {@code 400} while a force upgrade is in progress.
     *
     * @return the processing result; on success {@code count} holds the number of words
     *         reported by the service (0 when the service returns {@code null})
     */
    @RequestMapping(value = "/dictTools/loadDictFileToDB")
    @ResponseBody
    public Map<String, Object> loadDictFileToDB() {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        if (upgradeDynSegWordsFlagService.isForceUpgrade()) {
            rtnMap.put("code", "400");
            rtnMap.put("msg", "Force upgrade now, please try later...");
            return rtnMap;
        }

        long begin = System.currentTimeMillis();
        logger.info("[DynSegWords]DynSegWordsToolsController.loadDictFileToDB start");
        List<String> wordsToAdd = dynSegWordsService.addMainWordsInFile();
        rtnMap.put("code", "200");
        rtnMap.put("msg", "Update force upgrade seg words succeed.");
        rtnMap.put("count", wordsToAdd == null ? 0 : wordsToAdd.size());
        logger.info("[DynSegWords]DynSegWordsToolsController.loadDictFileToDB end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Loads the words of a single dictionary file.
     *
     * @param request may carry {@code wordFile} (default {@code main.dic}) and
     *                {@code wordType} (default {@code 1})
     * @return the processing result; {@code count} holds the number of words reported by the
     *         service (0 when the service returns {@code null})
     * @throws NumberFormatException if {@code wordType} is present but not an integer
     */
    @RequestMapping(value = "/dictTools/loadWordFile")
    @ResponseBody
    public Map<String, Object> loadWordFile(HttpServletRequest request) {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        long begin = System.currentTimeMillis();
        logger.info("[DynSegWords]DynSegWordsToolsController.loadWordFile start");

        String wordFile = trimmedParameter(request, "wordFile", "main.dic");
        int wordType = Integer.parseInt(trimmedParameter(request, "wordType", DEFAULT_WORD_TYPE));

        Set<String> wordsToAdd = esWordsService.addWords(wordFile, wordType);
        rtnMap.put("code", "200");
        rtnMap.put("msg", "Load word file succeed.");
        rtnMap.put("count", wordsToAdd == null ? 0 : wordsToAdd.size());
        logger.info("[DynSegWords]DynSegWordsToolsController.loadWordFile end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Loads every dictionary file registered in {@code allDictFiles}, in registration order,
     * and afterwards applies the deletions listed in {@code _delete.dic}.
     *
     * @param request the current request; no parameter is read from it
     * @return the processing result; {@code detail} holds one {@code |}-terminated summary
     *         per file with its word type and added count
     */
    @RequestMapping(value = "/dictTools/loadAllWordFile")
    @ResponseBody
    public Map<String, Object> loadAllWordFile(HttpServletRequest request) {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        long begin = System.currentTimeMillis();
        logger.info("[DynSegWords]DynSegWordsToolsController.loadAllWordFile start");

        StringBuilder detail = new StringBuilder(75);
        for (Map.Entry<String, Integer> entry : allDictFiles.entrySet()) {
            Set<String> wordsToAdd = esWordsService.addWords(entry.getKey(), entry.getValue());
            // The single-file endpoint treats a null result as "nothing added"; do the same
            // here so one such file cannot abort the whole run with a NullPointerException.
            int addedCount = wordsToAdd == null ? 0 : wordsToAdd.size();
            detail.append("file:").append(entry.getKey())
                    .append(", wordType:").append(entry.getValue())
                    .append(", added count:").append(addedCount)
                    .append("|");
        }
        esWordsService.deleteWordsFromFile(DEFAULT_DELETE_FILE);

        rtnMap.put("code", "200");
        rtnMap.put("msg", "Load all word file succeed.");
        rtnMap.put("detail", detail.toString());
        logger.info("[DynSegWords]DynSegWordsToolsController.loadAllWordFile end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Adds new words of the given type.
     *
     * @param request must carry a non-blank {@code words} parameter; may carry
     *                {@code wordType} (default {@code 1}). For word type {@code 7} the whole
     *                {@code words} value is one entry, otherwise it is a comma-separated list.
     * @return the processing result; {@code detail} holds the set returned by the service
     * @throws IllegalArgumentException if {@code words} is missing or blank, or if
     *                                  {@code wordType} is not an integer
     */
    @RequestMapping(value = "/dictTools/addNewWords")
    @ResponseBody
    public Map<String, Object> addNewWords(HttpServletRequest request) {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        long begin = System.currentTimeMillis();
        logger.info("[DynSegWords]DynSegWordsToolsController.addNewWords start");

        String words = trimmedParameter(request, "words", "");
        Assert.isTrue(!words.isEmpty(), "Words cannot be null.");
        String wordType = trimmedParameter(request, "wordType", DEFAULT_WORD_TYPE);
        Set<String> wordSet = new HashSet<String>(parseWords(words, wordType));

        Set<String> addedWords = esWordsService.addNewWords(wordSet, Integer.valueOf(wordType));
        rtnMap.put("code", "200");
        rtnMap.put("msg", "Add new word succeed.");
        rtnMap.put("detail", addedWords);
        logger.info("[DynSegWords]DynSegWordsToolsController.addNewWords end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Re-activates words of the given type.
     *
     * @param request must carry a non-blank {@code words} parameter; may carry
     *                {@code wordType} (default {@code 1}). For word type {@code 7} the whole
     *                {@code words} value is one entry, otherwise it is a comma-separated list.
     * @return the processing result
     * @throws IllegalArgumentException if {@code words} is missing or blank, or if
     *                                  {@code wordType} is not an integer
     */
    @RequestMapping(value = "/dictTools/reactiveWords")
    @ResponseBody
    public Map<String, Object> reactiveWords(HttpServletRequest request) {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        long begin = System.currentTimeMillis();
        logger.info("[DynSegWords]DynSegWordsToolsController.reactiveWords start");

        String words = trimmedParameter(request, "words", "");
        Assert.isTrue(!words.isEmpty(), "Words cannot be null.");
        String wordType = trimmedParameter(request, "wordType", DEFAULT_WORD_TYPE);
        Set<String> wordSet = new HashSet<String>(parseWords(words, wordType));

        esWordsService.reactiveWords(wordSet, Integer.valueOf(wordType));
        rtnMap.put("code", "200");
        rtnMap.put("msg", "reactive words succeed.");
        logger.info("[DynSegWords]DynSegWordsToolsController.reactiveWords end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Deletes the given words.
     *
     * @param request must carry a non-blank {@code words} parameter; may carry
     *                {@code wordType} (default {@code 1}), which only decides how
     *                {@code words} is split: for word type {@code 7} the whole value is one
     *                entry, otherwise it is a comma-separated list.
     * @return the processing result
     * @throws IllegalArgumentException if {@code words} is missing or blank
     */
    @RequestMapping(value = "/dictTools/deleteWords")
    @ResponseBody
    public Map<String, Object> deleteWords(HttpServletRequest request) {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        long begin = System.currentTimeMillis();
        logger.info("[DynSegWords]DynSegWordsToolsController.deleteWords start");

        String words = trimmedParameter(request, "words", "");
        Assert.isTrue(!words.isEmpty(), "Words cannot be null.");
        String wordType = trimmedParameter(request, "wordType", DEFAULT_WORD_TYPE);

        esWordsService.deleteWords(parseWords(words, wordType));
        rtnMap.put("code", "200");
        rtnMap.put("msg", "delete words succeed.");
        logger.info("[DynSegWords]DynSegWordsToolsController.deleteWords end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Deletes the words listed in a dictionary file.
     *
     * @param request may carry {@code wordFile} (default {@code _delete.dic})
     * @return the processing result
     */
    @RequestMapping(value = "/dictTools/deleteWordsFromFile")
    @ResponseBody
    public Map<String, Object> deleteWordsFromFile(HttpServletRequest request) {
        Map<String, Object> rtnMap = new HashMap<String, Object>();
        long begin = System.currentTimeMillis();
        // Logged under this method's own name so the entries can be told apart from deleteWords.
        logger.info("[DynSegWords]DynSegWordsToolsController.deleteWordsFromFile start");

        String wordFile = trimmedParameter(request, "wordFile", DEFAULT_DELETE_FILE);

        esWordsService.deleteWordsFromFile(wordFile);
        rtnMap.put("code", "200");
        rtnMap.put("msg", "delete words succeed.");
        logger.info("[DynSegWords]DynSegWordsToolsController.deleteWordsFromFile end. cost: {}",
                System.currentTimeMillis() - begin);
        return rtnMap;
    }

    /**
     * Returns the trimmed value of a request parameter, or {@code defaultValue} when the
     * parameter is absent or blank.
     */
    private static String trimmedParameter(HttpServletRequest request, String name, String defaultValue) {
        String raw = request.getParameter(name);
        if (raw == null) {
            return defaultValue;
        }
        String trimmed = raw.trim();
        return trimmed.isEmpty() ? defaultValue : trimmed;
    }

    /**
     * Turns the {@code words} parameter into individual entries: a single entry for the
     * synonym word type, otherwise the trimmed, non-blank comma-separated parts.
     */
    private static List<String> parseWords(String words, String wordType) {
        List<String> parsed = new ArrayList<String>();
        if (SYNONYM_WORD_TYPE.equals(wordType)) {
            // Kept whole; presumably a synonym rule itself contains commas - confirm with
            // the synonyms.dic format.
            parsed.add(words.trim());
            return parsed;
        }
        for (String word : words.split(",")) {
            String trimmed = word.trim();
            if (!trimmed.isEmpty()) {
                parsed.add(trimmed);
            }
        }
        return parsed;
    }
}
... ...