Authored by 周少峰

Merge branch 'feature/keywordsPage'

@@ -6,7 +6,7 @@ const yohoLib = require('yoho-node-lib'); @@ -6,7 +6,7 @@ const yohoLib = require('yoho-node-lib');
6 6
7 // 全局注册library 7 // 全局注册library
8 yohoLib.global(config); 8 yohoLib.global(config);
9 - 9 +global.yoho.redis = require('./libs/redis');
10 const logger = global.yoho.logger; 10 const logger = global.yoho.logger;
11 const app = express(); 11 const app = express();
12 const seo = require('./apps/seo'); 12 const seo = require('./apps/seo');
@@ -14,10 +14,10 @@ const seo = require('./apps/seo'); @@ -14,10 +14,10 @@ const seo = require('./apps/seo');
14 // 定时任务 主动推送和生成xml 14 // 定时任务 主动推送和生成xml
15 seo.start(); 15 seo.start();
16 16
17 -// 提供sitemap给搜索百度访问  
18 -app.use(express.static(config.sitemapPath)); 17 +app.get('/synchronousKeywords', seo.synchronousKeywords);
  18 +app.get('/sendKeywordsUrls', seo.sendKeywordsUrls);
19 19
20 app.listen(config.port, function() { 20 app.listen(config.port, function() {
21 - logger.info('yoho seo start'); 21 + logger.info(`yoho seo start : ${config.port}`);
22 }); 22 });
23 23
1 'use strict'; 1 'use strict';
2 2
3 -const api = global.yoho.API;  
4 -const rp = require('request-promise');  
5 -const serviceApi = global.yoho.ServiceAPI;  
6 -const Promise = require('bluebird');  
7 -const co = Promise.coroutine;  
8 -const _ = require('lodash');  
9 -const logger = global.yoho.logger;  
10 -const helper = global.yoho.helpers;  
11 -const config = require('../config/config');  
12 const schedule = require('node-schedule'); 3 const schedule = require('node-schedule');
13 -const qs = require('querystring');  
14 -const baiduUrls = {  
15 - urls: 'http://data.zz.baidu.com/urls',  
16 - update: 'http://data.zz.baidu.com/update',  
17 - del: 'http://data.zz.baidu.com/del'  
18 -};  
19 -  
20 -const siteUrls = {  
21 - pcProduct: {  
22 - site: 'https://item.yohobuy.com',  
23 - url: []  
24 - },  
25 - pcGuang: {  
26 - site: 'https://guang.yohobuy.com',  
27 - url: []  
28 - },  
29 - mProduct: {  
30 - site: 'https://m.yohobuy.com',  
31 - url: []  
32 - },  
33 - mGuang: {  
34 - site: 'https://guang.m.yohobuy.com',  
35 - url: [],  
36 - type: 'mip'  
37 - }  
38 -  
39 -};  
40 - 4 +const seoModel = require('./seoModel');
41 5
42 /** 6 /**
43 - * 获取最新1000条商品详情链接和逛详情链接 7 + * 同步建议词(把接口拓展的建议词同步到灰度redis)
44 */ 8 */
45 -const getUrls = () => {  
46 - let apiArr = [api.get('', {method: 'web.product.bdPromotion'}),  
47 - serviceApi.get('/guang/api/v2/article/getLastArticleList', {limit: 100})];  
48 -  
49 - return api.all(apiArr).spread((productData, articleData) => {  
50 -  
51 - _.forEach(_.get(productData, 'data', {}), value => {  
52 - siteUrls.pcProduct.url.push('https:' + helper.urlFormat(`/${value.id}.html`, null, 'item'));  
53 - siteUrls.mProduct.url.push('https:' + helper.urlFormat(`/product/${value.id}.html`, null, 'm'));  
54 - });  
55 -  
56 - _.forEach(_.get(articleData, 'data.artList', {}), value => {  
57 - siteUrls.pcGuang.url.push('https:' + helper.urlFormat(`/${value.articleId}.html`, null, 'guang'));  
58 - siteUrls.mGuang.url.push('https:' + helper.urlFormat(`/mip/guang/info/${value.articleId}.html`  
59 -, null, 'guang.m'));  
60 - });  
61 -  
62 - return siteUrls;  
/**
 * Start the keyword sync (delegates to seoModel.synchronousKeywords).
 *
 * Used both as an Express route handler and as a plain call from the
 * scheduler in `start`, which passes no arguments.
 *
 * @param {object} [req] Express request (unused).
 * @param {object} [res] Express response; ended right away when present.
 */
const synchronousKeywords = (req, res) => {
    seoModel.synchronousKeywords();

    // The scheduled job has no response object to end.
    if (res) {
        res.end();
    }
};
65 13
66 /** 14 /**
67 - * 将链接推送到百度站长  
68 - * @param params object {site: 'https://www.yohobuy.com', type: 'mip'} 默认不需要type  
69 - * @param urls 15 + * 定时缓慢爬取关键词页面生成缓存,防止蜘蛛爬取
70 */ 16 */
71 -const sendUrlsToBaidu = (params, urls) => {  
72 - let paramsDef = {  
73 - token: config.baiduToken  
74 - };  
75 -  
76 - // 过滤无效的参数  
77 - _.forEach(params, (val, key) => {  
78 - if (!val) {  
79 - delete params[key];  
80 - }  
81 - });  
82 17
83 - qs.escape = (str) => {  
84 - return str;  
85 - };  
86 18
87 - let options = {  
88 - url: `${baiduUrls.urls}?${qs.stringify(Object.assign(paramsDef, params), null, null, {})}`,  
89 - headers: {  
90 - 'Content-Type': 'text/plain'  
91 - },  
92 - method: 'post',  
93 - form: urls.join('\n'),  
94 - json: true,  
95 - timeout: 10000,  
96 - gzip: true  
97 - };  
98 -  
99 - return rp(options).then(result => {  
100 - logger.info(Object.assign(params, result, {length: urls.length}));  
101 - }); 19 +/**
  20 + * 向百度推送页面关键词静态页面
  21 + */
/**
 * Start pushing the keyword page URLs (delegates to seoModel.sendKeywordsUrls).
 *
 * Used both as an Express route handler and as a plain call from the
 * scheduler in `start`, which passes no arguments.
 *
 * @param {object} [req] Express request (unused).
 * @param {object} [res] Express response; ended right away when present.
 */
const sendKeywordsUrls = (req, res) => {
    seoModel.sendKeywordsUrls();

    // The scheduled job has no response object to end.
    if (res) {
        res.end();
    }
};
103 26
/**
 * Product-detail and guang push: hands the work over to seoModel.sendUrls.
 */
const sendUrls = function() {
    seoModel.sendUrls();
};
122 33
/**
 * Register the scheduled jobs.
 *
 * - '0 0 1 * * *': push the newest product-detail / guang URLs, then sync keywords.
 * - '0 0 2 * * *': push the keyword page URLs.
 *
 * The jobs call seoModel directly rather than the Express handlers in this
 * file: those handlers call `res.end()`, and a scheduled job has no response
 * object to pass them.
 */
const start = () => {
    schedule.scheduleJob('0 0 1 * * *', () => {
        // Push the newest product-detail and guang article URLs.
        seoModel.sendUrls();

        // Sync keywords.
        seoModel.synchronousKeywords();
    });

    schedule.scheduleJob('0 0 2 * * *', () => {
        // Push keyword pages.
        seoModel.sendKeywordsUrls();
    });
};
131 53
132 module.exports = { 54 module.exports = {
133 start, 55 start,
134 - sendUrls 56 + synchronousKeywords,
  57 + sendKeywordsUrls
135 }; 58 };
  1 +'use strict';
  2 +const api = global.yoho.API;
  3 +const redis = global.yoho.redis;
  4 +const rp = require('request-promise');
  5 +const serviceApi = global.yoho.ServiceAPI;
  6 +const Promise = require('bluebird');
  7 +const co = Promise.coroutine;
  8 +const _ = require('lodash');
  9 +const logger = global.yoho.logger;
  10 +const helper = global.yoho.helpers;
  11 +const config = require('../config/config');
  12 +const qs = require('querystring');
  13 +
  14 +
/**
 * Execute a queued redis multi command.
 *
 * @param {object} multi Object exposing `execAsync()`.
 * @returns {Promise} Resolves with whatever `execAsync()` resolves with.
 */
const multiAsync = function(multi) {
    const pending = multi.execAsync();

    return pending.then(replies => replies);
};
  23 +
// Baidu webmaster endpoints.
const baiduUrls = {
    urls: 'http://data.zz.baidu.com/urls',
    update: 'http://data.zz.baidu.com/update',
    del: 'http://data.zz.baidu.com/del'
};

// One entry per target site. `url` collects the links for that site;
// `type`, when set, is forwarded as a push parameter.
const siteUrls = {
    pcProduct: {site: 'https://item.yohobuy.com', url: []},
    pcGuang: {site: 'https://guang.yohobuy.com', url: []},
    mProduct: {site: 'https://m.yohobuy.com', url: []},
    mGuang: {site: 'https://guang.m.yohobuy.com', url: [], type: 'mip'}
};

// Settings: redis key names.
const redisKey = {
    // List holding the keyword entries.
    keywordsList: 'keywords_mana_list'
};
  57 +
/**
 * Push a batch of URLs to the Baidu webmaster `urls` endpoint.
 *
 * @param {object} params Query parameters, e.g.
 *     {site: 'https://www.yohobuy.com', type: 'mip'}; `type` is optional and
 *     falsy values are left out of the query string.
 * @param {string[]} urls URLs to push; sent newline-separated.
 * @returns {Promise} Resolves after the response has been logged. Resolves
 *     immediately, without sending anything, when `urls` is empty.
 */
const sendUrlsToBaidu = (params, urls) => {
    // Nothing to push: do not spend a request on an empty body.
    if (!urls || urls.length === 0) {
        return Promise.resolve();
    }

    // Drop falsy parameters on a copy so the caller's object is not modified.
    const filtered = _.pickBy(params);
    const query = Object.assign({token: config.baiduToken}, filtered);

    // The query values are sent unescaped. Pass an identity encoder for this
    // call only instead of replacing `qs.escape`, which would change escaping
    // for every other user of the querystring module in the process.
    const queryString = qs.stringify(query, null, null, {
        encodeURIComponent: str => str
    });

    const options = {
        url: `${baiduUrls.urls}?${queryString}`,
        headers: {
            'Content-Type': 'text/plain'
        },
        method: 'post',
        form: urls.join('\n'),
        json: true,
        timeout: 10000,
        gzip: true
    };

    return rp(options).then(result => {
        // Log the parameters (without the token), the response and the batch size.
        logger.info(Object.assign({}, filtered, result, {length: urls.length}));
    });
};
  95 +
/**
 * Fetch the newest product ids and guang article ids and turn them into
 * PC / WAP page URLs, grouped per site.
 *
 * Articles are requested with `limit: 100`; the number of products is decided
 * by the `web.product.bdPromotion` API (the original note says 1000 — TODO confirm).
 *
 * @returns {Promise<object>} Resolves with an object shaped like `siteUrls`
 *     ({pcProduct, pcGuang, mProduct, mGuang}, each with `site`, `url` and an
 *     optional `type`) holding only the URLs from this call.
 */
const getUrls = () => {
    const requests = [
        api.get('', {method: 'web.product.bdPromotion'}),
        serviceApi.get('/guang/api/v2/article/getLastArticleList', {limit: 100})
    ];

    return api.all(requests).spread((productData, articleData) => {
        // Build a fresh result for every call. Appending to the module-level
        // `siteUrls` arrays would make each run re-send every URL gathered
        // since the process started.
        const result = _.mapValues(siteUrls, site => Object.assign({}, site, {url: []}));

        _.forEach(_.get(productData, 'data', {}), value => {
            result.pcProduct.url.push('https:' + helper.urlFormat(`/${value.id}.html`, null, 'item'));
            result.mProduct.url.push('https:' + helper.urlFormat(`/product/${value.id}.html`, null, 'm'));
        });

        _.forEach(_.get(articleData, 'data.artList', {}), value => {
            result.pcGuang.url.push('https:' + helper.urlFormat(`/${value.articleId}.html`, null, 'guang'));
            result.mGuang.url.push('https:' + helper.urlFormat(`/mip/guang/info/${value.articleId}.html`
                , null, 'guang.m'));
        });

        return result;
    });
};
  119 +
/**
 * Collect the newest product-detail and guang URLs and push each site's
 * list (PC and WAP) to Baidu.
 *
 * @returns {Promise} Resolves when every push has finished. Failures are
 *     logged and do not reject, so fire-and-forget callers never produce an
 *     unhandled rejection.
 */
const sendUrls = () => {
    return co(function*() {
        // PC / WAP product-detail and guang links, grouped per site.
        const urls = yield getUrls();

        const sendArr = _.map(urls, value => {
            return sendUrlsToBaidu({site: value.site, type: value.type}, value.url);
        });

        // Wait for the pushes so that a failure is reported below.
        yield api.all(sendArr);
    })().catch(err => {
        logger.error(`sendUrls failed: ${err && err.message}`);
    });
};
  138 +
  139 +
  140 +
/**
 * Call the suggest-word API (`web.search.suggestList`).
 *
 * @param {number} [page] Page number; falls back to 1 when falsy.
 * @param {number} [limit] Page size; falls back to 1000 when falsy.
 * @returns {*} Whatever `api.get` returns for the request.
 */
const getKeywordsApi = (page, limit) => api.get('', {
    page: page || 1,
    limit: limit || 1000,
    method: 'web.search.suggestList'
});
  153 +
/**
 * Copy the suggest words returned by the API into redis.
 *
 * A first one-item request reads `data.total`; the words are then fetched
 * page by page, strictly one request at a time, with a pause between pages.
 * For every word a `keywords_mana:<keyword>` string key is set and that key
 * is moved to the head of the keyword list (LREM followed by LPUSH, so the
 * list holds it once).
 *
 * @returns {Promise} Resolves when the sync has finished or been abandoned.
 *     Failures are logged and do not reject.
 */
const synchronousKeywords = () => {
    const intervalTime = 1000; // pause between API calls, in ms
    const limit = 1000; // keywords requested per API call

    return co(function*() {
        const first = yield getKeywordsApi(1, 1);
        const total = _.get(first, 'data.total', 0);

        // API call failed or returned nothing.
        if (total <= 0) {
            logger.info('synchronousKeywords: no data');
            return;
        }

        const pages = Math.ceil(total / limit);

        for (let page = 1; page <= pages; page++) {
            const result = yield getKeywordsApi(page, limit);
            const list = _.get(result, 'data.suggest_list', []);
            const multi = redis.multi();

            _.forEach(list, value => {
                const key = `keywords_mana:${value.keyword}`;

                multi.set(key, value.keyword);
                multi.lrem(redisKey.keywordsList, 1, key).lpush(redisKey.keywordsList, key);
            });

            // Wait for the write so a redis failure stops the loop and is logged.
            if (list.length > 0) {
                yield multiAsync(multi);
            }

            // Throttle: pause before the next page, but not after the last one.
            if (page < pages) {
                yield new Promise(resolve => setTimeout(resolve, intervalTime));
            }
        }
    })().catch(err => {
        logger.error(`synchronousKeywords failed: ${err && err.message}`);
    });
};
  201 +
/**
 * Read a slice of the keyword list from redis and build the PC and WAP
 * keyword page URLs for it.
 *
 * Each list entry is hex-encoded (UTF-8 bytes, upper case) and used as the
 * page name: `/so/<HEX>.html`.
 *
 * NOTE(review): synchronousKeywords stores `keywords_mana:<keyword>` in this
 * list, so the hex value covers that prefix too — confirm this is what the
 * /so/ pages expect.
 *
 * @param {number} start First list index (LRANGE start).
 * @param {number} end Last list index (LRANGE stop, inclusive).
 * @returns {Promise<{pc: string[], wap: string[]}>} URLs per site.
 */
const getRedisKeywords = (start, end) => {
    return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
        const urls = {pc: [], wap: []};

        _.forEach(res, keyword => {
            // Buffer.from replaces the deprecated `new Buffer(string)`.
            const buff = Buffer.from(keyword).toString('hex').toUpperCase();

            urls.pc.push(`https://www.yohobuy.com/so/${buff}.html`);
            urls.wap.push(`https://m.yohobuy.com/so/${buff}.html`);
        });

        return urls;
    });
};
  221 +
/**
 * Push the keyword page URLs to Baidu in batches.
 *
 * Reads the length of the keyword list, then walks it `count` entries at a
 * time, one batch after another, pausing between batches. Each batch is
 * pushed for both the PC and the WAP site.
 *
 * @returns {Promise} Resolves with `[]` once every batch has been handled,
 *     or with `undefined` when the list is empty. Failures are logged and do
 *     not reject.
 */
const sendKeywordsUrls = () => {
    const intervalTime = 1000; // pause between batches, in ms
    const count = 1000; // list entries per batch

    return co(function*() {
        const total = yield redis.llenAsync(redisKey.keywordsList);

        if (total <= 0) {
            return;
        }

        for (let start = 0; start < total; start += count) {
            // LRANGE's stop index is inclusive, hence the -1: without it each
            // batch would repeat the first entry of the next one.
            const urls = yield getRedisKeywords(start, start + count - 1);

            // Push to Baidu. A failed push is logged and the remaining
            // batches are still sent.
            yield Promise.all([
                sendUrlsToBaidu({site: 'https://www.yohobuy.com'}, urls.pc),
                sendUrlsToBaidu({site: 'https://m.yohobuy.com'}, urls.wap)
            ]).catch(err => {
                logger.error(`sendKeywordsUrls push failed at ${start}: ${err && err.message}`);
            });

            // Throttle: pause before the next batch, but not after the last one.
            if (start + count < total) {
                yield new Promise(resolve => setTimeout(resolve, intervalTime));
            }
        }

        return [];
    })().catch(err => {
        logger.error(`sendKeywordsUrls failed: ${err && err.message}`);
    });
};
  259 +
  260 +module.exports = {
  261 + sendUrls,
  262 + synchronousKeywords,
  263 + sendKeywordsUrls
  264 +};
@@ -17,18 +17,18 @@ module.exports = { @@ -17,18 +17,18 @@ module.exports = {
17 cookieDomain: '.yohobuy.com', 17 cookieDomain: '.yohobuy.com',
18 domains: { 18 domains: {
19 // test3 19 // test3
20 - // singleApi: 'http://api-test3.yohops.com:9999/',  
21 - // api: 'http://api-test3.yohops.com:9999/',  
22 - // service: 'http://service-test3.yohops.com:9999/',  
23 - // serviceNotify: 'http://service-test3.yohops.com:9999/',  
24 - // global: 'http://global-test-soa.yohops.com:9999/', 20 + singleApi: 'http://api-test3.yohops.com:9999/',
  21 + api: 'http://api-test3.yohops.com:9999/',
  22 + service: 'http://service-test3.yohops.com:9999/',
  23 + serviceNotify: 'http://service-test3.yohops.com:9999/',
  24 + global: 'http://global-test-soa.yohops.com:9999/',
25 25
26 // prod 26 // prod
27 - singleApi: 'http://single.yoho.cn/',  
28 - api: 'http://api.yoho.cn/',  
29 - service: 'http://service.yoho.cn/',  
30 - serviceNotify: 'http://service.yoho.cn/',  
31 - global: 'http://api-global.yohobuy.com/', 27 + // singleApi: 'http://single.yoho.cn/',
  28 + // api: 'http://api.yoho.cn/',
  29 + // service: 'http://service.yoho.cn/',
  30 + // serviceNotify: 'http://service.yoho.cn/',
  31 + // global: 'http://api-global.yohobuy.com/',
32 32
33 // gray 33 // gray
34 // singleApi: 'http://single.gray.yohops.com/', 34 // singleApi: 'http://single.gray.yohops.com/',
@@ -133,7 +133,25 @@ module.exports = { @@ -133,7 +133,25 @@ module.exports = {
133 maxQps: 1200, 133 maxQps: 1200,
134 sessionMemcachedPrefix: 'yohobuy_session:', 134 sessionMemcachedPrefix: 'yohobuy_session:',
135 baiduToken: '0lSAO4ZxEKsYopMG', // 百度站长推送的token 135 baiduToken: '0lSAO4ZxEKsYopMG', // 百度站长推送的token
136 - sitemapPath: './files' 136 + redis: {
  137 + connect: {
  138 + host: '127.0.0.1',
  139 + port: '6379',
  140 + retry_strategy(options) {
  141 + if (options.error && options.error.code === 'ECONNREFUSED') {
  142 + console.log('redis连接不成功');
  143 + }
  144 + if (options.total_retry_time > 1000 * 60 * 60 * 6) {
  145 + console.log('redis连接超时');
  146 + return;
  147 + }
  148 + if (options.attempt > 10) {
  149 + return 1000 * 60 * 60 * 0.5;
  150 + }
  151 + return Math.min(options.attempt * 100, 1000);
  152 + }
  153 + }
  154 + }
137 }; 155 };
138 156
139 if (isProduction) { 157 if (isProduction) {
@@ -167,7 +185,26 @@ if (isProduction) { @@ -167,7 +185,26 @@ if (isProduction) {
167 open: false, 185 open: false,
168 url: 'http://123.206.2.55/strategy' 186 url: 'http://123.206.2.55/strategy'
169 }, 187 },
170 - zookeeperServer: 'web.zookeeper.yohoops.org:2181' 188 + zookeeperServer: 'web.zookeeper.yohoops.org:2181',
  189 + redis: {
  190 + connect: {
  191 + host: 'web.redis.yohoops.org'
  192 + },
  193 + port: '6379',
  194 + retry_strategy(options) {
  195 + if (options.error && options.error.code === 'ECONNREFUSED') {
  196 + console.log('redis连接不成功');
  197 + }
  198 + if (options.total_retry_time > 1000 * 60 * 60 * 6) {
  199 + console.log('redis连接超时');
  200 + return;
  201 + }
  202 + if (options.attempt > 10) {
  203 + return 1000 * 60 * 60 * 0.5;
  204 + }
  205 + return Math.min(options.attempt * 100, 1000);
  206 + }
  207 + }
171 }); 208 });
172 } else if (isTest) { 209 } else if (isTest) {
173 Object.assign(module.exports, { 210 Object.assign(module.exports, {
  1 +const redis = require('redis');
  2 +const bluebird = require('bluebird');
  3 +const config = require('../config/config');
// Shared redis client. `global.yoho.redis` points at the client while it is
// connected and is reset to '' on error, so readers of the global can
// falsy-check it before use.
let client;

try {
    client = redis.createClient(config.redis.connect);

    // Add the promise-returning *Async variants used by the models.
    bluebird.promisifyAll(redis.RedisClient.prototype);
    bluebird.promisifyAll(redis.Multi.prototype);

    client.on('error', function(err) {
        // Report the failure instead of dropping it silently.
        console.error('redis error:', err && err.message);
        global.yoho.redis = '';
    });

    client.on('connect', function() {
        global.yoho.redis = client;
    });
} catch (e) {
    // Creating the client failed: log it and leave redis marked unavailable.
    console.error('redis init failed:', e && e.message);
    global.yoho.redis = '';
}

module.exports = client;
@@ -23,6 +23,7 @@ @@ -23,6 +23,7 @@
23 "moment": "^2.18.1", 23 "moment": "^2.18.1",
24 "node-schedule": "^1.2.1", 24 "node-schedule": "^1.2.1",
25 "nodemon": "1.9.2", 25 "nodemon": "1.9.2",
  26 + "redis": "^2.7.1",
26 "request": "^2.79.0", 27 "request": "^2.79.0",
27 "request-promise": "^4.1.1", 28 "request-promise": "^4.1.1",
28 "shelljs": "^0.7.7", 29 "shelljs": "^0.7.7",