Authored by 周少峰

keywords page

'use strict';
const api = global.yoho.API;
const rp = require('request-promise');
const serviceApi = global.yoho.ServiceAPI;
const Promise = require('bluebird');
const co = Promise.coroutine;
const _ = require('lodash');
const logger = global.yoho.logger;
const helper = global.yoho.helpers;
const config = require('../config/config');
const schedule = require('node-schedule');
const qs = require('querystring');
const seoModel = require('./seoModel');
// Baidu webmaster (data.zz.baidu.com) endpoints, keyed by action.
// Only `urls` is referenced by the code visible in this file.
const baiduUrls = {
urls: 'http://data.zz.baidu.com/urls',
update: 'http://data.zz.baidu.com/update',
del: 'http://data.zz.baidu.com/del'
};
// One entry per site that URLs are pushed for.
//   site - sent to Baidu as the `site` query parameter
//   url  - list that collects the URLs to submit for that site
//   type - optional extra query parameter (only the mobile guang site sets it)
const siteUrls = {
pcProduct: {
site: 'https://item.yohobuy.com',
url: []
},
pcGuang: {
site: 'https://guang.yohobuy.com',
url: []
},
mProduct: {
site: 'https://m.yohobuy.com',
url: []
},
mGuang: {
site: 'https://guang.m.yohobuy.com',
url: [],
type: 'mip'
}
};
/**
* 获取最新1000条商品详情链接和逛详情链接
* 同步建议词(把接口拓展的建议词同步到灰度redis)
*/
const getUrls = () => {
let apiArr = [api.get('', {method: 'web.product.bdPromotion'}),
serviceApi.get('/guang/api/v2/article/getLastArticleList', {limit: 100})];
return api.all(apiArr).spread((productData, articleData) => {
_.forEach(_.get(productData, 'data', {}), value => {
siteUrls.pcProduct.url.push('https:' + helper.urlFormat(`/${value.id}.html`, null, 'item'));
siteUrls.mProduct.url.push('https:' + helper.urlFormat(`/product/${value.id}.html`, null, 'm'));
});
_.forEach(_.get(articleData, 'data.artList', {}), value => {
siteUrls.pcGuang.url.push('https:' + helper.urlFormat(`/${value.articleId}.html`, null, 'guang'));
siteUrls.mGuang.url.push('https:' + helper.urlFormat(`/mip/guang/info/${value.articleId}.html`
, null, 'guang.m'));
});
return siteUrls;
});
/**
 * Kick off the suggestion-keyword synchronisation implemented by seoModel.
 *
 * Has the (req, res) handler signature, but the response is optional so the
 * function can also be triggered by callers that have no response to end.
 *
 * @param {Object} [req] request object (unused)
 * @param {Object} [res] response object; ended right away when provided
 */
const synchronousKeywords = (req, res) => {
    seoModel.synchronousKeywords();

    // Without this guard a call with no arguments throws on `res.end()`.
    if (res) {
        res.end();
    }
};
/**
 * Push a batch of URLs to the Baidu webmaster link-submission endpoint.
 *
 * @param {Object} params query parameters, e.g. {site: 'https://www.yohobuy.com', type: 'mip'};
 *                        `type` is optional and falsy values are left out of the query
 * @param {string[]} urls URLs to submit; sent newline-separated as the request body
 * @returns {Promise} settles after the response has been logged
 */
const sendUrlsToBaidu = (params, urls) => {
    // Copy the usable parameters instead of deleting keys from the caller's object.
    const filtered = {};

    _.forEach(params, (val, key) => {
        if (val) {
            filtered[key] = val;
        }
    });

    const query = Object.assign({token: config.baiduToken}, filtered);

    // The query values must be sent unescaped. A pass-through encoder is handed to
    // this single stringify call rather than overwriting the process-wide `qs.escape`,
    // which would silently disable escaping for every other user of `querystring`.
    const rawEncoding = {
        encodeURIComponent: str => str
    };

    const options = {
        url: `${baiduUrls.urls}?${qs.stringify(query, null, null, rawEncoding)}`,
        headers: {
            'Content-Type': 'text/plain'
        },
        method: 'post',
        form: urls.join('\n'),
        json: true,
        timeout: 10000,
        gzip: true
    };

    return rp(options).then(result => {
        // Log the parameters (without the token), the response and the batch size.
        logger.info(Object.assign({}, filtered, result, {length: urls.length}));
    });
};
/**
 * Fetch the latest product-detail and guang-article URLs (pc and wap) and push
 * each site's batch to Baidu.
 *
 * @returns {Promise} settles once `api.all` has settled for all pushes, so the
 *                    caller can wait for completion and observe failures
 */
const sendUrls = () => {
    return co(function*() {
        // product-detail and guang links, grouped per site
        const urls = yield getUrls();
        const sendArr = [];

        _.forEach(urls, value => {
            sendArr.push(sendUrlsToBaidu({site: value.site, type: value.type}, value.url));
        });

        // Yield the combined push so it is part of the returned promise instead of
        // being started and forgotten inside the coroutine.
        return yield api.all(sendArr);
    })();
};
/**
 * Synchronise the suggestion keywords (the keywords extended by the API are
 * copied to the gray-release redis) by delegating to seoModel.
 *
 * Has the (req, res) handler signature, but the response is optional so the
 * function can also be triggered by callers that have no response to end.
 *
 * @param {Object} [req] request object (unused)
 * @param {Object} [res] response object; ended right away when provided
 */
const synchronousKeywords = (req, res) => {
    seoModel.synchronousKeywords();

    // Without this guard a call with no arguments throws on `res.end()`.
    if (res) {
        res.end();
    }
};
/**
 * Push the keyword page URLs to Baidu by delegating to seoModel.
 *
 * Has the (req, res) handler signature, but the response is optional so the
 * function can also be triggered by callers that have no response to end.
 *
 * @param {Object} [req] request object (unused)
 * @param {Object} [res] response object; ended right away when provided
 */
const sendKeywordsUrls = (req, res) => {
    seoModel.sendKeywordsUrls();

    // Without this guard a call with no arguments throws on `res.end()`.
    if (res) {
        res.end();
    }
};
/**
 * Push the latest product-detail and guang-article URLs by delegating to seoModel.
 *
 * @returns {*} whatever seoModel.sendUrls() returns, handed back so a caller can
 *              observe the outcome instead of it being dropped
 */
const sendUrls = () => {
    return seoModel.sendUrls();
};
/**
 * Register the scheduled jobs.
 *
 * - '0 0 1 * * *': push the latest product-detail and guang-article URLs, then
 *   synchronise the keywords
 * - '0 0 2 * * *': push the keyword page URLs
 */
const start = () => {
    schedule.scheduleJob('0 0 1 * * *', function() {
        // push the latest product details and guang articles
        sendUrls();

        // Synchronise keywords through the model directly: the (req, res) wrapper
        // ends a response object, and a timer has none to pass.
        seoModel.synchronousKeywords();
    });
    schedule.scheduleJob('0 0 2 * * *', function() {
        // push the keyword pages (model call for the same reason as above)
        seoModel.sendKeywordsUrls();
    });
};
// Public interface: the scheduler entry point (`start`) plus the three
// functions that trigger a URL push or a keyword synchronisation.
module.exports = {
start,
sendUrls,
synchronousKeywords,
sendKeywordsUrls
};
... ...
'use strict';
const api = global.yoho.API;
const redis = global.yoho.redis;
const rp = require('request-promise');
const serviceApi = global.yoho.ServiceAPI;
const Promise = require('bluebird');
const co = Promise.coroutine;
const _ = require('lodash');
const logger = global.yoho.logger;
const helper = global.yoho.helpers;
const config = require('../config/config');
const qs = require('querystring');
/**
* redis multi command
... ... @@ -12,44 +21,126 @@ const multiAsync = (multi)=>{
});
};
// Baidu webmaster (data.zz.baidu.com) endpoints, keyed by action.
// Only `urls` is referenced by the code visible in this file.
const baiduUrls = {
urls: 'http://data.zz.baidu.com/urls',
update: 'http://data.zz.baidu.com/update',
del: 'http://data.zz.baidu.com/del'
};
// One entry per site that URLs are pushed for.
//   site - sent to Baidu as the `site` query parameter
//   url  - list that collects the URLs to submit for that site
//   type - optional extra query parameter (only the mobile guang site sets it)
const siteUrls = {
pcProduct: {
site: 'https://item.yohobuy.com',
url: []
},
pcGuang: {
site: 'https://guang.yohobuy.com',
url: []
},
mProduct: {
site: 'https://m.yohobuy.com',
url: []
},
mGuang: {
site: 'https://guang.m.yohobuy.com',
url: [],
type: 'mip'
}
};
// Configuration: names of the redis keys used by this module.
const redisKey = {
keywordsList: 'keywords_mana_list', // redis list holding the keywords
}
/**
 * Push a batch of URLs to the Baidu webmaster link-submission endpoint.
 *
 * @param {Object} params query parameters, e.g. {site: 'https://www.yohobuy.com', type: 'mip'};
 *                        `type` is optional and falsy values are left out of the query
 * @param {string[]} urls URLs to submit; sent newline-separated as the request body
 * @returns {Promise} settles after the response has been logged
 */
const sendUrlsToBaidu = (params, urls) => {
    // Copy the usable parameters instead of deleting keys from the caller's object.
    const filtered = {};

    _.forEach(params, (val, key) => {
        if (val) {
            filtered[key] = val;
        }
    });

    const query = Object.assign({token: config.baiduToken}, filtered);

    // The query values must be sent unescaped. A pass-through encoder is handed to
    // this single stringify call rather than overwriting the process-wide `qs.escape`,
    // which would silently disable escaping for every other user of `querystring`.
    const rawEncoding = {
        encodeURIComponent: str => str
    };

    const options = {
        url: `${baiduUrls.urls}?${qs.stringify(query, null, null, rawEncoding)}`,
        headers: {
            'Content-Type': 'text/plain'
        },
        method: 'post',
        form: urls.join('\n'),
        json: true,
        timeout: 10000,
        gzip: true
    };

    return rp(options).then(result => {
        // Log the parameters (without the token), the response and the batch size.
        logger.info(Object.assign({}, filtered, result, {length: urls.length}));
    });
};
/**
 * Fetch the latest product-detail and guang-article links and group them by site.
 *
 * Each call returns a freshly built result. `siteUrls` is used only as the
 * template for the per-site `site`/`type` settings and is left untouched.
 *
 * @returns {Promise<Object>} object with the same keys and shape as `siteUrls`,
 *                            whose `url` lists contain only the links from this call
 */
const getUrls = () => {
    const apiArr = [
        api.get('', {method: 'web.product.bdPromotion'}),
        serviceApi.get('/guang/api/v2/article/getLastArticleList', {limit: 100})
    ];

    return api.all(apiArr).spread((productData, articleData) => {
        // Start from empty lists every time. Appending to the module-level arrays
        // made each run resubmit every URL collected by all earlier runs.
        const result = {};

        _.forEach(siteUrls, (site, key) => {
            result[key] = Object.assign({}, site, {url: []});
        });

        _.forEach(_.get(productData, 'data', {}), value => {
            result.pcProduct.url.push('https:' + helper.urlFormat(`/${value.id}.html`, null, 'item'));
            result.mProduct.url.push('https:' + helper.urlFormat(`/product/${value.id}.html`, null, 'm'));
        });

        _.forEach(_.get(articleData, 'data.artList', {}), value => {
            result.pcGuang.url.push('https:' + helper.urlFormat(`/${value.articleId}.html`, null, 'guang'));
            result.mGuang.url.push('https:' +
                helper.urlFormat(`/mip/guang/info/${value.articleId}.html`, null, 'guang.m'));
        });

        return result;
    });
};
/**
 * Fetch the latest product-detail and guang-article URLs (pc and wap) and push
 * each site's batch to Baidu.
 *
 * @returns {Promise} settles once `api.all` has settled for all pushes, so the
 *                    caller can wait for completion and observe failures
 */
const sendUrls = () => {
    return co(function*() {
        // product-detail and guang links, grouped per site
        const urls = yield getUrls();
        const sendArr = [];

        _.forEach(urls, value => {
            sendArr.push(sendUrlsToBaidu({site: value.site, type: value.type}, value.url));
        });

        // Yield the combined push so it is part of the returned promise instead of
        // being started and forgotten inside the coroutine.
        return yield api.all(sendArr);
    })();
};
/**
* 调用接口建议词
*/
const getKeywordsApi = (page, limit) => {
let params = {
page: page || 1,
... ... @@ -68,6 +159,7 @@ const synchronousKeywords = () => {
return getKeywordsApi(1, 1).then(res => {
let start = 0,
page = 1,
intervalTime = 1000, // 循环调用的时间间隔
limit = 1000, // 每次请求接口关键词数量
total = _.get(res, 'data.total', 0);
... ... @@ -102,7 +194,7 @@ const synchronousKeywords = () => {
clearInterval(interval);
});
}, 1000);
}, intervalTime);
});
};
... ... @@ -113,13 +205,14 @@ const synchronousKeywords = () => {
*/
const getRedisKeywords = (start, end) => {
return redis.lrangeAsync('keywords_mana_list', start, end).then(res => {
let urls = [];
return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
let urls = {pc:[], wap: []};
_.forEach(res, keyword => {
let buff = new Buffer(keyword).toString('hex').toUpperCase();
urls.push(`https://www.yohobuy.com/so/${buff}.html`);
urls.pc.push(`https://www.yohobuy.com/so/${buff}.html`);
urls.wap.push(`https://m.yohobuy.com/so/${buff}.html`);
});
return urls;
... ... @@ -130,13 +223,14 @@ const getRedisKeywords = (start, end) => {
* 推送url
*/
const sendKeywordsUrls = () => {
return redis.llenAsync('keywords_mana_list').then(total => {
return redis.llenAsync(redisKey.keywordsList).then(total => {
console.log(total);
if (total <= 0) {
return;
}
let start = 0,
intervalTime = 1000, // 循环调用的时间间隔
count = 1000;
let interval = setInterval(() => {
... ... @@ -144,19 +238,19 @@ const sendKeywordsUrls = () => {
clearInterval(interval);
}
console.log(start);
getRedisKeywords(start, start + count).then(urls => {
console.log(urls);
// 发送到百度
// sendUrlsToBaidu({site: 'https://www.yohobuy.com'}, urls);
sendUrlsToBaidu({site: 'https://www.yohobuy.com'}, urls.pc);
sendUrlsToBaidu({site: 'https://m.yohobuy.com'}, urls.wap);
}).catch(() => {
clearInterval(interval);
});
start += count;
}, 1000);
}, intervalTime);
return [];
... ... @@ -164,6 +258,7 @@ const sendKeywordsUrls = () => {
};
// Public interface of the model: URL push, keyword synchronisation and
// keyword-page push.
module.exports = {
sendUrls,
synchronousKeywords,
sendKeywordsUrls
};
... ...