...
|
...
|
@@ -11,6 +11,13 @@ const helper = global.yoho.helpers; |
|
|
const config = require('../config/config');
|
|
|
const qs = require('querystring');
|
|
|
const fs = require('fs');
|
|
|
const util = require('../libs/util');
|
|
|
|
|
|
/**
 * Default response envelope used by the helpers in this module.
 *
 * Callers build their result with `Object.assign({}, ret, {...})` and override
 * only the fields that differ. That copy is shallow, so a result that does not
 * override `data` shares this `data` object — treat it as read-only.
 */
const ret = {
    code: 200,
    message: '',
    data: {}
};
|
|
|
|
|
|
/**
|
|
|
* redis multi command
|
...
|
...
|
@@ -217,6 +224,19 @@ const writeFile = (file, str) => { |
|
|
* @type {{getKeyWordsUrl}}
|
|
|
*/
|
|
|
|
|
|
/**
 * Read a slice of the redis keyword list and return the entries with the
 * `keywords_mana:` marker removed.
 *
 * @param {number} start first list index passed to `lrange`
 * @param {number} end last list index passed to `lrange`
 * @returns {Promise<string[]>} the entries of that slice without the marker
 */
const getRedisKeywords2 = (start, end) => {
    return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
        return _.map(res, (elem) => {
            // string pattern: only the first occurrence is removed
            return elem.replace('keywords_mana:', '');
        });
    });
};
|
|
|
|
|
|
/**
|
|
|
* Query the keywords stored in redis
|
|
|
* @type {{getKeyWordsUrl}}
|
|
|
*/
|
|
|
|
|
|
const getRedisKeywords = (start, end) => {
|
|
|
return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
|
|
|
let urls = {pc: [], wap: []};
|
...
|
...
|
@@ -274,8 +294,90 @@ const sendKeywordsUrls = () => { |
|
|
});
|
|
|
};
|
|
|
|
|
|
/**
 * Issue a GET request for a keyword page and report the cache status header
 * of the response.
 *
 * The returned promise never rejects: every outcome is expressed through a
 * copy of the `ret` envelope.
 *
 * NOTE(review): `rp` is defined outside this block; the options used here
 * (`resolveWithFullResponse`, `gzip`) match request-promise — confirm.
 *
 * @param {string} url absolute URL to request
 * @returns {Promise<{code: number, message: string, data: Object}>}
 *   - `code: 400` when `url` is empty;
 *   - `code: 200` with `data.url` and `data.cache` (the `x-cache-status`
 *     response header) on success;
 *   - the HTTP status of the failure, or `500` when the error carries no
 *     `statusCode` (timeout, DNS failure, ...), with `data.url` set.
 */
const rpKeyWordsUrl = (url) => {
    if (!url) {
        return Promise.resolve(Object.assign({}, ret, {
            code: 400,
            message: 'url is empty'
        }));
    }

    return rp({
        uri: url,
        method: 'GET',
        headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 Chrome/55.0.2883.75 Safari/537.36'
        },
        resolveWithFullResponse: true, // needed to read headers / statusCode
        timeout: 10 * 1000,
        gzip: true
    }).then(response => {
        return Object.assign({}, ret, {
            data: {
                url: url,
                cache: response.headers['x-cache-status']
            }
        });
    }).catch(err => {
        const statusCode = err && err.statusCode;

        return Object.assign({}, ret, {
            // errors without a statusCode used to leave `code` undefined
            code: statusCode || 500,

            // keep the reason for non-HTTP failures; HTTP failures are
            // already described by their status code
            message: statusCode ? ret.message : String((err && err.message) || err),
            data: {
                url: url
            }
        });
    });
};
|
|
|
|
|
|
/**
 * Walk the redis keyword list page by page and request the search page of
 * every keyword on four hosts (www1, www2, m1, m2) through `rpKeyWordsUrl`.
 *
 * The page to resume from is read from field `rpPage` of the redis hash
 * `yohoSeo`. Every 300ms one page of 5 keywords is processed: the page number
 * is saved back to redis, the keywords are loaded with `getRedisKeywords2`,
 * hex-encoded (upper case) and requested. When a page comes back empty the
 * interval is stopped and `rpPage` is reset to 1.
 *
 * NOTE(review): presumably this exists to warm the page cache
 * (`rpKeyWordsUrl` reports `x-cache-status`) — confirm.
 *
 * @returns {Promise<string|number>} resolves, before the first page is
 *   processed, with the page the walk starts from (the stored value, or 1)
 */
const visitKeyWordsUrl = () => {
    const size = 5; // keywords per page
    const hosts = ['www1', 'www2', 'm1', 'm2'];

    return redis.hgetAsync('yohoSeo', 'rpPage').then(page => {
        page = page || 1;

        // redis returns strings; keep a numeric cursor for the walk
        let nextPage = parseInt(page, 10) || 1;

        const intval = setInterval(() => {
            // per-tick constant: ticks may overlap, so the page must not be
            // shared with the following tick
            const currentPage = nextPage++;

            return redis.hsetAsync('yohoSeo', 'rpPage', currentPage).then(() => {
                return util.sleep(100);
            }).then(() => {
                return getRedisKeywords2((currentPage - 1) * size, currentPage * size - 1);
            }).then(keywords => {
                if (!keywords || keywords.length <= 0) {
                    // end of the list: stop and restart from page 1 next time
                    clearInterval(intval);

                    return redis.hsetAsync('yohoSeo', 'rpPage', 1).then(() => {
                        return [];
                    });
                }

                const requests = [];

                _.forEach(keywords, keyword => {
                    const buff = Buffer.from(keyword).toString('hex').toUpperCase();

                    // TODO record the keywords that have already been pushed
                    _.forEach(hosts, host => {
                        requests.push(rpKeyWordsUrl(`https://${host}.yohobuy.com/so/${buff}.html`));
                    });
                });

                return Promise.all(requests);
            }).then(d => {
                console.log(`rpKeyWordsUrl => page: ${currentPage}, result: ${JSON.stringify(d)}`);
                return d;
            }).catch(err => {
                // nobody awaits a tick, so report the failure here instead
                // of leaving an unhandled rejection
                console.error(`rpKeyWordsUrl => page: ${currentPage}, error: ${(err && err.message) || err}`);
                return [];
            });
        }, 300);

        return page;
    });
};
|
|
|
|
|
|
module.exports = {
|
|
|
sendUrls,
|
|
|
synchronousKeywords,
|
|
|
sendKeywordsUrls
|
|
|
sendKeywordsUrls,
|
|
|
visitKeyWordsUrl
|
|
|
}; |
...
|
...
|
|