// seoModel.js (9.69 KB)
'use strict';
const api = global.yoho.API;
const redis = global.yoho.redis;
const rp = require('request-promise');
const serviceApi = global.yoho.ServiceAPI;
const Promise = require('bluebird');
const co = Promise.coroutine;
const _ = require('lodash');
const logger = global.yoho.logger;
const helper = global.yoho.helpers;
const config = require('../config/config');
const qs = require('querystring');
const fs = require('fs');
const util = require('../libs/util');

// Response envelope template. rpKeyWordsUrl copies it with
// Object.assign({}, ret, {...}) and overrides fields on the copy.
const ret = {
    code: 200,
    message: '',
    data: {}
};

/**
 * Execute every command queued on a redis MULTI batch.
 * @param multi batch object exposing execAsync()
 * @returns promise settling with whatever execAsync() settles with
 */
const multiAsync = function(multi) {
    const pending = multi.execAsync();

    return pending.then(replies => replies);
};

// Baidu webmaster (data.zz.baidu.com) endpoints.
// Only `urls` is referenced in this file (by sendUrlsToBaidu).
const baiduUrls = {
    urls: 'http://data.zz.baidu.com/urls',
    update: 'http://data.zz.baidu.com/update',
    del: 'http://data.zz.baidu.com/del'
};

// One bucket per target site: `site` is the origin handed to Baidu as the
// `site` parameter, `url` is the list of page links belonging to that site.
// getUrls reads this table when assembling its result.
const siteUrls = {
    pcProduct: {
        site: 'https://item.yohobuy.com',
        url: []
    },
    pcGuang: {
        site: 'https://guang.yohobuy.com',
        url: []
    },
    mProduct: {
        site: 'https://m.yohobuy.com',
        url: []
    },
    mGuang: {
        site: 'https://guang.m.yohobuy.com',
        url: []
    }

};



// Configuration: redis key names used by this module
const redisKey = {
    keywordsList: 'keywords_mana_list' // keyword list
};

/**
 * Push a batch of links to the Baidu webmaster link-submission endpoint.
 *
 * The caller's `params` object is never modified, and the shared
 * `querystring` module is not patched: the pass-through encoder is supplied
 * for this one stringify call only.
 *
 * @param params object {site: 'https://www.yohobuy.com', type: 'mip'}; type is
 *               optional, falsy entries are left out of the query string
 * @param urls   array of links to submit, joined with newlines in the request
 * @returns promise resolving (with no value) once the response has been logged;
 *          resolves immediately, without a request, when urls is empty
 */
const sendUrlsToBaidu = (params, urls) => {
    // Nothing to submit: skip the round trip instead of posting an empty body
    if (!urls || urls.length === 0) {
        return Promise.resolve();
    }

    // Keep only the params that carry a value, working on a copy
    const filtered = {};

    Object.keys(params || {}).forEach(key => {
        if (params[key]) {
            filtered[key] = params[key];
        }
    });

    const query = Object.assign({token: config.baiduToken}, filtered);

    // Values go out unescaped, exactly as before, but the identity encoder is
    // scoped to this call instead of overriding qs.escape for the whole process
    const queryString = qs.stringify(query, null, null, {
        encodeURIComponent: str => str
    });

    const options = {
        url: `${baiduUrls.urls}?${queryString}`,
        headers: {
            'Content-Type': 'text/plain'
        },
        method: 'post',
        form: urls.join('\n'),
        json: true,
        timeout: 10000,
        gzip: true
    };

    return rp(options).then(result => {
        // Log the request params (without the token), the response and the batch size
        logger.info(Object.assign({}, filtered, result, {length: urls.length}));
    });
};

/**
 * Fetch the latest product-detail links (web.product.bdPromotion) and the
 * latest 100 guang article links, grouped per target site (pc and wap).
 *
 * A fresh result object is built on every call. The module-level siteUrls
 * table is only used as a template, so links from earlier calls are not
 * carried over and re-submitted.
 *
 * @returns promise of an object shaped like siteUrls:
 *          {pcProduct, pcGuang, mProduct, mGuang}, each {site, url: []}
 */
const getUrls = () => {
    let apiArr = [api.get('', {method: 'web.product.bdPromotion'}),
        serviceApi.get('/guang/api/v2/article/getLastArticleList', {limit: 100})];

    return api.all(apiArr).spread((productData, articleData) => {
        const result = {};

        // Copy each bucket with its own empty url list
        _.forEach(siteUrls, (bucket, name) => {
            result[name] = Object.assign({}, bucket, {url: []});
        });

        _.forEach(_.get(productData, 'data', {}), value => {
            result.pcProduct.url.push('https:' + helper.urlFormat(`/${value.erpProductId}.html`, null, 'item'));
            result.mProduct.url.push('https:' + helper.urlFormat(`/product/${value.erpProductId}.html`, null, 'm'));
        });

        _.forEach(_.get(articleData, 'data.artList', {}), value => {
            result.pcGuang.url.push('https:' + helper.urlFormat(`/${value.articleId}.html`, null, 'guang'));
            result.mGuang.url.push('https:' + helper.urlFormat(`/guang/info/${value.articleId}.html`,
                null, 'guang.m'));
        });

        return result;
    });
};

/**
 * Push the latest product-detail and guang-detail links returned by getUrls
 * to Baidu, one request per site bucket (pc and wap).
 *
 * @returns promise that resolves once every push has finished. Failures are
 *          logged and never rejected, so callers that ignore the return value
 *          (the previous contract) do not trigger unhandled rejections.
 */
const sendUrls = () => {
    return co(function*() {
        // Links for the pc/wap product pages and guang pages
        const urls = yield getUrls();

        const sendArr = _.map(urls, value => {
            return sendUrlsToBaidu({site: value.site, type: value.type}, value.url);
        });

        // Wait for the pushes so their errors surface in the catch below
        yield api.all(sendArr);
    })().catch(err => {
        logger.error(`sendUrls failed: ${err && err.message}`);
    });
};



/**
 * Query the keyword-suggestion API (method web.search.suggestList).
 * @param page  page number, 1 when falsy
 * @param limit page size, 1000 when falsy
 * @param count `count` request parameter, 6 when falsy
 * @returns whatever api.get returns for the request
 */
const getKeywordsApi = (page, limit, count) => api.get('', {
    page: page ? page : 1,
    limit: limit ? limit : 1000,
    count: count ? count : 6,
    method: 'web.search.suggestList'
});

/**
 * Synchronise the suggestion keywords into redis.
 *
 * A first one-item request reads `data.total`; the pages are then fetched one
 * after another, each page being requested only after the previous one has
 * been written, with `intervalTime` ms between pages. Exactly
 * ceil(total / limit) pages are requested. Any failure stops the run and is
 * logged.
 *
 * @returns promise that settles once the total has been read and the first
 *          page request has been issued; the remaining pages are processed in
 *          the background
 */
const synchronousKeywords = () => {
    const intervalTime = 1000; // pause between two page requests
    const limit = 1000; // keywords requested per page

    return getKeywordsApi(1, 1).then(res => {
        const total = _.get(res, 'data.total', 0);

        // API call failed or nothing to synchronise
        if (total <= 0) {
            console.log('no data');
            return;
        }

        const totalPage = Math.ceil(total / limit);

        // Fetch one page, write it to redis, then schedule the next page
        const syncPage = page => {
            return getKeywordsApi(page, limit).then(result => {
                const multi = redis.multi();

                _.forEach(_.get(result, 'data.suggest_list', []), value => {
                    const raw = value && value.keyword;

                    // Entries without a usable keyword are skipped instead of throwing
                    if (typeof raw !== 'string') {
                        return;
                    }

                    const keyword = raw.split(' ').join('');

                    if (keyword.length <= 2) {
                        return;
                    }

                    const key = `keywords_mana:${keyword}`;

                    multi.set(key, keyword);

                    // Drop any existing entry first so the list holds each key once
                    multi.lrem(redisKey.keywordsList, 1, key).lpush(redisKey.keywordsList, key);
                });

                return multiAsync(multi);
            }).then(() => {
                if (page < totalPage) {
                    setTimeout(() => syncPage(page + 1), intervalTime);
                }
            }).catch(err => {
                // Stop here: no further page is scheduled after a failure
                logger.error(`synchronousKeywords stopped at page ${page}: ${err && err.message}`);
            });
        };

        syncPage(1);
    });
};

// Record keywords: append `str` as one line to `file`; a failed append is only logged
const writeFile = (file, str) => {
    const onAppended = err => {
        if (!err) {
            return;
        }

        logger.info(err);
    };

    fs.appendFile(file, `${str}\n`, onAppended);
};

/**
 * Read a slice of the keyword list from redis and return the bare keywords.
 * @param start first index, passed straight to lrangeAsync
 * @param end   last index, passed straight to lrangeAsync
 * @returns promise of an array of keywords with the 'keywords_mana:' prefix removed
 */
const getRedisKeywords2 = (start, end) => {
    const stripPrefix = entry => entry.replace('keywords_mana:', '');

    return redis
        .lrangeAsync(redisKey.keywordsList, start, end)
        .then(entries => _.map(entries, stripPrefix));
};

/**
 * Read a slice of the keyword list from redis and build the pc and wap search
 * URLs for every keyword. Each keyword is also appended to the hourly
 * keywords log together with its pc URL.
 *
 * @param start first index, passed straight to lrangeAsync
 * @param end   last index, passed straight to lrangeAsync
 * @returns promise of {pc: [...], wap: [...]} with one URL per keyword in each list
 */
const getRedisKeywords = (start, end) => {
    return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
        const urls = {pc: [], wap: []};

        // Same file for the whole batch, so work it out once
        const logFile = `./logs/keywords_${helper.dateFormat('YYYY-MM-DD H', new Date())}.log`;

        _.forEach(res, entry => {
            const keyword = entry.replace('keywords_mana:', '');

            // The search path is the upper-case hex of the keyword bytes.
            // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
            const buff = Buffer.from(keyword).toString('hex').toUpperCase();
            const pcUrl = `https://www.yohobuy.com/so/${buff}.html`;

            writeFile(logFile, `${keyword} ${pcUrl}`);

            // TODO record the keywords that have already been pushed
            urls.pc.push(pcUrl);
            urls.wap.push(`https://m.yohobuy.com/so/${buff}.html`);
        });

        return urls;
    });
};

/**
 * Push the search URLs of every keyword stored in redis to Baidu, one window
 * of `count` keywords per `intervalTime` ms.
 *
 * The windows do not overlap: the end index handed to getRedisKeywords is
 * inclusive, so each window is [start, start + count - 1]. Once the whole
 * list has been covered the timer is cleared and no further request is made.
 * A failure in either the redis read or a Baidu push stops the run and is
 * logged.
 *
 * @returns promise resolving to undefined when the list is empty, otherwise
 *          to [] as soon as the timer has been started (pushes continue in
 *          the background)
 */
const sendKeywordsUrls = () => {
    return redis.llenAsync(redisKey.keywordsList).then(total => {
        console.log(total);
        if (total <= 0) {
            return;
        }

        const intervalTime = 1000; // pause between two windows
        const count = 1000; // keywords per window
        let start = 0;

        const interval = setInterval(() => {
            if (start >= total) {
                clearInterval(interval);
                return;
            }

            const from = start;

            start += count;

            getRedisKeywords(from, from + count - 1).then(urls => {
                // Push to Baidu; returned so a failed push reaches the catch below
                return Promise.all([
                    sendUrlsToBaidu({site: 'https://www.yohobuy.com'}, urls.pc),
                    sendUrlsToBaidu({site: 'https://m.yohobuy.com'}, urls.wap)
                ]);
            }).catch(err => {
                clearInterval(interval);
                logger.error(`sendKeywordsUrls stopped at offset ${from}: ${err && err.message}`);
            });
        }, intervalTime);

        return [];
    });
};

/**
 * Request one keyword page and report how it was served.
 *
 * Never rejects: every outcome is returned as a copy of the `ret` envelope.
 *
 * @param url absolute URL to request
 * @returns promise of
 *   - {code: 400, message: 'url is empty'} when url is falsy
 *   - {code: 200, data: {url, cache}} on success, cache being the
 *     x-cache-status response header
 *   - {code: <http status>, data: {url}} when the error carries a statusCode
 *   - {code: 500, message: <error message>, data: {url}} for errors without a
 *     statusCode (timeouts, connection failures)
 */
const rpKeyWordsUrl = (url) => {
    if (!url) {
        return Promise.resolve(Object.assign({}, ret, {
            code: 400,
            message: 'url is empty'
        }));
    }

    return rp({
        uri: url,
        method: 'GET',
        headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 Chrome/55.0.2883.75 Safari/537.36'
        },
        resolveWithFullResponse: true, // full response: headers and statusCode
        timeout: 10 * 1000,
        gzip: true
    }).then(response => {
        return Object.assign({}, ret, {
            data: {
                url: url,
                cache: response.headers['x-cache-status']
            }
        });
    }).catch(err => {
        const status = err && err.statusCode;

        return Object.assign({}, ret, {
            // Errors without a status used to yield `code: undefined`
            code: status || 500,

            // Only keep the message when there is no status to explain the failure
            message: status ? '' : String(err && err.message || ''),
            data: {
                url: url
            }
        });
    });
};

/**
 * Request the pc and wap search page of every stored keyword, `size` keywords
 * per tick, one tick every 300 ms.
 *
 * The page to resume from is read from the redis hash field yohoSeo/rpPage
 * and written back at the start of each tick. When a page comes back empty
 * the timer is cleared and rpPage is reset to 1. Each tick works on its own
 * page number, so ticks that overlap (requests may outlast the 300 ms period)
 * cannot skip or repeat a page. A failure inside a tick stops the timer and
 * is logged.
 *
 * @returns promise of the stored start page (1 when nothing is stored),
 *          resolved as soon as the timer has been started
 */
const visitKeyWordsUrl = () => {
    const size = 5;

    return redis.hgetAsync('yohoSeo', 'rpPage').then(page => {
        page = page || 1;

        // The stored value arrives as a string; fall back to 1 if it is not a number
        let nextPage = parseInt(page, 10) || 1;

        const intval = setInterval(() => {
            // Page owned by this tick only
            const currentPage = nextPage++;

            return redis.hsetAsync('yohoSeo', 'rpPage', currentPage).then(() => {
                return util.sleep(100);
            }).then(() => {
                return getRedisKeywords2((currentPage - 1) * size, currentPage * size - 1);
            }).then(keywords => {
                if (keywords.length <= 0) {
                    // End of the list: stop and start from page 1 next time
                    clearInterval(intval);
                    return redis.hsetAsync('yohoSeo', 'rpPage', 1).then(() => {
                        return [];
                    });
                }

                const requests = [];

                _.forEach(keywords, keyword => {
                    // Buffer.from replaces the deprecated `new Buffer(string)` constructor
                    const buff = Buffer.from(keyword).toString('hex').toUpperCase();

                    // TODO record the keywords that have already been pushed
                    requests.push(
                        rpKeyWordsUrl(`https://www.yohobuy.com/so/${buff}.html`),
                        rpKeyWordsUrl(`https://m.yohobuy.com/so/${buff}.html`)
                    );
                });

                return Promise.all(requests);
            }).then(d => {
                console.log(`rpKeyWordsUrl => page: ${currentPage}, result: ${JSON.stringify(d)}`);
                return d;
            }).catch(err => {
                clearInterval(intval);
                logger.error(`visitKeyWordsUrl stopped at page ${currentPage}: ${err && err.message}`);
            });
        }, 300);

        return page;
    });
};

// Public entry points of this module
module.exports = {
    sendUrls,
    synchronousKeywords,
    sendKeywordsUrls,
    visitKeyWordsUrl
};