// spider-community.js 3.64 KB
const rp = require('request-promise');
const lockup = require('node-lockup');
const {logger} = require('./libs/logger');
const config = require('./spider-buyers.json');
const MysqlAdapter = require('./libs/mysql');

// Fallback trend id: getMaxId returns it when `trend_log` yields no usable max(trendId).
const DEFAULT_MAX_ID = 21913490;
// Share-page URL prefix; requestPage appends the trend id to it.
const REQUEST_URL = 'https://m.poizon.com/trend/share?trendId=';

// Database adapter shared by this module, built from the `connect` section of the JSON config.
const mysql = new MysqlAdapter(config.connect, config.connect.database);

/**
 * Fetch the share page of one trend and extract the data embedded in it.
 *
 * The page text after the `that.data = ` marker, up to and including the
 * brace of the first `};`, is parsed as JSON.
 *
 * @param {number} trendId - Trend id appended to REQUEST_URL.
 * @param {number} tid - Worker index; when `skip[tid]` is truthy nothing is requested.
 * @param {number} [errTick=0] - Number of failed attempts already made.
 * @returns {Promise<*>} The parsed JSON value; the page text following the
 *   marker offset when that text is not valid JSON; `undefined` when the
 *   worker is skipped or the failure count reaches three.
 */
const requestPage = async (trendId, tid, errTick = 0) => {
  let failures = errTick;

  for (;;) {
    // Checked before every attempt, so a worker flagged mid-retry stops immediately.
    if (skip[tid]) {
      return;
    }

    try {
      const page = await rp.get({
        url: `${REQUEST_URL}${trendId}`,
        headers: {
          Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
          appVersion: '3.5.0',
          'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        },
        timeout: 2000
      });

      // 12 is the length of the 'that.data = ' marker.
      const afterMarker = page.substring(page.indexOf('that.data = ') + 12);
      // Keep the closing brace of the first '};' and drop the semicolon.
      const candidate = afterMarker.substring(0, afterMarker.indexOf('};') + 1);

      try {
        return JSON.parse(candidate);
      } catch (parseError) {
        // Not JSON: hand back the raw text so callers can tell it from a parsed object.
        return afterMarker;
      }
    } catch (error) {
      failures++;
      logger.error(`request error: ${error.message}, trendId: ${trendId} networkErrTick: ${failures}`);

      if (failures < 3) {
        continue;
      }
      return void 0;
    }
  }
};

/**
 * Store one trend record in the `trend_log` table.
 *
 * Failures are logged together with the offending record and are not
 * rethrown, so the returned promise does not reject on an insert error.
 *
 * @param {Object} params - Values for the named placeholders of the statement.
 * @returns {Promise<*>} Whatever `mysql.insert` resolves with, or `undefined`
 *   when the insert failed.
 */
const insertTable = async(params) => {
  const statement = 'insert `trend_log` (`trendId`, `userId`, `createAt`, `userName`, `icon`, `sex`, `readCount`, `reply`, `fav`, `city`, `images`) values (:trendId, :userId, :createAt, :userName, :icon, :sex, :readCount, :reply, :fav, :city, :images)';

  try {
    return await mysql.insert(statement, params);
  } catch (error) {
    logger.error(`error: ${error.message}, data: ${JSON.stringify(params)}`);
  }
};

/**
 * Look up the highest `trendId` already stored in `trend_log`.
 *
 * @returns {Promise<number>} The stored maximum, or DEFAULT_MAX_ID when the
 *   query returns no rows or a falsy maximum.
 */
const getMaxId = async() => {
  const rows = await mysql.query('select max(`trendId`) as maxId from trend_log');

  // An empty result and a falsy max both fall back to the default start id.
  return (rows.length && rows[0].maxId) || DEFAULT_MAX_ID;
};

// Per-worker stop flags keyed by worker index. requestPage reads them to
// bail out early; doWorker sets them.
let skip = {};

/**
 * Crawl the next batch of trend pages and store each one that yields data.
 *
 * The 10000 ids following the highest stored `trendId` are split round-robin
 * across 5 workers. Each worker stops once it has seen 20 consecutive pages
 * without usable content.
 *
 * @returns {Promise<void>} Resolves after every scheduled page has been
 *   processed (or skipped) and its insert has settled. Per-page failures are
 *   logged and do not reject the returned promise.
 */
const doWorker = async() => {
  const length = 10000;
  const thread = 5;

  // Worker `tid` handles every offset i in [0, length) with i % thread === tid.
  const threadIds = Array.from({length: thread}, (v, tid) => {
    const tids = [];

    for (let i = tid; i < length; i += thread) {
      tids.push(i);
    }
    return tids;
  });
  const maxId = await getMaxId();
  // Every per-page task is collected so completion can be awaited below
  // instead of leaving the promises floating.
  const tasks = [];

  threadIds.forEach((trendIds, tid) => {
    // One wrapped fetcher per worker.
    // NOTE(review): presumably node-lockup serialises calls made through the
    // same wrapper, which the consecutive-error counter relies on — confirm.
    const locktask = lockup(requestPage);
    let errTick = 0;
    skip[tid] = false;

    trendIds.forEach((i) => {
      const trendId = i + maxId + 1;

      tasks.push((async() => {
        try {
          const json = await locktask(trendId, tid);

          if (skip[tid]) {
            return;
          }
          // `typeof null` is 'object', so null must be excluded explicitly.
          if (json !== null && typeof json === 'object') {
            const detail = json.detail;

            const pendingInsert = insertTable({
              trendId: detail.trendId,
              userId: detail.userInfo.userId,
              createAt: detail.createAt,
              userName: detail.userInfo.userName,
              icon: detail.userInfo.icon,
              sex: detail.userInfo.sex,
              readCount: detail.readCount,
              reply: detail.reply,
              fav: detail.fav,
              city: detail.city.city,
              images: detail.images.length
            });
            logger.info(`success: trendId: ${trendId}, reply: ${detail.reply}`);
            errTick = 0;
            // Awaited last so the log and counter reset keep their original timing.
            await pendingInsert;
          } else {
            errTick++;
            logger.error(`request not content trendId: ${trendId}, errTick: ${errTick}`);

            if (errTick >= 20) {
              skip[tid] = true;
              logger.info('over!!!');
            }
          }
        } catch (error) {
          // A payload missing detail/userInfo/city/images must not surface as
          // an unhandled rejection; log it and carry on with the other pages.
          logger.error(`process error: ${error.message}, trendId: ${trendId}`);
        }
      })());
    });
  });

  await Promise.all(tasks);
};

module.exports = doWorker;