// spider-buyers.js 4.29 KB
const dayjs = require('dayjs');
const lockup = require('node-lockup');
const isBetween = require('dayjs/plugin/isBetween');
const _ = require('lodash');
const MysqlAdapter = require('../libs/mysql');
const spider = require('../libs/spider');
const config = require('./spider-buyers.json');
const {logger} = require('../libs/logger');

// Register the isBetween plugin on dayjs. No visible code in this file calls isBetween itself.
dayjs.extend(isBetween)

// Shared database handle, constructed from the `connect` settings in spider-buyers.json.
const mysql = new MysqlAdapter(config.connect, config.connect.database);

// Patterns for the display-time strings attached to sale records.
// "刚刚" = just now.
const RegNow = /刚刚/;
// "<n>分钟前" = n minutes ago.
const RegMinute = /(\d+)分钟前/;
// "<n>小时前" = n hours ago.
const RegHour = /(\d+)小时前/;
// "<n>天前" = n days ago.
const RegDay = /(\d+)天前/;
// "<m>月<d>日" = month and day without a year. The "月" separator is required:
// without it the two digit groups split the day number between them
// (e.g. "15日" became month 1, day 5) and single-digit days never matched.
const RegDate = /(\d+)月(\d+)日/;
// Dotted numeric date such as "2019.03.15".
const RegDateFull = /\d+\.\d+\.\d+/;


/**
 * Turns the display-time string of a sale record into a dayjs instance.
 *
 * Patterns are tried in a fixed order: minutes ago, hours ago, days ago,
 * RegDate, RegDateFull, then "just now". The first one that matches wins.
 *
 * @param {string} relativeTime - Display-time text taken from a sale record.
 * @returns {object} A dayjs instance; `dayjs('1990-01-01')` when no pattern matches.
 */
const parseTime = (relativeTime) => {
  // Relative offsets: each pattern captures a count that is subtracted from "now".
  const relativeUnits = [
    [RegMinute, 'minute'],
    [RegHour, 'hour'],
    [RegDay, 'day'],
  ];

  for (const [pattern, unit] of relativeUnits) {
    const found = relativeTime.match(pattern);

    if (found) {
      // `0 - text` coerces the captured digits to a negative number.
      return dayjs().add(0 - found[1], unit);
    }
  }

  const dateParts = relativeTime.match(RegDate);

  if (dateParts) {
    // The two captures are slotted into a date string whose year is fixed to 2019
    // (presumably month and day; confirm against the upstream format).
    const [, first, second] = dateParts;

    return dayjs(`2019/${first}/${second}`);
  }

  if (RegDateFull.test(relativeTime)) {
    return dayjs(relativeTime);
  }

  // "Just now" maps to the current time; anything unrecognised maps to a fixed early date.
  return RegNow.test(relativeTime) ? dayjs() : dayjs('1990-01-01');
};

// Module-level values set once at load time.
// Nothing in the visible code reads these two module-scope bindings.
let time = Date.now();
let end = 0;


/**
 * Fetches one page of a product's last-sold list, stores its rows, and recurses
 * to the next page until a stop condition is hit.
 *
 * Stop conditions: a row older than the cutoff was seen on this page, the
 * response carries no truthy `lastId`, or a non-200 response has already been
 * retried three times.
 *
 * @param {number} productId - Product whose sale records are fetched.
 * @param {number|string} [lsId=0] - `lastId` cursor sent with the request.
 * @param {number} [page=0] - Page counter; only used for logging and incremented per page.
 * @param {number} [trys=0] - Number of retries already spent on the current page.
 * @param {number} [tid] - Worker index, used only in log lines.
 * @returns {Promise<{productId: number}>} Resolves with the product id once paging stops.
 */
const spiderBuyers = async(productId, lsId = 0, page = 0, trys = 0, tid) => {
  const response = await spider.spiderFetch(productId, 'https://du.hupu.com/mapi/product/lastSoldList', {
    lastId: lsId
  });

  // Non-200: retry the same cursor while attempts remain, otherwise stop for this product.
  if (response.status !== 200) {
    if (trys < 3) {
      logger.info(`tryLogic ${trys}`)
      return spiderBuyers(productId, lsId, page, trys + 1, tid);
    }
    return {productId};
  }

  const nextId = response.data.lastId;
  const rows = response.data.list;
  // Rows strictly before this instant are treated as too old and are not stored.
  const cutoff = dayjs('2018-03-31').add(1, 'second');
  let sawOldRow = false;

  // Stores one row, or flags the page as containing an old row (logged once per page).
  const handleRow = (info) => {
    const soldAt = parseTime(info.formatTime);

    if (soldAt.isBefore(cutoff)) {
      if (!sawOldRow) {
        logger.info(`thread: ${tid}, productid: ${productId}, skip time:${soldAt.format('YYYY-MM-DD HH:mm:ss')}`)
      }
      sawOldRow = true;
      return;
    }

    mysql.insert('INSERT INTO `' + config.table + '` (`productId`, `nickName`, `icon`, `time`, `size`, `soldNum`, `price`) VALUES (:productId, :nickName, :icon, :time, :size, :soldNum, :price)', {
      productId: productId,
      nickName: info.buyer.userName,
      icon: info.buyer.icon,
      size: info.item.size,
      time: soldAt.format('YYYY-MM-DD HH:mm:ss'),
      soldNum: products[productId].count,
      price: products[productId].price,
    });
  };

  rows.forEach(handleRow);

  // An old row on this page ends paging; newer rows on the same page were still stored above.
  if (sawOldRow) {
    return {productId};
  }

  if (!nextId) {
    return {productId};
  }

  logger.info(`thread: ${tid}, productId: ${productId}, nextPage: ${page}, lastId: ${nextId}`)
  return spiderBuyers(productId, nextId, page + 1, 0, tid);
};

//8202200
// Per-product snapshot ({count, price}) keyed by product id; written by start() and
// read by spiderBuyers() when it builds insert parameters.
const products = {};

// Exclusive upper bound of the id range generated by the exported runner; also the stop bound checked in start().
let max = 32000;
// Failure counter: start() resets it to 0 on a 200 response and increments it on a non-200 response with a truthy status.
let failNum = 0;
// Global stop flag: once true, every later start() call returns immediately.
let over = false;

/**
 * Processes a single product id: fetches the product, records its sold count
 * and price in `products`, then crawls its buyers list.
 *
 * @param {{inx: number, tid: number}} task - `inx` is the product id, `tid` the worker index.
 * @returns {Promise<true|undefined|*>} `true` after a normal pass; `undefined` when the
 *   run is already over or the id bound is reached; the logger's return value when the
 *   failure limit trips.
 */
const start = async({inx, tid}) => {
  if (over) {
    return;
  }

  const productId = inx;

  // NOTE(review): the bound is tested against id + 1, so the id equal to max - 1 is never
  // fetched and reaching it sets the global stop flag for every caller. Confirm this is intended.
  if (productId + 1 >= max) {
    over = true;
    logger.info('over!!!!!!');
    return;
  }

  const result = await spider.spiderFetch(productId);

  if (result.status !== 200) {
    logger.info(productId, result.status)
    // Only responses that carry a truthy status count towards the failure limit.
    if (result.status) {
      failNum++;
    }
  } else {
    failNum = 0;
    products[productId] = {
      count: _.get(result, 'data.detail.soldNum', 0),
      price: _.get(result, 'data.item.price', 0)
    };
    try {
      await spiderBuyers(productId, 0, 0, 0, tid);
    } catch (error) {
      // A failure while crawling one product is logged and does not stop the run.
      logger.info('error', error);
    }
  }

  // Twenty counted failures without an intervening 200 response end the whole run.
  if (failNum >= 20) {
    over = true;
    return logger.info('over!!!!!!', JSON.stringify(result));
  }
  return true;
};


//4211292
module.exports = () => {
  const thread = 4;
  const ids = Array.from({length: max}).map((v, i) => i);
  const threads = [];
  const page = ids.length / thread;

  for (let index = 0; index < thread; index++) {
    threads.push(ids.splice(0, page));
  }
  threads.forEach((tids, tid) => {
    const locktask = lockup(start);

    tids.map((inx) => {
      locktask({inx, tid});
    })
  })
  
}

module.exports()