...
|
...
|
@@ -3,44 +3,43 @@ |
|
|
|
|
|
import requests
|
|
|
import urlparse
|
|
|
import json, ast
|
|
|
|
|
|
def _normalize_item_url(url):
    """Rebuild a taobao/tmall item URL keeping only ``id`` and ``skuId``.

    Returns the URL ``<scheme>://<host>/item.html?id=<id>&skuId=<skuId>``
    encoded as UTF-8, or ``None`` when ``url`` carries no ``id`` query
    parameter. A missing ``skuId`` is rendered as an empty value.
    """
    parts = urlparse.urlparse(url)
    params = urlparse.parse_qs(parts.query)

    # parse_qs maps every key to a *list* of values; take the first one so
    # the rebuilt URL contains "id=123" instead of the list repr "id=['123']".
    item_id = (params.get('id') or [None])[0]
    if not item_id:
        return None
    sku_id = (params.get('skuId') or [''])[0]

    return ("%s://%s/item.html?id=%s&skuId=%s"
            % (parts.scheme, parts.netloc, item_id, sku_id)).encode('utf-8')


def fetech_all_tasks():
    """Fetch the product URL list from the platform API.

    Each item of the response's ``data`` list contributes at most one entry
    to the result, keyed by its ``productId``. ``jdUrl`` is stored as-is when
    present; otherwise ``tmallUrl`` and then ``taobaoUrl`` is used, reduced
    to its ``id``/``skuId`` parameters. Items without any URL, or whose
    taobao/tmall URL has no ``id`` parameter, are reported and skipped.

    NOTE(review): the jd > tmall > taobao priority was reconstructed from a
    diff that interleaved the old and new variants of this function --
    confirm it against the intended revision.

    Returns:
        dict mapping productId -> url, or ``None`` when the API response
        ``code`` is not 200.
    """
    # Fetch the url list from the platform.
    platform_url = "http://api.platform.yohoops.org:8088/platform/productUrl/queryUrlList?page=0&size=50000"

    r = requests.get(platform_url).json()
    if r['code'] != 200:
        print("Fetch task failed! %s" % r)
        return

    tasks = {}  # productId -> url
    for item in r['data']:
        url = item['jdUrl']

        # Processing taobaoUrl or tmallUrl: only used when there is no jdUrl.
        if not url:
            url = item['tmallUrl'] or item['taobaoUrl']
            if not url:
                print("url not found at %s" % item)
                continue

            # Final taobao or tmall url.
            normalized = _normalize_item_url(url)
            if normalized is None:
                print("can not get id from url: %s" % url)
                # Skip the item instead of storing a URL with "id=None".
                continue
            url = normalized

        tasks[item['productId']] = url

    return tasks
|
|
|
|
|
|
def write_to_file(tasks = {}):
|
...
|
...
|
|