Authored by chunhua.zhang

add

... ... @@ -3,32 +3,30 @@
import requests
import urlparse
import json, ast
def fetech_all_tasks():
# fetch the url list from the platform
platform_url = "http://api.platform.yohoops.org:8088/platform/productUrl/queryUrlList?page=0&size=50000";
result = requests.get(platform_url)
r = ast.literal_eval(result.text)
r = requests.get(platform_url).json()
if r['code'] != 200:
print("Fetch task failed! %s" % r )
return
tasks = {} # id -> url
for item in r['data']:
# get url from taobaoUrl or tmallUrl
url = item['taobaoUrl']
url = item['jdUrl']
# processing taobaoUrl or tmallUrl
if not url :
if item['tmallUrl']:
url = item['tmallUrl']
if item['jdUrl']:
url = item['jdUrl']
if item['taobaoUrl']:
url = item['taobaoUrl']
if not url:
print("url not found at %s" %item)
continue
# get id & skuId
query = urlparse.urlparse(url)
params = urlparse.parse_qs(query.query)
... ... @@ -36,11 +34,12 @@ def fetech_all_tasks():
skuId = params.get('skuId')
if not id :
print("can not get id from url: %s" % url)
continue
if not skuId:
skuId = ''
# final url
tasks[item['productId']] = ("%s://%s/item.html?id=%s&skuId=%s" %(query.scheme, query.netloc, id, skuId)).encode('utf-8')
# final taobao or tmall url
url = ("%s://%s/item.html?id=%s&skuId=%s" %(query.scheme, query.netloc, id, skuId)).encode('utf-8')
tasks[item['productId']] = url
return tasks
def write_to_file(tasks = {}):
... ...