Showing
1 changed file
with
22 additions
and
23 deletions
@@ -3,44 +3,43 @@ | @@ -3,44 +3,43 @@ | ||
3 | 3 | ||
4 | import requests | 4 | import requests |
5 | import urlparse | 5 | import urlparse |
6 | -import json, ast | ||
7 | 6 | ||
8 | def fetech_all_tasks(): | 7 | def fetech_all_tasks(): |
9 | 8 | ||
10 | # fetech the url list from platform | 9 | # fetech the url list from platform |
11 | platform_url = "http://api.platform.yohoops.org:8088/platform/productUrl/queryUrlList?page=0&size=50000"; | 10 | platform_url = "http://api.platform.yohoops.org:8088/platform/productUrl/queryUrlList?page=0&size=50000"; |
12 | 11 | ||
13 | - result = requests.get(platform_url) | ||
14 | - r = ast.literal_eval(result.text) | 12 | + r = requests.get(platform_url).json() |
15 | if r['code'] != 200: | 13 | if r['code'] != 200: |
16 | print("Fetch task failed! %s" % r ) | 14 | print("Fetch task failed! %s" % r ) |
17 | return | 15 | return |
18 | 16 | ||
19 | tasks = {} # id -> url | 17 | tasks = {} # id -> url |
20 | for item in r['data']: | 18 | for item in r['data']: |
21 | - # get url from taobaoUrl or tmallUrl | ||
22 | - url = item['taobaoUrl'] | ||
23 | - if item['tmallUrl']: | 19 | + |
20 | + url = item['jdUrl'] | ||
21 | + # proccessing taobaoUrl or tmallUrl | ||
22 | + if not url : | ||
23 | + if item['tmallUrl']: | ||
24 | url = item['tmallUrl'] | 24 | url = item['tmallUrl'] |
25 | - if item['jdUrl']: | ||
26 | - url = item['jdUrl'] | ||
27 | - | ||
28 | - if not url: | 25 | + if item['taobaoUrl']: |
26 | + url = item['taobaoUrl'] | ||
27 | + if not url: | ||
29 | print("url not found at %s" %item) | 28 | print("url not found at %s" %item) |
30 | continue | 29 | continue |
31 | - | ||
32 | - # get id & skuId | ||
33 | - query = urlparse.urlparse(url) | ||
34 | - params = urlparse.parse_qs(query.query) | ||
35 | - id = params.get('id') | ||
36 | - skuId = params.get('skuId') | ||
37 | - if not id : | ||
38 | - print("can not get id from url: %s" % url) | ||
39 | - if not skuId: | ||
40 | - skuId = '' | ||
41 | - | ||
42 | - # final url | ||
43 | - tasks[item['productId']] = ("%s://%s/item.html?id=%s&skuId=%s" %(query.scheme, query.netloc, id, skuId)).encode('utf-8') | 30 | + # get id & skuId |
31 | + query = urlparse.urlparse(url) | ||
32 | + params = urlparse.parse_qs(query.query) | ||
33 | + id = params.get('id') | ||
34 | + skuId = params.get('skuId') | ||
35 | + if not id : | ||
36 | + print("can not get id from url: %s" % url) | ||
37 | + continue | ||
38 | + if not skuId: | ||
39 | + skuId = '' | ||
40 | + # final taobao or tmall url | ||
41 | + url = ("%s://%s/item.html?id=%s&skuId=%s" %(query.scheme, query.netloc, id, skuId)).encode('utf-8') | ||
42 | + tasks[item['productId']] = url | ||
44 | return tasks | 43 | return tasks |
45 | 44 | ||
46 | def write_to_file(tasks = {}): | 45 | def write_to_file(tasks = {}): |
-
Please register or login to post a comment