Authored by chunhua.zhang

add

@@ -3,44 +3,43 @@ @@ -3,44 +3,43 @@
3 3
4 import requests 4 import requests
5 import urlparse 5 import urlparse
6 -import json, ast  
7 6
8 def fetech_all_tasks(): 7 def fetech_all_tasks():
9 8
10 # fetech the url list from platform 9 # fetech the url list from platform
11 platform_url = "http://api.platform.yohoops.org:8088/platform/productUrl/queryUrlList?page=0&size=50000"; 10 platform_url = "http://api.platform.yohoops.org:8088/platform/productUrl/queryUrlList?page=0&size=50000";
12 11
13 - result = requests.get(platform_url)  
14 - r = ast.literal_eval(result.text) 12 + r = requests.get(platform_url).json()
15 if r['code'] != 200: 13 if r['code'] != 200:
16 print("Fetch task failed! %s" % r ) 14 print("Fetch task failed! %s" % r )
17 return 15 return
18 16
19 tasks = {} # id -> url 17 tasks = {} # id -> url
20 for item in r['data']: 18 for item in r['data']:
21 - # get url from taobaoUrl or tmallUrl  
22 - url = item['taobaoUrl']  
23 - if item['tmallUrl']: 19 +
  20 + url = item['jdUrl']
  21 + # proccessing taobaoUrl or tmallUrl
  22 + if not url :
  23 + if item['tmallUrl']:
24 url = item['tmallUrl'] 24 url = item['tmallUrl']
25 - if item['jdUrl']:  
26 - url = item['jdUrl']  
27 -  
28 - if not url: 25 + if item['taobaoUrl']:
  26 + url = item['taobaoUrl']
  27 + if not url:
29 print("url not found at %s" %item) 28 print("url not found at %s" %item)
30 continue 29 continue
31 -  
32 - # get id & skuId  
33 - query = urlparse.urlparse(url)  
34 - params = urlparse.parse_qs(query.query)  
35 - id = params.get('id')  
36 - skuId = params.get('skuId')  
37 - if not id :  
38 - print("can not get id from url: %s" % url)  
39 - if not skuId:  
40 - skuId = ''  
41 -  
42 - # final url  
43 - tasks[item['productId']] = ("%s://%s/item.html?id=%s&skuId=%s" %(query.scheme, query.netloc, id, skuId)).encode('utf-8') 30 + # get id & skuId
  31 + query = urlparse.urlparse(url)
  32 + params = urlparse.parse_qs(query.query)
  33 + id = params.get('id')
  34 + skuId = params.get('skuId')
  35 + if not id :
  36 + print("can not get id from url: %s" % url)
  37 + continue
  38 + if not skuId:
  39 + skuId = ''
  40 + # final taobao or tmall url
  41 + url = ("%s://%s/item.html?id=%s&skuId=%s" %(query.scheme, query.netloc, id, skuId)).encode('utf-8')
  42 + tasks[item['productId']] = url
44 return tasks 43 return tasks
45 44
46 def write_to_file(tasks = {}): 45 def write_to_file(tasks = {}):