unique ids fix
This commit is contained in:
parent
93297f952e
commit
29800af1d4
@ -36,15 +36,15 @@ def get_isurl(category_id): # "get itemSearchURL"
|
||||
return url
|
||||
try:
|
||||
data = response.json()
|
||||
print(data)
|
||||
# NOTE approx 220 pages of listings per cat @ 35 items per page
|
||||
item_cond = "&rt=nc&LH_ItemCondition=3000&mag=1" # preowned
|
||||
item_cond_new = '&LH_ItemCondition=3'
|
||||
urls = []
|
||||
url = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
|
||||
url = url+item_cond
|
||||
j = list(range(1,221))
|
||||
for i in j:
|
||||
pg = "&_pgn={}".format(str(i))
|
||||
url = url.replace('&_pgn=1', pg)
|
||||
base_url = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
|
||||
for pg in list(range(1,34)): # No results after around page 32
|
||||
url = base_url+"&_pgn="+str(pg)+item_cond
|
||||
print(url)
|
||||
urls.append(url)
|
||||
|
||||
except (AttributeError, KeyError):
|
||||
@ -70,17 +70,22 @@ def get_ids(url):
|
||||
'''
|
||||
html = requests.get(url).text
|
||||
soup = b(html, "html.parser")
|
||||
print(soup)
|
||||
ids = list(soup.find_all(href=re.compile(r"[\d]+(?=\?hash)")))
|
||||
ids = [id['href'] for id in ids]
|
||||
ids = [re.findall(r"[\d]+(?=\?)", id)[0] for id in ids]
|
||||
ids = list(set(ids)) # necessary; two links are returned with pattern match
|
||||
print(ids)
|
||||
|
||||
return ids
|
||||
|
||||
def threaded_get_ids(urls):
|
||||
|
||||
'''
|
||||
Runs get_ids() w/in ThreadPoolExecutor() for multi threaded requests.
|
||||
Constructs and saves unique ids and 20_itemIDs for use with ebay_api
|
||||
methods
|
||||
'''
|
||||
try:
|
||||
with open('item_id_results.txt') as f:
|
||||
with open('ids.txt') as f:
|
||||
ids = json.load(f)
|
||||
except FileNotFoundError:
|
||||
ids = []
|
||||
@ -89,14 +94,32 @@ def threaded_get_ids(urls):
|
||||
for future in executor.map(get_ids, urls):
|
||||
ids.extend(future)
|
||||
|
||||
ids = list(set(ids)) # necessary; two links are returned with pattern match
|
||||
item_id_results = [','.join(ids[n:n+20]) for n in list(range(0,
|
||||
len(ids), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints
|
||||
|
||||
with open('ids.txt', 'w') as f:
|
||||
json.dump(ids,f)
|
||||
|
||||
with open('item_id_results.txt', 'w') as f:
|
||||
json.dump(item_id_results, f)
|
||||
|
||||
return item_id_results
|
||||
|
||||
def id_count():
    '''
    Counts Unique IDs of item_id_results for testing.

    Reads item_id_results.txt (a JSON list of comma-joined 20-ID batch
    strings), prints how many unique IDs it contains, and returns the
    flat list of individual IDs (duplicates included, matching the
    original return contract).

    Raises FileNotFoundError if item_id_results.txt does not exist and
    json.JSONDecodeError if it is not valid JSON.
    '''
    with open('item_id_results.txt') as f:
        item_id_results = json.load(f)

    # Flatten the comma-joined batches back into individual IDs.
    # Filtering out empty strings fixes the empty-file edge case:
    # ','.join([]) -> '' -> ''.split(',') -> [''] would otherwise
    # report 1 unique ID when there are none.
    ids = [item_id for batch in item_id_results
           for item_id in batch.split(',') if item_id]

    uniq = len(set(ids))
    print('{} Unique IDs'.format(uniq))

    return ids
|
||||
|
||||
def main():
|
||||
urls = threaded_urls()
|
||||
item_id_results = threaded_get_ids(urls)
|
||||
|
Loading…
Reference in New Issue
Block a user