scrape_ids working. Added function to conky in ebay_api

scott 2021-11-26 18:46:51 -07:00
parent ab064ce5cf
commit 0f65ab0bc2
2 changed files with 41 additions and 12 deletions

View File

@@ -1,4 +1,5 @@
import os
import scrape_ids
from datetime import datetime, timedelta
import dateutil
from dateutil import parser
@@ -261,7 +262,7 @@ class ShoppingApi:
fnd_srvc = 4
finding = FindingApi(fnd_srvc, target_idspc)
item_id_results = finding.get_ids_from_cats()
item_id_results = scrape_ids.main()
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(self.get_item_from_findItemsByCategory, item_id_results):
for item in future:

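The ebay_api change above replaces the FindingApi category walk with scrape_ids.main() as the ItemID source and keeps the threaded fetch loop. A minimal sketch of that pattern, assuming get_item_from_findItemsByCategory takes one comma-joined ItemID string and returns an iterable of parsed items; fetch_batch below is a hypothetical stand-in for it, since its body is not part of this diff:

```python
# Sketch only: fetch_batch stands in for ShoppingApi.get_item_from_findItemsByCategory,
# whose real body is not shown in this diff.
import concurrent.futures
import scrape_ids

def fetch_batch(id_batch):
    """Hypothetical stand-in: call the Shopping API with up to 20
    comma-joined ItemIDs and return the parsed item records."""
    return []

def collect_items():
    data = []
    item_id_results = scrape_ids.main()  # list of '<id>,<id>,...' strings
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields each call's return value in order;
        # every ID batch is fetched on its own worker thread
        for batch_result in executor.map(fetch_batch, item_id_results):
            for item in batch_result:
                data.append(item)
    return data
```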
View File

@@ -11,6 +11,7 @@ def get_isurl(category_id): # "get itemSearchURL"
Gets raw JSON data from FindingApi service call. Currently being used to
get itemIDs from categories;
'''
params = {
"OPERATION-NAME":'findItemsByCategory',
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -35,20 +36,23 @@ def get_isurl(category_id): # "get itemSearchURL"
return url
try:
data = response.json()
i = 1 # NOTE approx 220 pages of listings per cat @ 35 items per page
pg = "&_pgn={}".format(str(i))
# NOTE approx 220 pages of listings per cat @ 35 items per page
item_cond = "&rt=nc&LH_ItemCondition=3000&mag=1" # preowned
url = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0].replace('&_pgn=1', pg)
urls = []
url = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
url = url+item_cond
j = list(range(1,221))
for i in j:
pg = "&_pgn={}".format(str(i))
url = url.replace('&_pgn=1', pg)
urls.append(url)
except (AttributeError, KeyError):
print('AttributeError or KeyError. Exiting')
print(response.json())
return data
return url
return urls
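The new get_isurl body appends the preowned condition filter and then expands the single itemSearchURL into roughly 220 page URLs. A hedged sketch of that pagination step, assuming the URL returned by findItemsByCategory contains the literal '&_pgn=1'; substituting into the unmodified base on every pass keeps each page number distinct (writing the result back into url, as in the loop above, leaves no '&_pgn=1' to replace after the second iteration):

```python
ITEM_COND = "&rt=nc&LH_ItemCondition=3000"  # preowned filter, as in the diff

def paginate(base_url, pages=220):
    """Build one URL per results page from a single itemSearchURL.

    Assumes base_url contains the '&_pgn=1' query parameter and that a
    category runs to roughly 220 pages at ~35 listings per page.
    """
    base_url = base_url + ITEM_COND
    # substitute into the original base every time so '&_pgn=1' is still
    # present for each page number
    return [base_url.replace('&_pgn=1', '&_pgn={}'.format(i))
            for i in range(1, pages + 1)]
```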
def threaded_urls(url):
def threaded_urls():
urls = []
with open('cat_list.txt') as jf:
@@ -56,13 +60,37 @@ def threaded_urls(url):
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(get_isurl, cat_list):
urls.append(future)
urls.extend(future)
return urls
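threaded_urls fans get_isurl out across every category in cat_list.txt and flattens the per-category URL lists with extend. The file read itself falls outside the hunk, so the sketch below fills it in under assumptions: cat_list.txt is taken to hold a JSON array of category IDs, and page_urls_for is a hypothetical stand-in for get_isurl.

```python
import concurrent.futures
import json

def page_urls_for(category_id):
    """Hypothetical stand-in for get_isurl: returns the ~220 page URLs
    for one category."""
    return []

def threaded_urls():
    urls = []
    # assumption: cat_list.txt holds a JSON array of eBay category IDs
    with open('cat_list.txt') as jf:
        cat_list = json.load(jf)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # each map result is a list of page URLs, so extend() flattens
        # them into one combined list
        for result in executor.map(page_urls_for, cat_list):
            urls.extend(result)
    return urls
```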
def get_ids(url):
html = requests.get(url).text
soup = b(html, "html.parser")
ids = list(ids = soup.find_all(href=re.compile(r"[\d]+(?=\?hash)")))
ids = list(soup.find_all(href=re.compile(r"[\d]+(?=\?hash)")))
ids = [id['href'] for id in ids]
ids = [re.findall(r"[\d]+(?=\?)", id) for id in ids]
ids = [re.findall(r"[\d]+(?=\?)", id)[0] for id in ids]
ids = list(set(ids))
return ids
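get_ids keeps only anchors whose href has digits immediately before '?hash' and then pulls those digits out as the ItemID. A small illustration of the two regex passes on a made-up href (the URL below is illustrative only, not taken from a real response):

```python
import re

# illustrative href of the shape the soup.find_all(href=...) filter matches
href = "https://www.ebay.com/itm/Vintage-Denim-Jacket/254947800123?hash=item3b5c0e2abc"

# pass 1: the find_all filter keeps anchors whose href has digits
# immediately followed by '?hash'
assert re.search(r"[\d]+(?=\?hash)", href)

# pass 2: extract the ItemID itself (the digit run just before the '?')
item_id = re.findall(r"[\d]+(?=\?)", href)[0]
print(item_id)  # -> 254947800123
```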
def threaded_get_ids(urls):
ids = []
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(get_ids, urls):
ids.extend(future)
with open('ids.txt', 'w') as f:
json.dump(ids, f)
item_id_results = [','.join(ids[n:n+20]) for n in list(range(0,
len(ids), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints
return item_id_results
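threaded_get_ids then joins the deduplicated IDs into comma-separated batches of 20, matching the per-call ItemID limit noted in the comment above. A tiny worked example of that chunking with made-up IDs:

```python
ids = [str(100000 + n) for n in range(45)]  # 45 fake ItemIDs

# same chunking as in the diff: comma-join groups of 20 so one API call
# covers as many items as the ItemID limit allows
item_id_results = [','.join(ids[n:n + 20]) for n in range(0, len(ids), 20)]

print(len(item_id_results))  # -> 3 batches (20 + 20 + 5)
print(item_id_results[-1])   # -> '100040,100041,100042,100043,100044'
```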
def main():
urls = threaded_urls()
item_id_results = threaded_get_ids(urls)
return item_id_results
if __name__=="__main__":
main()