From 97c5900a5b48578912d41505032bf2a2e01136dd Mon Sep 17 00:00:00 2001
From: spbeach46
Date: Thu, 12 Nov 2020 13:22:51 -0700
Subject: [PATCH] added multithreading to both finding and shopping apis including conky() def

---
 ebay_api.py | 130 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 84 insertions(+), 46 deletions(-)

diff --git a/ebay_api.py b/ebay_api.py
index 4ce0ba3..b2c23bb 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -15,45 +15,67 @@ class FindingApi:
         # this will give equal weights to cats given call constraints
         # departments = ["3034","93427"] (womens and mens)
 
-    def get_data(self): # TODO FIX THIS TO WORK WITH MULTITHREADING. Need to figure out how to handle data variable. Simplest solution would be to append in def and every call of def in thread. every other method in following classes depend on data variable generated from here. You'll need to decide on way to append data var.
-        '''Gets raw JSON data fom FindingApi service call
+    def get_data(self, i, category_id):
         '''
-        with open('cat_list.txt') as jf:
-            cat_list = json.load(jf)
-        for category_id in cat_list:
-            for i in self.pageNumber:
-                params = {
-                    "OPERATION-NAME":self.service,
-                    "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
-                    "SERVICE-VERSION":"1.13.0",
-                    "RESPONSE-DATA-FORMAT":"JSON",
-                    "categoryId":category_id,
-                    "paginationInput.entriesPerPage":"100",
-                    "paginationInput.PageNumber":i
-                    }
-                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
-                        params=params)
-                data = response.json()
-                return data # TODO MAY HAVE TO RUN IN THREADPOOLEXECUTOR() IN MAIN() AND SAVE DATA TO FILE
+        Gets raw JSON data from FindingApi service call
+        Currently being used to get itemIDs from categories
+
+        i is the page number and category_id the eBay category to query;
+        returns the decoded JSON body of the response
+        '''
+        params = {
+            "OPERATION-NAME":self.service,
+            "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
+            "SERVICE-VERSION":"1.13.0",
+            "RESPONSE-DATA-FORMAT":"JSON",
+            "categoryId":category_id,
+            "paginationInput.entriesPerPage":"100",
+            # NOTE(review): eBay documents this key as paginationInput.pageNumber;
+            # confirm the capitalised spelling is honoured
+            "paginationInput.PageNumber":i
+            }
+        response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
+                params=params)
+        data = response.json()
+        return data
 
     def get_ids_from_cats(self):
         '''
         Creates a 20-itemId list to use for the ShoppingApi call
         '''
-        data = self.get_data() # TODO consider using different variable names to avoid confusion between FindingApi data and ShoppingApi data
+        with open('cat_list.txt') as jf:
+            cat_list = json.load(jf)
 
         itemid_results_list = []
 
-        try:
-            training = pd.read_csv('training.csv')
-            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                if (item not in training.values) and (item not in itemid_results_list):
-                    itemid_results_list.append(item['itemId'][0]) # TODO something funky going on here. zeroth index? why? itemIds from FindingApi call are in lists (due to "variations" listings)
+        # ids already stored in training.csv are skipped; a missing or empty
+        # file only means nothing has been collected yet
+        # NOTE(review): matches against every cell as text -- narrow this to the
+        # id column once its name is confirmed
+        try:
+            training = pd.read_csv('training.csv')
+            seen_ids = set(training.astype(str).values.ravel())
+        except (pd.errors.EmptyDataError, FileNotFoundError):
+            seen_ids = set()
 
-        except (pd.errors.EmptyDataError, FileNotFoundError):
-            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                if item not in itemid_results_list:
-                    itemid_results_list.append(item['itemId'][0])
+        for category_id in cat_list:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                # pass the callable itself; map() calls it once per page
+                pages = executor.map(
+                        lambda page: self.get_data(page, category_id),
+                        self.pageNumber)
+                for data in pages:
+                    try:
+                        items = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']
+                    except (KeyError, IndexError):
+                        # error payload or a page without listings
+                        continue
+                    for item in items:
+                        # itemId is delivered inside a list, hence the [0]
+                        item_id = item['itemId'][0]
+                        if item_id not in seen_ids:
+                            seen_ids.add(item_id)
+                            itemid_results_list.append(item_id)
 
         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0, len(itemid_results_list), 20))]
 
@@ -65,24 +87,43 @@ class ShoppingApi:
     Creates objects from ShoppingApi service calls that can interact with
     pandas dataframes
     '''
-    def get_item_from_findItemsByCategory(self, item_id_results):
+    def get_item_from_findItemsByCategory(self, twenty_id):
         '''
         Gets raw JSON data from multiple live listings given multiple itemIds
+        twenty_id is one comma-joined batch of itemIds, sent as ItemID
         '''
-        for twenty_id in item_id_results:
-            params = {
-                "callname":"GetMultipleItems",
-                "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
-                "version":"671",
-                "responseencoding":"JSON",
-                "ItemID":twenty_id,
-                "IncludeSelector":"ItemSpecifics",
-                }
+        params = {
+            "callname":"GetMultipleItems",
+            "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
+            "version":"671",
+            "responseencoding":"JSON",
+            "ItemID":twenty_id,
+            "IncludeSelector":"ItemSpecifics",
+            }
 
-            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
-            data = response.json()
-            return data # TODO save data as file??
+        response = requests.get("https://open.api.ebay.com/shopping?", params=params)
+        response = response.json()
+        return response
 
+    def conky(self, item_id_results):
+        '''
+        Runs get_item_from_findItemsByCategory in a thread pool, one call per
+        entry of item_id_results, and returns the responses merged into one dict
+        '''
+        data = {}
+        listings = []
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            for response in executor.map(self.get_item_from_findItemsByCategory, item_id_results):
+                # NOTE(review): each response presumably repeats the same top-level
+                # keys, so update() alone would keep only the last batch; the
+                # 'Item' entries are collected separately to keep every listing
+                listings.extend(response.get('Item', []))
+                data.update(response)
+        if listings:
+            data['Item'] = listings
+        return data # TODO save data as file??
+
+# TODO CONSIDER IMPLEMENTING MULTITHREADING AROUND HERE TOO.
 class CurateData:
     '''
     Contains functions for curating data for machine learning training sets;
@@ -159,13 +200,10 @@ def main():
     service = int(service)
     pageNumber = int(pageNumber)
     finding = FindingApi(service, pageNumber)
-    # TODO START MULTITHREADING HERE FOR FINDINGAPI CALL?
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        for future in executor.map(finding.get_ids_from_cats(), finding.pageNumber):
     item_id_results = finding.get_ids_from_cats()
 
     shopping = ShoppingApi()
-    data = shopping.get_item_from_findItemsByCategory(item_id_results)
+    data = shopping.conky(item_id_results)
     curate = CurateData()
     curate.update_df(data)
 