From bcb11de8557866df7dfe0811c90c4b58717e34b9 Mon Sep 17 00:00:00 2001
From: spbeach46
Date: Thu, 12 Nov 2020 14:46:37 -0700
Subject: [PATCH] changed data var returned in shoppingapi to data['Item']. Vice versa in update_data func. This is so data.update is correct

---
 ebay_api.py     |  5 ++--
 finding_api.py  | 43 ---------------------------
 shopping_api.py | 79 ------------------------------------------------
 3 files changed, 3 insertions(+), 124 deletions(-)
 delete mode 100644 finding_api.py
 delete mode 100644 shopping_api.py

diff --git a/ebay_api.py b/ebay_api.py
index b2c23bb..183ec4c 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -95,9 +95,10 @@ class ShoppingApi:
         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(self.get_item_from_findItemsByCategory(), item_id_results):
                 data.update(future)
-        return data # TODO save data as file??
+        return data
+
+# TODO the structure of data as is (ie, as an updated dict) means CurateData class methods are going to have trouble running
 
-    # TODO CONSIDER IMPLEMENTING MULTITHREADING AROUND HERE TOO.
 class CurateData:
     '''
     Contains functions for curating data for machine learning training sets;
diff --git a/finding_api.py b/finding_api.py
deleted file mode 100644
index 8ecfad1..0000000
--- a/finding_api.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import requests
-import json
-from bs4 import BeautifulSoup as b
-import pandas as p
-
-# keywords = input('keyword search: ')
-
-with open('cat_list.txt') as jf:
-    cat_list = json.load(jf)
-finding_service = ['findItemsAdvanced', 'findCompletedItems', 'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory', 'findItemsByProduct']
-
-pageNumber = list(range(1, 63))
-
-# departments = ["3034","93427"]
-
-def get_ids():
-    itemid_results_list = []
-    for categoryID in cat_list[0:2]:
-        params = {
-            "OPERATION-NAME":finding_service[4],
-            "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
-            "SERVICE-VERSION":"1.13.0",
-            "RESPONSE-DATA-FORMAT":"JSON",
-            "categoryId":categoryID ,
-            "paginationInput.entriesPerPage":"100",
-            "paginationInput.PageNumber":pageNumber[0]
-            }
-        # extract item id here for piping into shopping_test.py
-
-        response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1", params=params)
-        data = response.json()
-        pretty_data = json.dumps(data, indent=2)
-    return data
-# can use pandas.json_normalize(custom dict cobbled from respons.json())
-
-
-# Additional problem you will run into when getting labeled data is shoe types and features not in features, accents, styles, categories or subcategories.
-
-# also limited to 5000 calls per day. This leaves you with 500k listings
-
-# If you want to split up each cat equally with their respective maxes then use 62 pages with 100
-# Entries per page. At this amount you'll have the max number of calls you can make on the
-# shopping api.
diff --git a/shopping_api.py b/shopping_api.py
deleted file mode 100644
index 49d1a29..0000000
--- a/shopping_api.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import json
-import requests
-import pandas as pd
-
-# OPEN CSV AS VARIALBE RIGHT HERE
-with open('cat_list.txt') as jf:
-    cat_list = json.load(jf)
-
-big_data = pd.read_csv('big_data.csv')
-
-class FindingApi:
-    '''Some docstring to get rid of linting errors'''
-    def __init__(self):
-        self.service = [
-            'findItemsAdvanced', 'findCompletedItems',
-            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
-            'findItemsByProduct'
-        ]
-        self.pageNumber = list(range(1, 63))
-
-    # departments = ["3034","93427"] (womens and mens)
-
-    def get_ids_from_cats(self):
-        '''Stop bothering me for docstrings.'''
-        itemid_results_list = []
-        for category_id in cat_list:
-            for i in self.pageNumber:
-                params = {
-                    "OPERATION-NAME":self.service[4],
-                    "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
-                    "SERVICE-VERSION":"1.13.0",
-                    "RESPONSE-DATA-FORMAT":"JSON",
-                    "categoryId":category_id,
-                    "paginationInput.entriesPerPage":"100",
-                    "paginationInput.PageNumber":self.pageNumber[i]
-                }
-                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
-                                        params=params)
-                data = response.json()
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if item not in big_data.values:
-                        itemid_results_list.append(item['itemId'])
-        item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
-            len(itemid_results_list), 20))]
-
-        return item_id_results
-
-class ShoppingApi(FindingApi):
-    def get_item_from_findItemsByCategory(self, item_id_results):
-        for twenty_id in item_id_results:
-            params = {
-                "callname":"GetMultipleItems",
-                "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
-                "version":"671",
-                "responseencoding":"JSON",
-                "ItemID":twenty_id, # you pass in a list? If not then maybe a comma-separated
-                "IncludeSelector":"ItemSpecifics",
-            }
-
-            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
-            data = response.json()
-
-            names = []
-            values = []
-            nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
-
-            for nvl_dict in nvl:
-                names.append(nvl_dict['Name'])
-                values.append(nvl_dict['Value'])
-
-            nvl_dict = dict(zip(names, values))
-            data.update(nvl_dict)
-            df = pd.json_normalize(data)
-            df.to_csv('big_data.csv')
-
-# Limited to 5000 calls to shopping api per day, and getMultpileitems service maxes out at 20 items
-# per call leaving you 100,000 items per day for you pandas dataframe initially. So you'll have
-# to divide these up into the categories. This will leave you with about 6.25K results per cat.
-# More than enough data for your dataset. Consider
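
The commit subject and the new TODO in ebay_api.py describe the same problem: the old get_item_from_findItemsByCategory flattened the ItemSpecifics NameValueList into nvl_dict and then called data.update(nvl_dict) on the whole GetMultipleItems response, so envelope fields and item specifics end up in one merged dict and pd.json_normalize yields a single nested row instead of one row per listing. Below is a minimal sketch of the per-item flattening the subject points at, assuming the response layout used in the deleted shopping_api.py (data['Item'][0]['ItemSpecifics']['NameValueList']); the sample payload and the flatten_item_specifics helper are illustrative, not code from the repo.

import pandas as pd

def flatten_item_specifics(item):
    # Fold the ItemSpecifics name/value pairs into the item dict itself so each
    # listing becomes one flat record (update the item, not the whole response).
    nvl = item.get('ItemSpecifics', {}).get('NameValueList', [])
    item.update({pair['Name']: pair['Value'] for pair in nvl})
    return item

# Hypothetical GetMultipleItems response, trimmed to the keys the old code read.
response_json = {
    'Ack': 'Success',
    'Item': [
        {'ItemID': '1234', 'Title': 'running shoe',
         'ItemSpecifics': {'NameValueList': [
             {'Name': 'Brand', 'Value': 'Nike'},
             {'Name': 'US Shoe Size', 'Value': '10'},
         ]}},
    ],
}

records = [flatten_item_specifics(item) for item in response_json['Item']]
df = pd.json_normalize(records)  # one row per listing, one column per specific

Returning data['Item'] from ShoppingApi and doing the update per item, rather than on the top-level response, keeps a list of per-item dicts, which is presumably the structure CurateData expects per the new TODO.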
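
The call-budget comments at the bottom of the two deleted files do their sizing arithmetic in prose; written out below, with every figure taken from those comments rather than from current eBay quota documentation.

# All figures come from the deleted comments, not from verified eBay quotas.
FINDING_CALLS_PER_DAY = 5000       # Finding API daily limit per the comments
ENTRIES_PER_PAGE = 100             # paginationInput.entriesPerPage
SHOPPING_CALLS_PER_DAY = 5000      # Shopping API daily limit per the comments
ITEMS_PER_SHOPPING_CALL = 20       # GetMultipleItems cap per call

finding_listings = FINDING_CALLS_PER_DAY * ENTRIES_PER_PAGE           # 500,000 ids/day
shopping_items = SHOPPING_CALLS_PER_DAY * ITEMS_PER_SHOPPING_CALL     # 100,000 items/day

# 62 pages at 100 entries is the per-category figure in finding_api.py; roughly
# 16 such categories fit inside the 100,000-item Shopping budget, which is the
# "about 6.25K results per cat" noted in shopping_api.py.
ids_per_category = 62 * ENTRIES_PER_PAGE                     # 6,200
categories_in_budget = shopping_items // ids_per_category    # 16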