diff --git a/ebay_api.py b/ebay_api.py
index 892a0b6..a4685bf 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -2,11 +2,6 @@
 import json
 import requests
 import pandas as pd
-with open('cat_list.txt') as jf:
-    cat_list = json.load(jf)
-
-big_data = pd.read_csv('big_data.csv')
-
 class FindingApi:
     '''Some docstring to get rid of linting errors'''
     def __init__(self, service, pageNumber):
@@ -20,6 +15,10 @@ class FindingApi:
     # departments = ["3034","93427"] (womens and mens)

     def get_data(self):
+        '''Gets raw JSON data from the FindingApi service call
+        '''
+        with open('cat_list.txt') as jf:
+            cat_list = json.load(jf)
         for category_id in cat_list:
             for i in self.pageNumber:
                 params = {
@@ -34,19 +33,25 @@ class FindingApi:
                 response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                                         params=params)
                 data = response.json()
-        return data
+        return data # May want to save raw json as text file here or in main


     def get_ids_from_cats(self):
         '''
-        get_ids_from cats creates a 20-itemId list to use for the ShoppingApi
+        Creates a 20-itemId list to use for the ShoppingApi call
         '''
         data = self.get_data()
         itemid_results_list = []
-        for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-            if item not in big_data.values:
+        try:
+            big_data = pd.read_csv('big_data.csv')
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
+                if item not in big_data.values:
+                    itemid_results_list.append(item['itemId'][0]) # itemId
+                    # values are in lists for some reason
+        except pd.errors.EmptyDataError:
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                 itemid_results_list.append(item['itemId'][0]) # itemId
-                # values are in lists for some reason
+                # values are in lists for some reason

         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
             len(itemid_results_list), 20))]
@@ -58,6 +63,9 @@ class ShoppingApi():
     pandas dataframes
     '''
     def get_item_from_findItemsByCategory(self, item_id_results):
+        '''
+        Gets raw JSON data from multiple live listings
+        '''
         for twenty_id in item_id_results:
             params = {
                 "callname":"GetMultipleItems",
@@ -70,20 +78,41 @@ class ShoppingApi():
             response = requests.get("https://open.api.ebay.com/shopping?", params=params)
             data = response.json()
+        return data
+        # Maybe end def here and create new def for curating data

-        names = []
-        values = []
-        nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]
+class CurateData:
+    '''
+    Contains functions for curating data for machine learning training sets
+    '''
+    def update_df(self, data):
+        names = []
+        values = []
+        nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]

-        for nvl_dict in nvl:
-            names.append(nvl_dict['Name'])
-            values.append(nvl_dict['Value']) # Try to excract value from list here
+        for nvl_dict in nvl:
+            names.append(nvl_dict['Name'])
+            values.append(nvl_dict['Value']) # Try to extract value from list here

-        nvl_dict = dict(zip(names, values))
-        data.update(nvl_dict)
-        df = pd.json_normalize(data)
-        df.to_csv('big_data.csv')
+        nvl_dict = dict(zip(names, values))
+        data.update(nvl_dict)
+        df = pd.json_normalize(data)
+        df.to_csv('big_data.csv')
+def main():
+    '''
+    Main program creates/updates a csv file to use for ML training from live
+    eBay listings
+    '''
+    service, pageNumber = input('service and pageNumber: ').split()
+    finding = FindingApi(service, pageNumber)
+    item_id_results = finding.get_ids_from_cats()
+    shopping = ShoppingApi()
+    data = shopping.get_item_from_findItemsByCategory(item_id_results)
+    curate = CurateData()
+    curate.update_df(data)
+if __name__ == "__main__":
+    main()


 # Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems service maxes out at 20 items
 # per call, leaving you 100,000 items per day for your pandas dataframe initially. So you'll have
 # to divide these up into the categories. This will leave you with about 6.25K results per cat.
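
A note on the batching in get_ids_from_cats: GetMultipleItems accepts at most 20 comma-separated itemIDs per call, so the list comprehension chunks the collected IDs into comma-joined strings of 20. A minimal sketch of that idiom, using made-up IDs (variable names mirror the diff):

# Chunk itemIds into comma-joined strings of at most 20 for GetMultipleItems
itemid_results_list = [str(n) for n in range(45)]  # 45 hypothetical itemIds

item_id_results = [','.join(itemid_results_list[n:n + 20])
                   for n in range(0, len(itemid_results_list), 20)]

print(len(item_id_results))  # 3 batches: 20 + 20 + 5 IDs
print(item_id_results[-1])   # '40,41,42,43,44'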
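
The try/except in get_ids_from_cats only catches pd.errors.EmptyDataError, which pandas raises when big_data.csv exists but is empty; on a first run with no file at all, pd.read_csv raises FileNotFoundError instead. One possible guard, a sketch rather than the diff's exact behavior (load_big_data is a hypothetical helper, not in the diff):

import pandas as pd

def load_big_data(path='big_data.csv'):
    '''Return the accumulated listings, or an empty frame when the csv is
    missing or empty, so first runs don't crash before anything is saved.'''
    try:
        return pd.read_csv(path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return pd.DataFrame()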
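
CurateData.update_df flattens ItemSpecifics by zipping NameValueList names against values before json_normalize. Below is that step run on a mock payload; the field shapes are assumptions inferred from the diff (each Value arrives wrapped in a one-item list, which is what the "Try to extract value from list here" comment is about). Note the diff's nvl takes NameValueList[0], a single dict, whereas the loop really wants the whole list:

import pandas as pd

# Hypothetical single-item response in the shape update_df expects
data = {'Item': [{'ItemID': '1234', 'ItemSpecifics': {'NameValueList': [
    {'Name': 'Brand', 'Value': ['Nike']},
    {'Name': 'Color', 'Value': ['Black']},
]}}]}

nvl = data['Item'][0]['ItemSpecifics']['NameValueList']  # the full list, not [0]
names = [pair['Name'] for pair in nvl]
values = [pair['Value'][0] for pair in nvl]  # unwrap the one-item Value lists

data.update(dict(zip(names, values)))  # adds {'Brand': 'Nike', 'Color': 'Black'}
df = pd.json_normalize(data)
print(df[['Brand', 'Color']])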
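
On the call-budget comment at the bottom: 5,000 Shopping API calls per day at 20 items per GetMultipleItems call works out as below. The 16-category figure is an assumption chosen to match the ~6.25K-per-category estimate; cat_list's actual length isn't shown in the diff.

calls_per_day = 5000
items_per_call = 20   # GetMultipleItems hard cap
categories = 16       # assumed length of cat_list

items_per_day = calls_per_day * items_per_call  # 100,000
print(items_per_day // categories)              # 6,250 results per category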