added error handling for initial/empty csv file
parent ae5a4e92bb
commit 7c338e69d5

ebay_api.py (69 lines changed)
@@ -2,11 +2,6 @@ import json
import requests
import pandas as pd

with open('cat_list.txt') as jf:
    cat_list = json.load(jf)

big_data = pd.read_csv('big_data.csv')

class FindingApi:
    '''Some docstring to get rid of linting errors'''
    def __init__(self, service, pageNumber):
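The lines dropped in this hunk are the module-level loads that break on a fresh clone: pd.read_csv('big_data.csv') runs at import time, before any data has been scraped. A minimal sketch of the two first-run failure modes being worked around (the helper name and fallback are illustrative, not from the commit):

import pandas as pd

def load_big_data(path='big_data.csv'):
    # Hypothetical helper: show why a module-level read_csv fails on first run.
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        return pd.DataFrame()          # no csv yet: start from an empty frame
    except pd.errors.EmptyDataError:
        return pd.DataFrame()          # csv exists but has no rows/columns yet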
@@ -20,6 +15,10 @@ class FindingApi:

    # departments = ["3034","93427"] (womens and mens)
    def get_data(self):
        '''# Gets raw JSON data fom FindingApi service call
        '''
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)
        for category_id in cat_list:
            for i in self.pageNumber:
                params = {
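The params dict built inside these nested loops is elided by the hunk boundary. As context only, a hedged sketch of what a findItemsByCategory request typically looks like; the parameter names are standard Finding API query fields, but the exact set used in this repo is not shown in the diff, and the app ID is a placeholder:

import requests

def find_items(category_id, page_number, app_id="YOUR_APP_ID"):
    # Assumed/typical findItemsByCategory query fields; not copied from the repo.
    params = {
        "OPERATION-NAME": "findItemsByCategory",
        "SECURITY-APPNAME": app_id,                  # placeholder credential
        "SERVICE-VERSION": "1.13.0",
        "RESPONSE-DATA-FORMAT": "JSON",
        "categoryId": category_id,
        "paginationInput.pageNumber": page_number,
    }
    response = requests.get(
        "https://svcs.ebay.com/services/search/FindingService/v1", params=params)
    return response.json()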
@@ -34,19 +33,25 @@ class FindingApi:
                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                    params=params)
                data = response.json()
        return data
        return data # May want to save raw json as text file here or in main

    def get_ids_from_cats(self):
        '''
        get_ids_from cats creates a 20-itemId list to use for the ShoppingApi
        Creates a 20-itemId list to use for the ShoppingApi
        call
        '''
        data = self.get_data()
        itemid_results_list = []
        for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
            if item not in big_data.values:
        try:
            big_data = pd.read_csv('big_data.csv')
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                if item not in big_data.values:
                    itemid_results_list.append(item['itemId'][0]) # itemId
                    # values are in lists for some reason
        except pd.errors.EmptyDataError:
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                itemid_results_list.append(item['itemId'][0]) # itemId
                # values are in lists for some reason
                # values are in lists for some reason
        item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
            len(itemid_results_list), 20))]

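This try/except is the commit's headline change: on the first run big_data.csv may be empty, so pd.errors.EmptyDataError falls back to collecting every itemId instead of filtering against the saved frame, and the final list comprehension comma-joins ids in groups of 20 for the Shopping API. A condensed sketch of both ideas, assuming the same response layout; the helper name is hypothetical:

import pandas as pd

def batch_new_item_ids(items, csv_path='big_data.csv', batch_size=20):
    # items: the list under ['findItemsByCategoryResponse'][0]['searchResult'][0]['item'];
    # hypothetical helper condensing get_ids_from_cats
    try:
        big_data = pd.read_csv(csv_path)
        # keep only ids not already present in the saved frame
        ids = [it['itemId'][0] for it in items if it['itemId'][0] not in big_data.values]
    except pd.errors.EmptyDataError:
        # first run: the csv exists but is empty, so there is nothing to filter against
        ids = [it['itemId'][0] for it in items]
    # comma-join in groups of 20, the per-call limit noted in the trailing comments
    return [','.join(ids[n:n + batch_size]) for n in range(0, len(ids), batch_size)]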
@@ -58,6 +63,9 @@ class ShoppingApi():
        pandas dataframes
    '''
    def get_item_from_findItemsByCategory(self, item_id_results):
        '''
        Gets raw JSON data from multiple live listings
        '''
        for twenty_id in item_id_results:
            params = {
                "callname":"GetMultipleItems",
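Each twenty_id here is one of the comma-joined strings from get_ids_from_cats. As context, a hedged sketch of a typical GetMultipleItems request; everything beyond "callname" is an assumption about standard Shopping API query fields rather than the repo's actual params, and the app ID is a placeholder:

import requests

def get_multiple_items(twenty_id, app_id="YOUR_APP_ID"):
    # twenty_id: a comma-separated string of up to 20 itemIds
    params = {
        "callname": "GetMultipleItems",
        "appid": app_id,                        # placeholder credential
        "version": "671",                       # assumed Shopping API version
        "responseencoding": "JSON",
        "ItemID": twenty_id,
        "IncludeSelector": "ItemSpecifics",     # assumed, to get the NameValueList used later
    }
    response = requests.get("https://open.api.ebay.com/shopping?", params=params)
    return response.json()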
@@ -70,20 +78,41 @@ class ShoppingApi():

            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
            data = response.json()
        return data
        # Maybe end def here and create new def for curating data
class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''
    def update_df(self, data):
        names = []
        values = []
        nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]

        names = []
        values = []
        nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]
        for nvl_dict in nvl:
            names.append(nvl_dict['Name'])
            values.append(nvl_dict['Value']) # Try to excract value from list here

        for nvl_dict in nvl:
            names.append(nvl_dict['Name'])
            values.append(nvl_dict['Value']) # Try to excract value from list here
        nvl_dict = dict(zip(names, values))
        data.update(nvl_dict)
        df = pd.json_normalize(data)
        df.to_csv('big_data.csv')

        nvl_dict = dict(zip(names, values))
        data.update(nvl_dict)
        df = pd.json_normalize(data)
        df.to_csv('big_data.csv')
def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    service, pageNumber = input('service and pageNumber:').split()
    finding = FindingApi(service, pageNumber)
    item_id_results = finding.get_ids_from_cats()
    shopping = ShoppingApi()
    data = shopping.get_item_from_findItemsByCategory(item_id_results)
    curate = CurateData()
    curate.update_df(data)

if __name__ == "__main__":
    main()
# Limited to 5000 calls to shopping api per day, and getMultpileitems service maxes out at 20 items
# per call leaving you 100,000 items per day for you pandas dataframe initially. So you'll have
# to divide these up into the categories. This will leave you with about 6.25K results per cat.
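update_df flattens each listing's ItemSpecifics NameValueList into plain columns before pd.json_normalize writes the CSV. A standalone sketch of that flattening, with a hypothetical helper name and simplified indexing relative to the diff; the trailing comments' arithmetic works out to 5,000 calls/day x 20 items/call = 100,000 items/day, which matches "about 6.25K results per cat" if cat_list holds roughly 16 categories:

import pandas as pd

def flatten_item_specifics(item):
    # item: one entry of data['Item']; hypothetical helper mirroring update_df
    nvl = item['ItemSpecifics']['NameValueList']
    names = [d['Name'] for d in nvl]
    values = [d['Value'] for d in nvl]       # each Value arrives wrapped in a list
    item.update(dict(zip(names, values)))    # promote specifics to top-level keys
    return pd.json_normalize(item)           # one wide row per listing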