'''Build/extend a CSV training set from live eBay listings.

The Finding API is used to collect itemIds per category, the Shopping API
(GetMultipleItems) is used to fetch item specifics for those ids, and the
result is appended to ``training.csv`` through pandas.
'''
import json
import requests
import pandas as pd

# NOTE(review): this application id is a credential committed to source.
# It is kept here so behaviour is unchanged; consider loading it from the
# environment or a config file that is not checked in.
_APP_ID = "scottbea-xlister-PRD-6796e0ff6-14862949"

# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the whole run indefinitely.
_REQUEST_TIMEOUT = 30


class FindingApi:
    '''Collects itemIds from eBay Finding API service calls.'''

    def __init__(self, service, pageNumber):
        '''
        service    -- integer index into the list of Finding API operations
                      below (4 selects findItemsByCategory).
        pageNumber -- upper bound (exclusive) of the result pages requested
                      per category; pages 1 .. pageNumber - 1 are fetched.
        '''
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores',
            'findItemsByCategory', 'findItemsByProduct'
        ][service]
        # 64 pages is recommended
        # this will give equal weights to cats given call constraints
        # NOTE(review): range() excludes pageNumber itself, so passing 64
        # requests 63 pages -- confirm whether that is intended.
        self.pageNumber = list(range(1, pageNumber))

    # departments = ["3034","93427"] (womens and mens)

    def _iter_responses(self):
        '''Yield the parsed JSON body of one request per (category, page).

        Category ids are read from the JSON file ``cat_list.txt``.
        '''
        with open('cat_list.txt', encoding='utf-8') as jf:
            cat_list = json.load(jf)

        for category_id in cat_list:
            for i in self.pageNumber:
                params = {
                    "OPERATION-NAME": self.service,
                    "SECURITY-APPNAME": _APP_ID,
                    "SERVICE-VERSION": "1.13.0",
                    "RESPONSE-DATA-FORMAT": "JSON",
                    "categoryId": category_id,
                    "paginationInput.entriesPerPage": "100",
                    # The documented field name is lower-camel-case
                    # ``pageNumber``; the previous ``PageNumber`` spelling
                    # did not match it.
                    "paginationInput.pageNumber": i,
                }
                response = requests.get(
                    "https://svcs.ebay.com/services/search/FindingService/v1",
                    params=params, timeout=_REQUEST_TIMEOUT)
                yield response.json()

    def _items_in(self, data):
        '''Return the list of item dicts in one Finding API response.

        The root key is derived from the selected operation
        (``<operation>Response``) instead of being hard-coded to
        findItemsByCategory. A response without results yields ``[]``.
        '''
        root = data.get(f'{self.service}Response') or [{}]
        search_result = root[0].get('searchResult') or [{}]
        return search_result[0].get('item', [])

    def get_data(self):
        '''Gets raw JSON data from FindingApi service calls.

        Every category/page is requested, and the JSON of the LAST request
        is returned (the historical contract of this method). Returns None
        when there was nothing to request. Use ``get_ids_from_cats`` to
        consume every page.
        '''
        data = None
        for data in self._iter_responses():
            pass
        return data

    def get_ids_from_cats(self):
        '''
        Creates a list of comma-joined strings, each holding up to 20
        itemIds, to use for the ShoppingApi call.

        Ids already present anywhere in ``training.csv`` and ids already
        collected during this run are skipped.
        '''
        # Only the CSV read is guarded, so a missing or empty training file
        # just means "nothing known yet" and other errors are not masked.
        try:
            training = pd.read_csv('training.csv', dtype=str)
        except (pd.errors.EmptyDataError, FileNotFoundError):
            known_values = set()
        else:
            # dtype=str keeps ids comparable with the API's string ids.
            known_values = set(training.to_numpy().ravel())

        itemid_results_list = []
        seen = set()
        for data in self._iter_responses():
            for item in self._items_in(data):
                # itemId values are wrapped in single-element lists
                item_id = item['itemId'][0]
                if item_id in known_values or item_id in seen:
                    continue
                seen.add(item_id)
                itemid_results_list.append(item_id)

        # GetMultipleItems accepts at most 20 ids per call.
        item_id_results = [
            ','.join(itemid_results_list[n:n + 20])
            for n in range(0, len(itemid_results_list), 20)
        ]
        return item_id_results


class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''

    def get_item_from_findItemsByCategory(self, item_id_results):
        '''
        Gets raw JSON data from multiple live listings given multiple itemIds.

        item_id_results -- iterable of comma-joined itemId strings (up to 20
                           ids each), as produced by
                           FindingApi.get_ids_from_cats.

        Returns a dict whose 'Item' key lists the items of EVERY batch.
        Other keys are those of the last response. With no batches the
        result is ``{'Item': []}``.
        '''
        data = {}
        items = []
        for twenty_id in item_id_results:
            params = {
                "callname": "GetMultipleItems",
                "appid": _APP_ID,
                "version": "671",
                "responseencoding": "JSON",
                "ItemID": twenty_id,
                "IncludeSelector": "ItemSpecifics",
            }
            response = requests.get("https://open.api.ebay.com/shopping?",
                                    params=params, timeout=_REQUEST_TIMEOUT)
            data = response.json()
            # Accumulate instead of overwriting, otherwise only the final
            # batch of 20 items would survive the loop.
            items.extend(data.get('Item', []))

        data['Item'] = items
        return data  # TODO save data as file??


class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''

    def extract_itemid(self, data):
        '''Return the ItemID of every item in ``data['Item']``.'''
        return [item['ItemID'] for item in data['Item']]

    def extract_prime_cat(self, data):
        '''Return the 'PrimaryCategory' value of every item (None if absent).

        NOTE(review): the key is kept as originally written; verify it
        against the actual GetMultipleItems payload.
        '''
        return [item.get('PrimaryCategory') for item in data['Item']]

    def extract_picture_url(self, data):
        '''Return the 'PictureURL' value of every item ([] if absent).'''
        return [item.get('PictureURL', []) for item in data['Item']]

    def extract_nvl(self, data):
        '''Return one ``{name: value}`` dict of item specifics per item.

        Items without item specifics produce an empty dict, so the result
        always lines up one-to-one with ``data['Item']``.
        '''
        rows = []
        for item in data['Item']:
            nvl = item.get('ItemSpecifics', {}).get('NameValueList', [])
            # Defensive: tolerate a single name/value pair delivered as a
            # bare object rather than a one-element list.
            if isinstance(nvl, dict):
                nvl = [nvl]
            rows.append({pair['Name']: pair['Value'] for pair in nvl})
        return rows

    def update_df(self, data):
        '''
        Extracts itemIds and name-value lists, creates one row per item and
        appends the rows to ``training.csv``.
        '''
        rows = []
        for item, specifics in zip(data['Item'], self.extract_nvl(data)):
            # ItemID is stored so later runs can skip items already
            # collected (see FindingApi.get_ids_from_cats).
            row = {'ItemID': item['ItemID']}
            row.update(specifics)
            rows.append(row)
        # todo picture urls and categories are still not part of the row;
        # probably best to extract them and build a custom df

        if not rows:
            return  # nothing fetched; avoid appending an empty header line

        df = pd.json_normalize(rows)
        # NOTE(review): mode='a' writes a header row on every call, so the
        # file accumulates repeated headers across runs.
        df.to_csv('training.csv', mode='a')


def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    service, pageNumber = input('service and pageNumber:').split()
    service = int(service)
    pageNumber = int(pageNumber)

    finding = FindingApi(service, pageNumber)
    item_id_results = finding.get_ids_from_cats()

    shopping = ShoppingApi()
    data = shopping.get_item_from_findItemsByCategory(item_id_results)

    curate = CurateData()
    curate.update_df(data)


if __name__ == "__main__":
    main()

# Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems
# service maxes out at 20 items per call, leaving you 100,000 items per day
# for your pandas dataframe initially. So you'll have to divide these up into
# the categories. This will leave you with about 6.25K results per cat.
# More than enough data for your dataset.

# Need to make sure the dataframe gets important stuff outside of nvl in
# order to access values for cross referencing itemIds from calls.
# Need to decide if the list gets accessed from the df or if you're just
# going to have list contents extracted and possibly placed into separate
# cells/labels.