'''Build a CSV training set for machine learning from live eBay listings.

Workflow: the Finding API lists items per category, the Shopping API fetches
the item specifics for those items in batches of 20, and the flattened result
is stored in ``training.csv``.
'''
import json
import logging
import os

import pandas as pd
import requests

logger = logging.getLogger(__name__)

# NOTE(review): credentials do not belong in source control.  The literal is
# kept as a fallback so existing setups keep working; prefer EBAY_APP_ID.
APP_ID = os.environ.get('EBAY_APP_ID', 'scottbea-xlister-PRD-6796e0ff6-14862949')
FINDING_URL = 'https://svcs.ebay.com/services/search/FindingService/v1'
SHOPPING_URL = 'https://open.api.ebay.com/shopping?'
CATEGORY_FILE = 'cat_list.txt'
TRAINING_CSV = 'training.csv'
IDS_PER_CALL = 20      # GetMultipleItems accepts at most 20 item ids per call
REQUEST_TIMEOUT = 30   # seconds; requests waits forever when no timeout is set


class FindingApi:
    '''Client for the eBay Finding API.

    Args:
        service: Index into ``SERVICES`` selecting the operation to call.
        pageNumber: Number of result pages to request for every category.
    '''

    SERVICES = (
        'findItemsAdvanced',
        'findCompletedItems',
        'findItemsByKeywords',
        'findItemsIneBayStores',
        'findItemsByCategory',
        'findItemsByProduct',
    )

    def __init__(self, service, pageNumber):
        self.service = self.SERVICES[service]
        # Pages are 1-based; the upper bound is inclusive so that asking for
        # N pages really requests pages 1..N (range(1, N) dropped the last
        # page and requested nothing at all for N == 1).
        # 64 pages is recommended; this will give equal weights to cats given
        # call restraints.
        # departments = ["3034","93427"] (womens and mens)
        self.pageNumber = list(range(1, pageNumber + 1))

    @staticmethod
    def _unpack_page(payload, response_key):
        '''Return ``(envelope, items)`` for one response, or None if malformed.

        A well-formed page without results yields an empty item list, because
        the ``item`` key is simply absent in that case.
        '''
        try:
            envelope = payload[response_key][0]
            items = envelope['searchResult'][0].get('item', [])
        except (AttributeError, IndexError, KeyError, TypeError):
            return None
        return envelope, items

    @staticmethod
    def _known_item_ids(path=TRAINING_CSV):
        '''Return the ids already stored in the training CSV as strings.

        Uses the ``ItemID`` column when present; otherwise every cell value is
        considered, which mirrors the original "anywhere in the file" check.
        A missing or empty file yields an empty set.
        '''
        try:
            # dtype=str keeps long numeric ids comparable to the API's strings.
            training = pd.read_csv(path, dtype=str)
        except (pd.errors.EmptyDataError, FileNotFoundError):
            return set()
        if 'ItemID' in training.columns:
            return set(training['ItemID'].dropna())
        return set(training.to_numpy().ravel().tolist())

    @staticmethod
    def _batch(item_ids, size=IDS_PER_CALL):
        '''Join ids into comma-separated strings of at most ``size`` ids.'''
        return [','.join(item_ids[start:start + size])
                for start in range(0, len(item_ids), size)]

    def get_data(self):
        '''Fetch every page of every category and merge them into one payload.

        The categories are read from ``cat_list.txt`` (a JSON list).

        Returns:
            A dict shaped like a single Finding API JSON response
            (``{'<service>Response': [{'searchResult': [{'item': [...]}]}]}``)
            whose item list holds the items of all requested pages.  The
            remaining envelope fields come from the last well-formed response.
        '''
        with open(CATEGORY_FILE) as jf:
            cat_list = json.load(jf)

        response_key = f'{self.service}Response'
        envelope = {}
        items = []
        for category_id in cat_list:
            for page in self.pageNumber:
                params = {
                    "OPERATION-NAME": self.service,
                    "SECURITY-APPNAME": APP_ID,
                    "SERVICE-VERSION": "1.13.0",
                    "RESPONSE-DATA-FORMAT": "JSON",
                    "categoryId": category_id,
                    "paginationInput.entriesPerPage": "100",
                    # The documented spelling is lower-case "pageNumber".
                    "paginationInput.pageNumber": page,
                }
                response = requests.get(FINDING_URL, params=params,
                                        timeout=REQUEST_TIMEOUT)
                unpacked = self._unpack_page(response.json(), response_key)
                if unpacked is None:
                    # One bad page (e.g. an error payload) must not discard
                    # the pages that were fetched successfully.
                    logger.warning('Skipping malformed response for category '
                                   '%s, page %s', category_id, page)
                    continue
                envelope, page_items = unpacked
                items.extend(page_items)

        merged = dict(envelope)
        merged['searchResult'] = [{'@count': str(len(items)), 'item': items}]
        return {response_key: [merged]}

    def get_ids_from_cats(self):
        '''Collect the item ids that are not yet part of the training set.

        Returns:
            A list of strings, each holding up to 20 comma-separated item ids,
            ready to be used as ``ItemID`` in a GetMultipleItems call.
        '''
        data = self.get_data()
        # The response root is named after the operation that was called.
        items = data[f'{self.service}Response'][0]['searchResult'][0]['item']
        known_ids = self._known_item_ids()

        # itemId values are wrapped in single-element lists by the API.
        found_ids = (item['itemId'][0] for item in items)
        # dict.fromkeys removes duplicates (the same listing can show up on
        # several pages) while keeping the original order.
        new_ids = [item_id for item_id in dict.fromkeys(found_ids)
                   if item_id not in known_ids]
        return self._batch(new_ids)


class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''

    def get_item_from_findItemsByCategory(self, item_id_results):
        '''Fetch the item specifics of live listings for batches of item ids.

        Args:
            item_id_results: Iterable of strings with up to 20 comma-separated
                item ids each.

        Returns:
            A dict shaped like a single GetMultipleItems JSON response whose
            ``Item`` list holds the items of all batches.  The other fields
            are those of the last response received.
        '''
        envelope = {}
        items = []
        for twenty_id in item_id_results:
            params = {
                "callname": "GetMultipleItems",
                "appid": APP_ID,
                "version": "671",
                "responseencoding": "JSON",
                "ItemID": twenty_id,
                "IncludeSelector": "ItemSpecifics",
            }
            response = requests.get(SHOPPING_URL, params=params,
                                    timeout=REQUEST_TIMEOUT)
            envelope = response.json()
            batch_items = envelope.get('Item')
            if batch_items is None:
                logger.warning('No items returned for ids %s', twenty_id)
                continue
            if isinstance(batch_items, dict):
                batch_items = [batch_items]
            items.extend(batch_items)

        merged = dict(envelope)
        merged['Item'] = items
        return merged  # TODO save data as file??


class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''

    @staticmethod
    def _flatten_item(item):
        '''Return one flat record: the item's own fields plus its specifics.

        Every name/value pair of ``ItemSpecifics`` becomes a field of its own.
        Multi-valued specifics are joined with ", ".  A specific never
        overwrites a field of the listing itself (e.g. ``ItemID``).
        '''
        row = {key: value for key, value in item.items()
               if key != 'ItemSpecifics'}
        specifics = (item.get('ItemSpecifics') or {}).get('NameValueList', [])
        if isinstance(specifics, dict):
            specifics = [specifics]
        for pair in specifics:
            name = pair.get('Name')
            if name is None:
                continue
            value = pair.get('Value')
            if isinstance(value, (list, tuple)):
                value = ', '.join(str(entry) for entry in value)
            row.setdefault(name, value)
        return row

    def update_df(self, data, csv_path=TRAINING_CSV):
        '''Append one row per item in ``data`` to the training CSV.

        Args:
            data: GetMultipleItems response dict; its ``Item`` list is used.
            csv_path: Target CSV file, created when it does not exist yet.
        '''
        rows = [self._flatten_item(item) for item in data.get('Item', [])]
        if not rows:
            return
        df = pd.json_normalize(rows)

        # Different listings carry different specifics, so a plain
        # ``mode='a'`` append would misalign columns and repeat the header.
        # Merge with the stored rows and rewrite the file instead.
        try:
            existing = pd.read_csv(csv_path, dtype=str)
        except (pd.errors.EmptyDataError, FileNotFoundError):
            combined = df
        else:
            combined = pd.concat([existing, df], ignore_index=True, sort=False)
        combined.to_csv(csv_path, index=False)


def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    service, pageNumber = input('service and pageNumber:').split()
    service = int(service)
    pageNumber = int(pageNumber)

    finding = FindingApi(service, pageNumber)
    item_id_results = finding.get_ids_from_cats()

    shopping = ShoppingApi()
    data = shopping.get_item_from_findItemsByCategory(item_id_results)

    curate = CurateData()
    curate.update_df(data)


if __name__ == "__main__":
    main()

# Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems
# service maxes out at 20 items per call, leaving you 100,000 items per day for
# your pandas dataframe initially. So you'll have to divide these up into the
# categories. This will leave you with about 6.25K results per cat.
# More than enough data for your dataset.
# Need to make sure the dataframe gets the important fields outside of the
# NameValueList (nvl) in order to access values for cross-referencing itemIds
# from calls.
# Need to decide whether the list gets accessed from the df or whether the
# list contents are extracted and possibly placed into separate cells/labels.