import json
import requests
import pandas as pd

class FindingApi:
    '''Methods for making eBay FindingApi service calls'''

    def __init__(self, service, pageNumber):
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
        ][service]
        self.pageNumber = list(range(1, pageNumber))  # 64 pages is recommended
        # this will give equal weights to cats given call constraints

        # departments = ["3034", "93427"] (womens and mens)

    def get_data(self):
        '''
        Gets raw JSON data from the FindingApi service call
        '''
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)

        for category_id in cat_list:
            for i in self.pageNumber:
                params = {
                    "OPERATION-NAME": self.service,
                    "SECURITY-APPNAME": "scottbea-xlister-PRD-6796e0ff6-14862949",
                    "SERVICE-VERSION": "1.13.0",
                    "RESPONSE-DATA-FORMAT": "JSON",
                    "categoryId": category_id,
                    "paginationInput.entriesPerPage": "100",
                    "paginationInput.PageNumber": i
                }
                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                                        params=params)
                data = response.json()
        return data  # note: only the last response from the loops is returned here

    def get_ids_from_cats(self):
        '''
        Creates a 20-itemId list to use for the ShoppingApi call
        '''
        data = self.get_data()
        itemid_results_list = []
        try:  # TODO run pdb here to see how to extract itemId before update_df
            training = pd.read_csv('training.csv')
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                item_id = item['itemId'][0]  # itemId values are in lists for some reason
                # skip ids already in the training set and ids already collected this run
                if item_id not in training.values and item_id not in itemid_results_list:
                    itemid_results_list.append(item_id)
        except (pd.errors.EmptyDataError, FileNotFoundError):
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                item_id = item['itemId'][0]
                if item_id not in itemid_results_list:
                    itemid_results_list.append(item_id)

        item_id_results = [','.join(itemid_results_list[n:n + 20]) for n in
                           range(0, len(itemid_results_list), 20)]
        return item_id_results


class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''
    def get_item_from_findItemsByCategory(self, item_id_results):
        '''
        Gets raw JSON data from multiple live listings given multiple itemIds
        '''
        for twenty_id in item_id_results:
            params = {
                "callname": "GetMultipleItems",
                "appid": "scottbea-xlister-PRD-6796e0ff6-14862949",
                "version": "671",
                "responseencoding": "JSON",
                "ItemID": twenty_id,
                "IncludeSelector": "ItemSpecifics",
            }
            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
            data = response.json()
        return data  # TODO save data as file??


class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''
    def update_df(self, data):
        '''
        Extracts itemIds and name-value lists, creates a new dict and appends the df
        '''
        names = []
        values = []
        nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
        for nvl_dict in nvl:
            names.append(nvl_dict['Name'])
            values.append(nvl_dict['Value'])
        # TODO Also append itemId and value to the dictionary somewhere

        nvl_dict = dict(zip(names, values))
        data.update(nvl_dict)
        df = pd.json_normalize(data)
        df.to_csv('training.csv', mode='a')


def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    service, pageNumber = input('service and pageNumber: ').split()
    service = int(service)
    pageNumber = int(pageNumber)
    finding = FindingApi(service, pageNumber)
    item_id_results = finding.get_ids_from_cats()
    shopping = ShoppingApi()
    data = shopping.get_item_from_findItemsByCategory(item_id_results)
    curate = CurateData()
    curate.update_df(data)


if __name__ == "__main__":
    main()

# Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems service maxes out
# at 20 items per call, leaving you 100,000 items per day for your pandas dataframe initially.
# So you'll have to divide these up into the categories. This will leave you with about 6.25K
# results per cat. More than enough data for your dataset.
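
# The arithmetic above, as a small sketch. The figures (5,000 calls/day, 20 items per
# GetMultipleItems call, ~16 categories) are assumptions pulled from these notes, not
# values fetched from eBay at runtime.
def daily_item_budget(calls_per_day=5000, items_per_call=20, n_categories=16):
    '''Returns (total items per day, items per category per day) under the assumed limits'''
    total_items = calls_per_day * items_per_call       # 5,000 * 20 = 100,000 items/day
    return total_items, total_items // n_categories    # ~6,250 items per category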

# Need to make sure the dataframe gets important stuff outside of nvl in order to
# access values for cross referencing itemIds from calls.
# Need to decide if the list gets accessed from the df or if you're just going to have
# list contents extracted and possibly placed into separate cells/labels.
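
# One possible shape for those rows, sketched as a hypothetical helper (not called anywhere
# yet): keep ItemID as its own column so rows can be cross referenced against later calls,
# and spill each NameValueList entry into its own cell rather than leaving them nested.
def item_to_row(item):
    '''Flattens one GetMultipleItems item into a dict: ItemID plus its ItemSpecifics'''
    row = {'ItemID': item.get('ItemID')}
    for nvl in item.get('ItemSpecifics', {}).get('NameValueList', []):
        row[nvl['Name']] = nvl['Value']  # Value may come back as a list in the raw JSON
    return row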