# ebay-ml-lister/ebay_api.py

import json
import requests
import pandas as pd

class FindingApi:
    '''
    Collects itemIds from eBay Finding API calls.

    service is an index into the list of supported operation names and
    pageNumber is the exclusive upper bound of the pages requested per
    category (pages 1 .. pageNumber - 1 are fetched). Both may be given as
    ints or as numeric strings.
    '''
    def __init__(self, service, pageNumber):
        # int() lets values read from input() (strings) be used directly
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
            ][int(service)]
        self.pageNumber = list(range(1, int(pageNumber))) # 64 pages is recommended
        # this will give equal weights to cats given call restraints

    # departments = ["3034","93427"] (womens and mens)
    def _fetch_page(self, category_id, page):
        '''
        Requests one result page for one category and returns the parsed
        JSON body of the response
        '''
        # NOTE(review): the app id is hard-coded; consider loading it from
        # configuration instead of keeping it in source control.
        # NOTE(review): eBay's docs appear to spell the page parameter
        # "paginationInput.pageNumber" (lower-case p) - confirm that the
        # casing used below is honoured by the service.
        params = {
            "OPERATION-NAME":self.service,
            "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
            "SERVICE-VERSION":"1.13.0",
            "RESPONSE-DATA-FORMAT":"JSON",
            "categoryId":category_id,
            "paginationInput.entriesPerPage":"100",
            "paginationInput.PageNumber":page
            }
        response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                params=params)
        return response.json()

    def get_data(self):
        '''
        Gets JSON data from the FindingApi service for every category id in
        cat_list.txt and every page in self.pageNumber.

        Returns a dict shaped like a single service response
        ({'<service>Response': [{'searchResult': [{'item': [...]}]}]}).
        The first response received is kept as the envelope and its item
        list is replaced by the combined items of all fetched pages, so any
        other fields in it describe only that first page. When nothing is
        fetched the same structure is returned with an empty item list.

        Raises KeyError/IndexError if a response lacks the expected
        '<service>Response' / 'searchResult' structure.
        '''
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)
        response_key = self.service + 'Response'
        merged = None
        items = []
        for category_id in cat_list:
            for i in self.pageNumber:
                page = self._fetch_page(category_id, i)
                search_result = page[response_key][0]['searchResult'][0]
                # tolerate a page that carries no 'item' entry
                items.extend(search_result.get('item', []))
                if merged is None:
                    merged = page
                    # later pages keep extending this same list object
                    search_result['item'] = items
        if merged is None:
            merged = {response_key: [{'searchResult': [{'item': items}]}]}
        return merged

    def get_ids_from_cats(self):
        '''
        Creates a list of comma-joined itemId strings (at most 20 ids per
        string) to use for the ShoppingApi call.

        Ids that already appear as a cell value in training.csv are left
        out; a missing or empty training.csv means nothing is filtered.
        '''
        data = self.get_data()
        items = data[self.service + 'Response'][0]['searchResult'][0].get('item', [])
        try:
            # read everything as text so ids compare equal to the API's strings
            training = pd.read_csv('training.csv', dtype=str)
            known_values = set(training.values.ravel())
        except (FileNotFoundError, pd.errors.EmptyDataError):
            known_values = set()
        # itemId values are in lists for some reason
        itemid_results_list = [
            item['itemId'][0] for item in items
            if item['itemId'][0] not in known_values
            ]
        item_id_results = [','.join(itemid_results_list[n:n+20])
                           for n in range(0, len(itemid_results_list), 20)]

        return item_id_results

class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''
    def _get_multiple_items(self, twenty_id):
        '''
        Performs one GetMultipleItems call for a comma-joined string of
        itemIds and returns the parsed JSON body of the response
        '''
        # NOTE(review): the app id is hard-coded; consider loading it from
        # configuration instead of keeping it in source control.
        params = {
            "callname":"GetMultipleItems",
            "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
            "version":"671",
            "responseencoding":"JSON",
            "ItemID":twenty_id,
            "IncludeSelector":"ItemSpecifics",
            }

        response = requests.get("https://open.api.ebay.com/shopping?", params=params)
        return response.json()

    def get_item_from_findItemsByCategory(self, item_id_results):
        '''
        Gets raw JSON data from multiple live listings given multiple itemIds.

        item_id_results is an iterable of comma-joined itemId strings; one
        request is made per string. The first response is returned with the
        'Item' entries of every later response appended to its 'Item' list,
        so its remaining fields describe only that first call.
        Returns None when item_id_results is empty.
        '''
        data = None
        for twenty_id in item_id_results:
            batch = self._get_multiple_items(twenty_id)
            if data is None:
                # the first response doubles as the container for the rest
                data = batch
                continue
            more_items = batch.get('Item', [])
            if more_items:
                data.setdefault('Item', []).extend(more_items)
        return data

class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''
    def update_df(self, data):
        '''
        Appends one row built from data to training.csv.

        data is expected to look like data['Item'][0]['ItemSpecifics']
        ['NameValueList'] -> list of {'Name': ..., 'Value': ...} dicts.
        The name/value pairs of the first item are added as top-level keys
        next to the original content of data, the result is normalized into
        a dataframe and appended to training.csv (the header and index are
        written on every call). data itself is not modified.

        Raises KeyError/IndexError/TypeError if data does not have the
        expected structure (including data being None).
        '''
        nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
        if isinstance(nvl, dict):
            # a lone name/value pair delivered without the surrounding list
            nvl = [nvl]

        # TODO Try to extract value from list here
        specifics = {nvl_dict['Name']: nvl_dict['Value'] for nvl_dict in nvl}

        # merge into a copy so the caller's response dict stays untouched
        record = dict(data)
        record.update(specifics)
        df = pd.json_normalize(record)
        df.to_csv('training.csv', mode='a')

def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings.

    Reads two whitespace-separated integers from stdin: the index of the
    Finding API service to use and the pageNumber bound handed to FindingApi.
    Raises ValueError if the input is not exactly two integers.
    '''
    service, pageNumber = input('service and pageNumber:').split()
    # input() yields strings; FindingApi indexes a list and builds a range
    # with these values, so convert them to ints here
    finding = FindingApi(int(service), int(pageNumber))
    item_id_results = finding.get_ids_from_cats()
    shopping = ShoppingApi()
    data = shopping.get_item_from_findItemsByCategory(item_id_results)
    curate = CurateData()
    curate.update_df(data)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Limited to 5000 calls to the shopping api per day, and the GetMultipleItems service maxes out at
# 20 items per call, leaving you 100,000 items per day for your pandas dataframe initially. So
# you'll have to divide these up into the categories. This will leave you with about 6.25K
# results per cat. More than enough data for your dataset.

# Need to make sure dataframe gets important stuff outside of nvl in order to
# access values for cross referencing itemIds from calls
# Need to decide if list gets accessed from df or if you're just going to have
# list contents extracted and possibly placed into separate cells/labels
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`import json`
			`import requests`
			`import pandas as pd`

			`class FindingApi:`
			`'''Some docstring to get rid of linting errors'''`
corrected pageNumber and service params. Need to fix typeError on line 24-27 2020-10-12 18:48:15 +00:00			`def __init__(self, service, pageNumber):`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`self.service = [`
			`'findItemsAdvanced', 'findCompletedItems',`
			`'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',`
			`'findItemsByProduct'`
corrected pageNumber and service params. Need to fix typeError on line 24-27 2020-10-12 18:48:15 +00:00			`][service]`
			`self.pageNumber = list(range(1, pageNumber)) # 64 pages is recommended`
			`# this will give equal weights to cats given call restraints`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00
			`# departments = ["3034","93427"] (womens and mens)`
added get_data method for easier debugging. Added zeroth index term to ...['itemId'] 2020-10-13 00:55:07 +00:00			`def get_data(self):`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`'''# Gets raw JSON data fom FindingApi service call`
			`'''`
			`with open('cat_list.txt') as jf:`
			`cat_list = json.load(jf)`
added get_data method for easier debugging. Added zeroth index term to ...['itemId'] 2020-10-13 00:55:07 +00:00			`for category_id in cat_list:`
			`for i in self.pageNumber:`
			`params = {`
added get_data method call to get_ids_from_cats call 2020-10-13 01:42:57 +00:00			`"OPERATION-NAME":self.service,`
added get_data method for easier debugging. Added zeroth index term to ...['itemId'] 2020-10-13 00:55:07 +00:00			`"SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",`
			`"SERVICE-VERSION":"1.13.0",`
			`"RESPONSE-DATA-FORMAT":"JSON",`
			`"categoryId":category_id,`
			`"paginationInput.entriesPerPage":"100",`
added get_data method call to get_ids_from_cats call 2020-10-13 01:42:57 +00:00			`"paginationInput.PageNumber":i`
added get_data method for easier debugging. Added zeroth index term to ...['itemId'] 2020-10-13 00:55:07 +00:00			`}`
			`response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",`
			`params=params)`
			`data = response.json()`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`return data`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00
			`def get_ids_from_cats(self):`
corrected pageNumber and service params. Need to fix typeError on line 24-27 2020-10-12 18:48:15 +00:00			`'''`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`Creates a 20-itemId list to use for the ShoppingApi`
corrected pageNumber and service params. Need to fix typeError on line 24-27 2020-10-12 18:48:15 +00:00			`call`
			`'''`
added CurateData class for preprocessing pipeline fucntionality 2020-10-17 23:21:11 +00:00			`data = self.get_data()`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`itemid_results_list = []`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`try:`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`training = pd.read_csv('training.csv')`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`if item not in training.values:`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`itemid_results_list.append(item['itemId'][0]) # itemId`
			`# values are in lists for some reason`
			`except pd.errors.EmptyDataError:`
			`for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`itemid_results_list.append(item['itemId'][0])`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,`
			`len(itemid_results_list), 20))]`

			`return item_id_results`

changed big_data names to training 2020-10-18 07:08:04 +00:00			`class ShoppingApi:`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`'''`
corrected pageNumber and service params. Need to fix typeError on line 24-27 2020-10-12 18:48:15 +00:00			`Creates objects from ShoppingApi service calls that can interact with`
			`pandas dataframes`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`'''`
			`def get_item_from_findItemsByCategory(self, item_id_results):`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`'''`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`Gets raw JSON data from multiple live listings given multiple itemIds`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`'''`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`for twenty_id in item_id_results:`
			`params = {`
			`"callname":"GetMultipleItems",`
			`"appid":"scottbea-xlister-PRD-6796e0ff6-14862949",`
			`"version":"671",`
			`"responseencoding":"JSON",`
			`"ItemID":twenty_id,`
			`"IncludeSelector":"ItemSpecifics",`
			`}`

			`response = requests.get("https://open.api.ebay.com/shopping?", params=params)`
			`data = response.json()`
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`return data`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`class CurateData:`
			`'''`
			`Contains functions for curating data for machine learning training sets`
			`'''`
			`def update_df(self, data):`
			`names = []`
			`values = []`
			`nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`for nvl_dict in nvl:`
			`names.append(nvl_dict['Name'])`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`values.append(nvl_dict['Value']) # TODO Try to excract value from list here`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`nvl_dict = dict(zip(names, values))`
			`data.update(nvl_dict)`
			`df = pd.json_normalize(data)`
Changed to_csv mode to append to allow for writing if not present and appending if present 2020-10-18 20:56:16 +00:00			`df.to_csv('training.csv', mode='a')`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`def main():`
			`'''`
			`Main program creates/updates a csv file to use for ML training from live`
			`ebay listings`
			`'''`
			`service, pageNumber = input('service and pageNumber:').split()`
			`finding = FindingApi(service, pageNumber)`
			`item_id_results = finding.get_ids_from_cats()`
			`shopping = ShoppingApi()`
			`data = shopping.get_item_from_findItemsByCategory(item_id_results)`
			`curate = CurateData()`
			`curate.update_df(data)`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00
added error handling for initial/empty csv file 2020-10-18 00:22:45 +00:00			`if __name__ == "__main__":`
			`main()`
comment before adding pagenumber and service to init 2020-10-12 07:53:29 +00:00			`# Limited to 5000 calls to shopping api per day, and getMultpileitems service maxes out at 20 items`
			`# per call leaving you 100,000 items per day for you pandas dataframe initially. So you'll have`
			`# to divide these up into the categories. This will leave you with about 6.25K results per cat.`
			`# More than enough data for your dataset.`

added CurateData class for preprocessing pipeline fucntionality 2020-10-17 23:21:11 +00:00			`# Need to make sure dataframe gets important stuff outside of nvl in order to`
			`# access values for cross referencing itemIds from calls`
			`# Need to decide if list gets accessed from df or if you're just going to have`
			`# list contents extracted and possibly placed into separate cells/labels`