added error handling for initial/empty csv file

2020-10-17 17:22:45 -07:00
parent ae5a4e92bb
commit 7c338e69d5
1 changed files with 49 additions and 20 deletions
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -2,11 +2,6 @@ import json
 import requests
 import pandas as pd

-with open('cat_list.txt') as jf:
-    cat_list = json.load(jf)
-
-big_data = pd.read_csv('big_data.csv')
-
 class FindingApi:
    '''Some docstring to get rid of linting errors'''
    def __init__(self, service, pageNumber):
@@ -20,6 +15,10 @@ class FindingApi:

    # departments = ["3034","93427"] (womens and mens)
    def get_data(self):
+        '''# Gets raw JSON data from FindingApi service call
+        '''
+        with open('cat_list.txt') as jf:
+            cat_list = json.load(jf)
        for category_id in cat_list:
            for i in self.pageNumber:
                params = {
@@ -34,19 +33,25 @@ class FindingApi:
                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                        params=params)
                data = response.json()
-            return data
+            return data # May want to save raw json as text file here or in main

    def get_ids_from_cats(self):
        '''
-        get_ids_from cats creates a 20-itemId list to use for the ShoppingApi
+        Creates a 20-itemId list to use for the ShoppingApi
        call
        '''
        data = self.get_data()
        itemid_results_list = []
-        for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-            if item not in big_data.values:
+        try:
+            big_data = pd.read_csv('big_data.csv')
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
+                if item not in big_data.values:
+                    itemid_results_list.append(item['itemId'][0]) # itemId
+                    # values are in lists for some reason
+        except pd.errors.EmptyDataError:
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                itemid_results_list.append(item['itemId'][0]) # itemId
-                # values are in lists for some reason
+                    # values are in lists for some reason
        item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
            len(itemid_results_list), 20))]

@@ -58,6 +63,9 @@ class ShoppingApi():
    pandas dataframes
    '''
    def get_item_from_findItemsByCategory(self, item_id_results):
+        '''
+        Gets raw JSON data from multiple live listings
+        '''
        for twenty_id in item_id_results:
            params = {
                "callname":"GetMultipleItems",
@@ -70,20 +78,41 @@ class ShoppingApi():

            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
            data = response.json()
+            return data
+            # Maybe end def here and create new def for curating data
+class CurateData:
+    '''
+    Contains functions for curating data for machine learning training sets
+    '''
+    def update_df(self, data):
+        names = []
+        values = []
+        nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]

-            names = []
-            values = []
-            nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]
+        for nvl_dict in nvl:
+            names.append(nvl_dict['Name'])
+        values.append(nvl_dict['Value']) # Try to extract value from list here

-            for nvl_dict in nvl:
-                names.append(nvl_dict['Name'])
-            values.append(nvl_dict['Value']) # Try to excract value from list here
+        nvl_dict = dict(zip(names, values))
+        data.update(nvl_dict)
+        df = pd.json_normalize(data)
+        df.to_csv('big_data.csv')

-            nvl_dict = dict(zip(names, values))
-            data.update(nvl_dict)
-            df = pd.json_normalize(data)
-            df.to_csv('big_data.csv')
+def main():
+    '''
+    Main program creates/updates a csv file to use for ML training from live
+    ebay listings
+    '''
+    service, pageNumber = input('service and pageNumber:').split()
+    finding = FindingApi(service, pageNumber)
+    item_id_results = finding.get_ids_from_cats()
+    shopping = ShoppingApi()
+    data = shopping.get_item_from_findItemsByCategory(item_id_results)
+    curate = CurateData()
+    curate.update_df(data)

+if __name__ == "__main__":
+    main()
 # Limited to 5000 calls to shopping api per day, and GetMultipleItems service maxes out at 20 items
 # per call leaving you 100,000 items per day for your pandas dataframe initially. So you'll have
 # to divide these up into the categories. This will leave you with about 6.25K results per cat.