From c8ab1b13d9ae7fde2e1ee9648d77d0d00c6f783a Mon Sep 17 00:00:00 2001
From: spbeach46
Date: Mon, 10 May 2021 15:14:57 -0700
Subject: [PATCH] Added method to update category IDs var and txt file

---
 ebay_api.py | 98 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 75 insertions(+), 23 deletions(-)

diff --git a/ebay_api.py b/ebay_api.py
index 59a47de..15665ae 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -11,7 +11,10 @@ import shutil
 import re
 
 class FindingApi:
-    '''Methods for accessing eBays FindingApi services'''
+    '''
+    Methods for accessing eBay's FindingApi services
+    '''
+
     def __init__(self, service, pageNumber):
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
@@ -20,13 +23,45 @@ class FindingApi:
         ][service]
         self.pageNumber = list(range(1, pageNumber)) # 77 pgs will give equal weights to cats given call constraints
 
-    # departments = ["3034","93427"] (womens and mens)
-
     # examples of additional params you may want to add:
     # 'itemFilter(0).value':'Used'
     # 'itemFilter(1).name':'ListingType'
     # 'itemFilter(1).value':'AuctionWithBIN'
 
+    def update_cats(self):
+
+        parent_cats = ['3034', '93427']
+        cat_list = []
+
+        for department in parent_cats:
+
+            params = {
+                "callname":"GetCategoryInfo",
+                "appid":cfg.sec['SECURITY-APPNAME'],
+                "version":"671",
+                "responseencoding":"JSON",
+                "CategoryID":department,
+                "IncludeSelector":"ChildCategories",
+            }
+
+            try:
+                response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
+                response.raise_for_status()
+
+            except requests.exceptions.RequestException:
+                print('connection error')
+
+            response = response.json()
+            response = response['CategoryArray']['Category'][1:]
+            temp_cat_list = [cat['CategoryID'] for cat in response]
+            cat_list.extend(temp_cat_list)
+
+        with open('cat_list.txt', 'w') as f:
+            json.dump(cat_list, f)
+
+        # leaf_list = [node['LeafCategory'] for node in response]
+        return cat_list
+
     def get_data(self, category_id, i):
 
         '''
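The update_cats hunk above queries eBay's GetCategoryInfo call once per parent department ('3034' and '93427', the womens and mens categories named in the comment it replaces), slices off the parent node, and flattens the child CategoryID values into both cat_list.txt and the returned list. A minimal usage sketch, assuming the module is importable as ebay_api and that cfg.sec['SECURITY-APPNAME'] holds a valid eBay app ID; the (4, 2) constructor arguments mirror the test values conky() uses below:

    import json

    import ebay_api

    finding = ebay_api.FindingApi(4, 2)  # service index 4, pageNumber 2 (test values)
    cat_list = finding.update_cats()     # one GetCategoryInfo request per parent category

    # update_cats persists what it returns, so file and return value agree
    with open('cat_list.txt') as f:
        assert json.load(f) == cat_list

Note that update_cats' except branch only prints, so a failed request still falls through to response.json(); the "# TODO need better handling" flag in the next hunk applies here as well.
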
@@ -127,7 +162,7 @@ class ShoppingApi:
             response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
             response.raise_for_status()
 
-        except requests.exceptions.RequestException:
+        except requests.exceptions.RequestException: # TODO need better handling
             print('connection error')
 
         response = response.json()
@@ -137,8 +172,8 @@ class ShoppingApi:
 
     def conky(self):
         '''
-        For some reason item_id_results can only be passed as argument in executor.map
-        if the variable is made within function
+        Runs get_item_from_findItemsByCategory in multiple threads to get relevant
+        data for creating training sets
         '''
         data = []
         finding = FindingApi(4, 2) # TODO replace these test values before production
@@ -157,7 +192,7 @@ class ShoppingApi:
 class CurateData:
     '''
-    Contains functions for curating data for machine learning training sets;
+    Contains methods for curating data for machine learning training sets;
     Takes item in data from ShoppingApi request as argument and extracts/
     creates key value pairs that gets updated to custom dataframe used in Ml
     training sets.
     '''
@@ -220,7 +255,7 @@ class CurateData:
 
         return extracted_df
 
-    def drop_nvl_cols(self, nvl_training):
+    def drop_nvl_cols(self, nvl_training): # NOTE this is wonky
         col_drop = [
             'Fabric Type', 'Type of Sport', 'Mid Sole', 'Modified Item',
             'Modification Description', 'Article Type', 'Customized',
@@ -281,15 +316,16 @@ class CurateData:
         try:
             with open('temp_pics_source_list.txt') as f:
-                temp_pics_source_list = json.load(f)
-                temp_pics_source_list.append(temp_pics_source_list)
-                temp_pics_source_list = list(set(temp_pics_source_list))
+                tpsl = json.load(f)
+                tpsl.extend(temp_pics_source_list)
+                temp_pics_source_list = list(set(tpsl))
                 with open('temp_pics_source_list.txt', 'w') as f:
                     json.dump(temp_pics_source_list, f)
 
         except (ValueError, FileNotFoundError):
             with open('temp_pics_source_list.txt', 'w') as f:
                 json.dump(temp_pics_source_list, f)
+        # TODO still need to save these as csv files
 
         return expanded_class, expanded_dropd
 
     def dl_pictures(self, *args):
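The temp_pics_source_list hunk above fixes a self-append bug: the old code appended the list to itself instead of merging the cached copy with the new URLs, so the dedupe ran over the wrong data. The corrected load-extend-dedupe pattern recurs in this file (the dict_pics cache two hunks down carries a similar "# TODO" about duplicates), so it could arguably live in one helper. A sketch of that idea, assuming URL order is irrelevant since set() discards it; merge_json_list is hypothetical, not part of the patch:

    import json

    def merge_json_list(path, new_items):
        '''Merge new_items into the JSON list stored at path, deduplicated.'''
        try:
            with open(path) as f:
                cached = json.load(f)          # list persisted by a prior run
        except (ValueError, FileNotFoundError):
            cached = []                        # first run, or an empty/corrupt file
        merged = list(set(cached) | set(new_items))
        with open(path, 'w') as f:
            json.dump(merged, f)
        return merged
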
@@ -301,16 +337,16 @@ class CurateData:
         try:
             with open('target_dirs.txt', 'r+') as f: # TODO you can add option to change directory here, too. Look up how to have optional arguments
                 target_dir = json.load(f)
-
         except (ValueError, FileNotFoundError):
-            target_dir = input('No target dirctory found. Create One? [y] or [n]:')
+            target_dir = input('No target dirctory found. Create One? [y] or [n]:')
 
             if target_dir == ('y' or 'Y'):
-                target_dir = input('Please provide full URL to destination folder:')
-                with open('target_dirs.txt','w+') as f:
+                target_dir = input('Please provide full URL to destination folder:') # TODO need to catch human syntax errors here
+                with open('target_dirs.txt','w') as f:
                     json.dump(target_dir, f)
             else:
-                target_dir = os.mkdir(os.getcwd()+os.sep+'training_images')
-                with open('target_dirs.txt','w+') as f:
+                os.mkdir(os.getcwd()+os.sep+'training_images')
+                target_dir = os.getcwd()+os.sep+'training_images'
+                with open('target_dirs.txt','w') as f:
                     json.dump(target_dir, f)
                 print('Creating default folder in current directory @ ' + target_dir)
@@ -332,7 +368,7 @@ class CurateData:
         try:
             with open('dict_pics.txt') as f:
                 dict_pics = json.load(f)
-                dict_pics.update(temp_dict_pics)
+                dict_pics.update(temp_dict_pics) # TODO This still creates duplicates
                 with open('dict_pics.txt', 'w') as f:
                     json.dump(dict_pics, f)
 
@@ -344,7 +380,8 @@ class CurateData:
         def dl_pic(dict_pics, pic):
 
             if os.path.exists(dict_pics[pic]): # or call temp_dict_pics[pic] can work
-                pass
+                pass # TODO This is not catching duplicates for some reason....possibly not? Upon inspection, files aren't duplicates...but why?
+                #TODO it would mean that temp_pics_source_list is changing for some reason?
 
             else:
                 r = requests.get(pic, stream=True)
@@ -352,16 +389,31 @@ class CurateData:
                     with open(temp_dict_pics[pic], 'wb') as f: # Or call dict_pics[pic] can work
                         shutil.copyfileobj(r.raw, f)
 
-        breakpoint()
         bargs = [(dict_pics, pic) for pic in temp_pics_source_list]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(lambda p: dl_pic(*p), bargs):
                 future
 
-        with open('temp_pics_source_list.txt','w') as f: # Overwrites old when complete
-            temp_pics_source_list = []
-            json.dump(temp_pics_source_list, f)
+        os.remove('temp_pics_source_list.txt') # Deletes file after downloads complete successfully
+
+class PreProcessing:
+    '''
+    Includes methods for pre-processing training set input and labels in the
+    training set created from CurateData class. Whereas CurateData training
+    sets provided trimmed down data from the raw json response from the
+    ShoppingApi call and provided a bare minimum format for the dataframe to be
+    used in training, PreProcessing optimizes that dataframe for training and
+    includes methods for image manipulation, creating test/train/validation
+    splits, etc.
+    '''
+
+    def stt_training(self, dict_pics, expanded_class, expanded_dropd):
+        '''
+        Source to target training. Replaces source image URL with target URL
+        determined by values in dict_pics variable.
+        '''
+        pass
 
 # TODO pipeline gameplan: 5 files: master img download dict,raw_json.txt, raw_json.csv, master_class_training.csv, master_nvl_training.csv
 # cont... open raw_json.txt and append, same with csv --> process new data --> pull out image source+dest and expand new dfs for the additional pictures
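One catch survives in the dl_pictures hunk: the expression ('y' or 'Y') short-circuits to plain 'y', so the prompt only ever matches a lowercase answer and an uppercase Y silently falls into the default-folder branch. The commit does fix the adjacent bug of assigning os.mkdir's None return to target_dir, but the comparison stays. A sketch of the corrected prompt logic only, not part of the commit (prompt_target_dir is hypothetical):

    import json
    import os

    def prompt_target_dir():
        answer = input('No target directory found. Create one? [y] or [n]:')
        if answer.strip().lower() == 'y':  # matches 'y' and 'Y'; ('y' or 'Y') is just 'y'
            target_dir = input('Please provide full path to destination folder:')
        else:
            target_dir = os.path.join(os.getcwd(), 'training_images')
            os.makedirs(target_dir, exist_ok=True)  # tolerate an existing default folder
            print('Creating default folder in current directory @ ' + target_dir)
        with open('target_dirs.txt', 'w') as f:
            json.dump(target_dir, f)
        return target_dir

The duplicate-download TODO in dl_pic may also be a thread race rather than a list problem: with ThreadPoolExecutor, two workers can both see os.path.exists return False before either finishes writing the file.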