From 5582cd29ef1dba57945a67a8f01360f04ca5f6cc Mon Sep 17 00:00:00 2001
From: spbeach46
Date: Tue, 1 Jun 2021 22:28:54 -0700
Subject: [PATCH] working

---
 ebay_api.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/ebay_api.py b/ebay_api.py
index 7fdb368..92ede92 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -35,6 +35,10 @@ class FindingApi:
         get itemIDs from categories;
         '''

+        '''
+        Consider using the sortOrder param to update by the latest listings first.
+        Also consider using the exclude-duplicates param.
+        '''
         params = {
             "OPERATION-NAME":self.service,
             "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -79,7 +83,8 @@ class FindingApi:

         for category_id in cat_list:

-            args = [(category_id, i) for i in pages]
+            args = [(category_id, i) for i in pages] # NOTE: alternatively, extend args into one master list of tuples covering all cats
+            # instead of running concurrent.futures.ThreadPoolExecutor in a loop. Might be faster.

             with concurrent.futures.ThreadPoolExecutor() as executor:
                 for future in executor.map(lambda p: self.get_data(*p), args):
@@ -101,7 +106,7 @@ class FindingApi:
         item_id_results = list(set(itemid_results_list))
         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
-            len(itemid_results_list), 20))]
+            len(itemid_results_list), 20))] # 20-ItemID batches maximize the dataset and reduce calls given the per-call constraints

         return item_id_results # TODO during the try/except conditionals, just check the csv files. At the end you can create sets. Add another condition: if the final set is smaller than 100k, call finding again
@@ -339,10 +344,19 @@ class CurateData:

         user_input = input('drop or keep cols?:')

+        '''
+        The drop and/or keep/mask operations that create the filtered df below are
+        producing errors because some column labels in the predefined lists are not present.
+        Check the documentation for an option to ignore labels that are not present.
+
+        The keep-cols option is preferable because users enter messy custom fields in
+        item specifics. Use it if you can.
+        '''
         if 'keep' in user_input:
-            dropd = nvl_training.reindex([col_keep])
+            dropd = nvl_training.reindex([col_keep]) # TODO errors here when using .loc or .reindex with multiple column labels
         else:
-            dropd = nvl_training.drop(col_drop, axis=1)
+            dropd = nvl_training#.drop(col_drop, errors='ignore', axis=1) # errors='ignore' skips nonexistent labels
+
         return dropd

 # For future reference: to deal with inconsistent values in the nvl (due to sellers entering custom values in the fields), you can drop either the listings or the k/v pairs that are unique
@@ -355,6 +369,8 @@ class CurateData:
         takes the image url list from each cell and expands it into
         separate/duplicate instances. Modifies both the class_training and dropd dfs. Appends a custom
         image url dict {'source':'target'}.
+
+        * consider applying this function to other cells that have multiple values in their lists
         '''
         expanded_class = class_training.explode('PictureURL').reset_index(drop=True)
         expanded_class = expanded_class.dropna(subset=['PictureURL'])
@@ -366,8 +382,7 @@ class CurateData:
         expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values

-        temp_pics_source_list = list(set(expanded_class.PictureURL.to_list())) # prolly need to create set long before df... immediately after Shopping or trading call
-        #: defined in the download function
+        temp_pics_source_list = list(set(expanded_class.PictureURL.to_list()))

         try:
@@ -445,7 +460,7 @@ class CurateData:
         dict_pics = temp_dict_pics

     def dl_pic(dict_pics, pic):
-
+        if os.path.exists(dict_pics[pic]): # or calling temp_dict_pics[pic] can work
             pass # TODO this is not catching duplicates for some reason... or possibly it is? Upon inspection the files aren't duplicates... but why?
             # TODO that would mean temp_pics_source_list is changing for some reason?
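
The docstring added in the first hunk suggests sortOrder and an exclude-duplicates param. A minimal sketch of what the params dict could look like, assuming the eBay Finding API names as I recall them (sortOrder with StartTimeNewest, and the HideDuplicateItems item filter); verify these against the current Finding API docs before relying on them:

params = {
    "OPERATION-NAME": "findItemsByCategory",      # self.service in the patch
    "SECURITY-APPNAME": "YOUR-APP-ID",            # cfg.sec['SECURITY-APPNAME'] in the patch
    "sortOrder": "StartTimeNewest",               # newest listings first
    "itemFilter(0).name": "HideDuplicateItems",   # the exclude-duplicates filter
    "itemFilter(0).value": "true",
}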
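
The NOTE in the second hunk proposes building one master list of (category_id, page) tuples instead of running a ThreadPoolExecutor per category. A sketch of that idea; the function name and the get_data parameter are stand-ins for the patch's method:

import concurrent.futures

def get_ids_from_cats(get_data, cat_list, pages):
    # one master args list covering every category, so a single
    # executor schedules all the work at once
    args = [(category_id, i) for category_id in cat_list for i in pages]
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for result in executor.map(lambda p: get_data(*p), args):
            pass  # consume each page of results here, as the patch's loop body does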
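
The 20-ItemID batching in the third hunk, pulled out as a sketch. Note that the patch dedupes into item_id_results and then immediately rebuilds the batches from the pre-dedup itemid_results_list, so the set() is discarded; batching from the deduped list avoids repeated IDs:

def batch_item_ids(itemid_results_list, batch_size=20):
    """Comma-join unique ItemIDs into batches of 20 to get the most data per call."""
    unique_ids = list(set(itemid_results_list))
    return [','.join(unique_ids[n:n + batch_size])
            for n in range(0, len(unique_ids), batch_size)]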
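
On the keep/drop TODO: nvl_training.reindex([col_keep]) passes a one-element index whose single label is the whole col_keep list, which is likely the error the TODO flags. A sketch assuming col_keep and col_drop are flat lists of column labels:

import pandas as pd

def filter_cols(nvl_training: pd.DataFrame, col_keep, col_drop, user_input):
    if 'keep' in user_input:
        # keep only the labels that actually exist, ignoring the rest
        return nvl_training[nvl_training.columns.intersection(col_keep)]
    # errors='ignore' makes drop() skip labels that are not present
    return nvl_training.drop(columns=col_drop, errors='ignore')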
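
The expand-images docstring suggests applying the explode pattern to other cells holding lists. pandas >= 1.3 lets explode take several columns at once, provided each row's lists are equal length; a sketch with a hypothetical helper name:

import pandas as pd

def expand_list_cols(df: pd.DataFrame, cols=('PictureURL',)) -> pd.DataFrame:
    """One row per list element: explode the list-valued columns, drop rows with no value."""
    expanded = df.explode(list(cols)).reset_index(drop=True)
    return expanded.dropna(subset=list(cols))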
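
On the dl_pic TODO: os.path.exists only catches repeats of the exact target path, so byte-identical images saved under different names would still slip through, which may explain the "files aren't duplicates... but why?" observation. A sketch of a content-hash check to complement the path check:

import hashlib
import os

def dl_pic(dict_pics, pic):
    target = dict_pics[pic]
    if os.path.exists(target):
        return  # this exact target path was already downloaded
    # ... download pic to target here ...

def same_content(path_a, path_b):
    """Catch duplicates that differ only by filename, which a path check cannot see."""
    with open(path_a, 'rb') as a, open(path_b, 'rb') as b:
        return hashlib.sha256(a.read()).digest() == hashlib.sha256(b.read()).digest()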