spbeach46 2021-06-01 22:28:54 -07:00
parent 9fc00f4eab
commit 5582cd29ef


@@ -35,6 +35,10 @@ class FindingApi:
get itemIDs from categories;
'''
'''
consider using the sortOrder param to update by the latest listings first.
Also consider using the exclude duplicates param (see the sketch below)
'''
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -79,7 +83,8 @@ class FindingApi:
for category_id in cat_list:
args = [(category_id, i) for i in pages]
args = [(category_id, i) for i in pages] # NOTE alternatively you can extend a master args list with every category's tuples
# instead of running concurrent.futures.ThreadPoolExecutor in a loop. Might be faster (see the sketch below)
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(lambda p: self.get_data(*p), args):
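A sketch of the alternative in the NOTE above: extend one master list of (category_id, page) tuples and run a single ThreadPoolExecutor over it instead of one executor per category (self.get_data, cat_list and pages assumed from the surrounding method):

# Sketch only: single executor over a master args list covering all categories
args = []
for category_id in cat_list:
    args.extend((category_id, i) for i in pages)

with concurrent.futures.ThreadPoolExecutor() as executor:
    for future in executor.map(lambda p: self.get_data(*p), args):
        pass  # process each response here as in the per-category loop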
@@ -101,7 +106,7 @@ class FindingApi:
item_id_results = list(set(itemid_results_list))
item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
len(itemid_results_list), 20))]
len(itemid_results_list), 20))] # lists of 20 ItemIDs created to maximize data per call / decrease call count given call constraints (see example below)
return item_id_results
# TODO during your try/except conditionals just check the csv files. At the end you can create sets. You can create another condition that says if the final set is smaller than 100k then you can call the Finding api again
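For illustration, the 20-ID batching above turns the deduplicated flat list into comma-joined strings of at most 20 ItemIDs each, so every call carries as many items as the API allows:

# e.g. 45 ItemIDs -> 3 comma-separated strings of at most 20 IDs each
itemid_results_list = [str(n) for n in range(45)]
item_id_results = [','.join(itemid_results_list[n:n+20])
                   for n in range(0, len(itemid_results_list), 20)]
# len(item_id_results) == 3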
@@ -339,10 +344,19 @@ class CurateData:
user_input = input('drop or keep cols?:')
'''
dropping and/or keeping/masking functions to create your filtered df below are
producing errors due to some column labels in your predefined lists not being present.
Look at the documentation to see if an option exists to ignore labels that aren't present
(see the sketch below). The keep col option is ideal due to users inputting crappy
custom fields in item specifics. Use this if you can
'''
if 'keep' in user_input:
dropd = nvl_training.reindex([col_keep])
dropd = nvl_training.reindex([col_keep]) # TODO ERRORS HERE USING LOC OR REINDEX WITH MULTIPLE COL LABELS
else:
dropd = nvl_training.drop(col_drop, axis=1)
dropd = nvl_training#.drop(col_drop, errors='ignore', axis=1) # errors='ignore' for non-existent labels
return dropd
# for future reference, to deal with inconsistent values in the nvl (due to sellers inputting custom values in the fields) you can drop either listings or k/v pairs that are unique which
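A sketch of how the keep/drop branch could tolerate missing labels, assuming col_keep and col_drop are flat lists of column names: pandas reindex(columns=...) turns absent labels into all-NaN columns, and drop(..., errors='ignore') simply skips them:

# Sketch only: label-tolerant keep/drop (col_keep/col_drop assumed flat lists of column names)
if 'keep' in user_input:
    dropd = nvl_training.reindex(columns=col_keep)                # absent labels become NaN columns
else:
    dropd = nvl_training.drop(columns=col_drop, errors='ignore')  # absent labels are skipped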
@@ -355,6 +369,8 @@ class CurateData:
takes the image url list from each cell and expands it into separate/duplicate
instances (one row per url). Modifies both the class training and dropd dfs. Appends custom
image url dict {'source':'target'}.
* consider applying this function to other cells that have multiple values in their lists (see the sketch below)
'''
expanded_class = class_training.explode('PictureURL').reset_index(drop=True)
expanded_class = expanded_class.dropna(subset=['PictureURL'])
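As the docstring suggests, the same explode pattern could be reused for any other list-valued column; 'SomeListColumn' and df below are hypothetical stand-ins:

# Sketch only: reuse the explode/dropna pattern on another list-valued column
expanded = df.explode('SomeListColumn').reset_index(drop=True)
expanded = expanded.dropna(subset=['SomeListColumn'])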
@@ -366,8 +382,7 @@ class CurateData:
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list())) # probably need to create the set long before the df... immediately after the Shopping or Trading call
#: defined in the download function
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list()))
try:
@@ -445,7 +460,7 @@ class CurateData:
dict_pics = temp_dict_pics
def dl_pic(dict_pics, pic):
if os.path.exists(dict_pics[pic]): # or calling temp_dict_pics[pic] would also work
pass # TODO this is not catching duplicates for some reason... or possibly there are none? Upon inspection, files aren't duplicates... but why?
# TODO that would mean temp_pics_source_list is changing for some reason?
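For context, a minimal sketch of the skip-if-present download the TODOs are probing, assuming dict_pics maps each source url to a local target path and that requests handles the download:

import os
import requests

def dl_pic(dict_pics, pic):
    target = dict_pics[pic]      # local path mapped from the source url
    if os.path.exists(target):   # skip files that already exist on disk
        return
    r = requests.get(pic, timeout=30)
    with open(target, 'wb') as f:
        f.write(r.content)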