commit before push

spbeach46 2021-06-10 01:53:36 -07:00
parent fc7449827c
commit 2c5b878977


@@ -52,7 +52,7 @@ class FindingApi:
"paginationInput.PageNumber":i,
"itemFilter(0).name":"Condition",
"itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
# but may not be necessary anyways if you can eleminate dupes
# but may not be necessary anyways if you can eleminate dupes. TODO Still need to fix to work. Results are likely better than new items w/ shitty brands and pics
"sortOrder":"StartTimeNewest",
"itemFilter.name":"HideDuplicateItems", # this is working
"itemFilter.value":'true'
@@ -190,7 +190,7 @@ class ShoppingApi:
try:
with open('raw_data.txt') as f:
data = json.load(f)
except (FileNotFoundError, ValueError):
except (FileNotFoundError, ValueError): # TODO: this is not catching the error
data = []
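# Sketch (not part of this commit) for the TODO above: json.load raises
# json.JSONDecodeError, a subclass of ValueError, so the except clause should already
# cover a corrupt or empty file. If the error still isn't caught, it is probably being
# raised outside this try block; catching and logging it explicitly makes the failure
# visible. The helper name below is made up for illustration.
import json

def load_raw_data(path='raw_data.txt'):
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"{path} not found; starting with an empty list")
    except json.JSONDecodeError as err:
        print(f"{path} is not valid JSON ({err}); starting with an empty list")
    return []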
service_dict = {
@@ -382,35 +382,44 @@ class CurateData:
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list())) # TODO because this var is deleted after dl_pictures you may be
# getting duplicate pictures. i.e., expanded_class.PictureURL is a master series and will write temp_pics_source_list as such,
# giving you many repeated PictureURLs (they will not get downloaded due to the check at dl_pic, but the checking will continue to grow
# in compute requirements). So, figure out a way to make a true temp list based only on the current call being executed
else:
class_training['PictureURL'] = class_training['PictureURL'].apply(lambda x: x[0])
expanded_class = class_training
dropd['PictureURL'] = dropd['PictureURL'].apply(lambda x: x[0])
expanded_dropd = dropd
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list()))
try:
with open('temp_pics_source_list.txt') as f:
tpsl = json.load(f)
tpsl.extend(temp_pics_source_list)
temp_pics_source_list = list(set(tpsl))
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
except (ValueError, FileNotFoundError):
try:
with open('temp_pics_source_list.txt') as f:
tpsl = json.load(f)
tpsl.extend(temp_pics_source_list)
temp_pics_source_list = list(set(tpsl))
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
except (ValueError, FileNotFoundError):
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
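# Sketch (not part of this commit) of the "true temp list" idea from the TODO above:
# keep the master URL list on disk and only queue URLs from the current call that are
# not already recorded, so dl_pictures never has to re-check the whole master series.
# The helper name is made up for illustration; the file name follows the code above.
import json, os

def new_urls_this_call(current_urls, master_path='temp_pics_source_list.txt'):
    master = set()
    if os.path.exists(master_path):
        with open(master_path) as f:
            try:
                master = set(json.load(f))
            except ValueError:
                pass                              # treat a corrupt file as empty
    fresh = sorted(set(current_urls) - master)    # only URLs this call hasn't seen yet
    with open(master_path, 'w') as f:
        json.dump(sorted(master | set(fresh)), f)
    return fresh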
# Append to master training dataframes, drop potential dupes and save
expanded_class.to_csv('expanded_class.csv')
# expanded_class = pd.read_csv('expanded_class.csv', index_col=0)
# expanded_class.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
# expanded_class.to_csv('expanded_class.csv', mode='a', encoding='utf-8') # TODO see line 235 about views and copies
expanded_dropd.to_csv('expanded_dropd.csv')
# expanded_dropd = pd.read_csv('expanded_dropd.csv', index_col=0)
# expanded_dropd.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
# expanded_dropd.to_csv('expanded_dropd.csv', mode='a', encoding='utf-8')
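# Sketch (not part of this commit) of the append-and-dedupe step in the commented-out
# lines above: drop_duplicates returns a new frame, so the result has to be assigned
# back (or written out) or the dedupe is silently lost. The helper name is made up;
# file and column names follow the code above.
import os
import pandas as pd

def append_to_master(new_df, path):
    if os.path.exists(path):
        master = pd.read_csv(path, index_col=0)
        master = pd.concat([master, new_df], ignore_index=True)
    else:
        master = new_df
    master = master.drop_duplicates(subset=['PictureURL']).reset_index(drop=True)
    master.to_csv(path, encoding='utf-8')
    return master

# e.g. append_to_master(expanded_class, 'expanded_class.csv')
#      append_to_master(expanded_dropd, 'expanded_dropd.csv')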
return expanded_class, expanded_dropd
else:
return
return expanded_class, expanded_dropd
def dl_pictures(self, *args):
'''