added function to create custom image url dictionary

2021-04-04 14:38:04 -07:00
parent 35100b7952
commit b205b44b50
2 changed files with 18 additions and 7 deletions
--- a/curate.py
+++ b/curate.py
@@ -12,7 +12,9 @@ class_training = curate.class_training(training)
 nvl_training = curate.nvl_training(training)
 dropd = curate.drop_nvl_cols(nvl_training)

-# expand_nvlclass(class_training, dropd)
-# extracted_df = curate.extract_contents(expanded_dropd) # only extract contents after running expand_nvlclass and returning expanded dropd
+expanded_dfs = curate.expand_nvlclass(class_training, dropd)
+expanded_class = expanded_dfs[0]
+expanded_dropd = expanded_dfs[1]

 # TODO # need to replace expanded df's PictureURL col values with destination urls
+# TODO # still have the problem of duplicate listings. Possibly take care of this before you run curate
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -7,6 +7,7 @@ import pandas as pd
 import config as cfg
 import wget # NOTE may not need this
 import shutil
+import re

 class FindingApi:
    '''Methods for accessing eBays FindingApi services'''
@@ -204,11 +205,11 @@ class CurateData:

        return nvl_training

-    def extract_contents(self, df):
+    def extract_df(self, df):
        '''
        converts single-value lists of strings of any df to string if not null
        '''
-        extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isull(x) else x)
+        extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isnull(x) else x)

        return extracted_df

@@ -257,9 +258,10 @@ class CurateData:
    def make_dict_pics(self, expanded_class_training):
        with open('dict_pic.txt', 'w+') as jf: # TODO requires cleaning up
            dict_pics = json.load(jf)
-#        dict_pics.extend('<
+        dict_pics.extend('<')

-    def expand_nvlclass(class_training, dropd):
+
+    def expand_nvlclass(self, class_training, dropd):
        '''
        takes image url list from each cell and expands them into separate/duplicate
        instances. Modifies both class training and dropd dfs. Appends custom
@@ -268,9 +270,16 @@ class CurateData:
        expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True)
        # expanded_class_training.PictureURL.apply(lambda x: 'c:/users/unknown/
        expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True)
+        expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
        # expanded_dropd.PictureURL.apply(lambda x: 'c:/users/unknown/

-        return expanded_class_training, expanded_dropd, dict_pics# TODO still need to replace source url to destination url in df cols and create custom dict {<source>, <distination>}
+        # curate.make_dict_pics(expanded_class_training) # custom image url dict
+        dict_pics_list = set(expanded_class_training.PictureURL.to_list()) # probably need to create this set long before the df... immediately after the Shopping or Trading call
+        dict_pics = {k:destination+re.search(r'\w+(?=/\$_)', a).group()+'.jpg' for k in dict_pics_list} # TODO determine how to implement destination variable. NOTE(review): `a` is not defined in this hunk -- presumably should be `k`; confirm
+
+        # re.search(r'\w+(?=/\$_)', a).group()
+
+        return expanded_class_training, expanded_dropd # TODO still need to replace source url to destination url in df cols and create custom dict {<source>, <destination>}

    def dl_pictures(self):
        '''