added function to create custom image url dictionary

This commit is contained in:
spbeach46 2021-04-04 14:38:04 -07:00
parent 35100b7952
commit b205b44b50
2 changed files with 18 additions and 7 deletions

View File

@ -12,7 +12,9 @@ class_training = curate.class_training(training)
nvl_training = curate.nvl_training(training)
dropd = curate.drop_nvl_cols(nvl_training)
# expand_nvlclass(class_training, dropd)
# extracted_df = curate.extract_contents(expanded_dropd) # only extract contents after running expand_nvlclass and returning expanded dropd
expanded_dfs = curate.expand_nvlclass(class_training, dropd)
expanded_class = expanded_dfs[0]
expanded_dropd = expanded_dfs[1]
# TODO # need to replace expanded df's PictureURL col values with destination urls
# TODO # still have the problem of duplicate listings. Possibly take care of this before you run curate

View File

@ -7,6 +7,7 @@ import pandas as pd
import config as cfg
import wget # NOTE may not need this
import shutil
import re
class FindingApi:
'''Methods for accessing eBays FindingApi services'''
@ -204,11 +205,11 @@ class CurateData:
return nvl_training
def extract_contents(self, df):
def extract_df(self, df):
'''
converts single-value lists of strings of any df to string if not null
'''
extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isull(x) else x)
extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isnull(x) else x)
return extracted_df
@ -257,9 +258,10 @@ class CurateData:
def make_dict_pics(self, expanded_class_training):
with open('dict_pic.txt', 'w+') as jf: # TODO requires cleaning up
dict_pics = json.load(jf)
# dict_pics.extend('<
dict_pics.extend('<')
def expand_nvlclass(class_training, dropd):
def expand_nvlclass(self, class_training, dropd):
'''
takes image url list from each cell and expands them into separate/duplicate
instances. Modifies both class training and dropd dfs. Appends custom
@ -268,9 +270,16 @@ class CurateData:
expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True)
# expanded_class_training.PictureURL.apply(lambda x: 'c:/users/unknown/
expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True)
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
# expanded_dropd.PictureURL.apply(lambda x: 'c:/users/unknown/
return expanded_class_training, expanded_dropd, dict_pics# TODO still need to replace source url to destination url in df cols and create custom dict {<source>, <destination>}
# curate.make_dict_pics(expanded_class_training) # custom image url dict
dict_pics_list = set(expanded_class_training.PictureURL.to_list()) # probably need to create set long before df... immediately after Shopping or trading call
dict_pics = {k:destination+re.search(r'\w+(?=/\$_)', a).group()+'.jpg' for k in dict_pics_list} # TODO determine how to implement destination variable
# re.search(r'\w+(?=/\$_)', a).group()
return expanded_class_training, expanded_dropd # TODO still need to replace source url to destination url in df cols and create custom dict {<source>, <destination>}
def dl_pictures(self):
'''