added function to create custom image url dictionary
This commit is contained in:
parent
35100b7952
commit
b205b44b50
@ -12,7 +12,9 @@ class_training = curate.class_training(training)
|
||||
nvl_training = curate.nvl_training(training)
|
||||
dropd = curate.drop_nvl_cols(nvl_training)
|
||||
|
||||
# expand_nvlclass(class_training, dropd)
|
||||
# extracted_df = curate.extract_contents(expanded_dropd) # only extract contents after running expand_nvlclass and returning expanded dropd
|
||||
expanded_dfs = curate.expand_nvlclass(class_training, dropd)
|
||||
expanded_class = expanded_dfs[0]
|
||||
expanded_dropd = expanded_dfs[1]
|
||||
|
||||
# TODO: replace the expanded DataFrames' PictureURL column values with the destination URLs
|
||||
# TODO: duplicate listings are still a problem; possibly take care of them before running curate
|
||||
|
19
ebay_api.py
19
ebay_api.py
@ -7,6 +7,7 @@ import pandas as pd
|
||||
import config as cfg
|
||||
import wget # NOTE may not need this
|
||||
import shutil
|
||||
import re
|
||||
|
||||
class FindingApi:
|
||||
'''Methods for accessing eBay's FindingApi services'''
|
||||
@ -204,11 +205,11 @@ class CurateData:
|
||||
|
||||
return nvl_training
|
||||
|
||||
def extract_contents(self, df):
|
||||
def extract_df(self, df):
|
||||
'''
|
||||
converts single-value lists of strings of any df to string if not null
|
||||
'''
|
||||
extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isull(x) else x)
|
||||
extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isnull(x) else x)
|
||||
|
||||
return extracted_df
|
||||
|
||||
@ -257,9 +258,10 @@ class CurateData:
|
||||
def make_dict_pics(self, expanded_class_training):
|
||||
with open('dict_pic.txt', 'w+') as jf: # TODO requires cleaning up
|
||||
dict_pics = json.load(jf)
|
||||
# dict_pics.extend('<
|
||||
dict_pics.extend('<')
|
||||
|
||||
def expand_nvlclass(class_training, dropd):
|
||||
|
||||
def expand_nvlclass(self, class_training, dropd):
|
||||
'''
|
||||
takes image url list from each cell and expands them into separate/duplicate
|
||||
instances. Modifies both class training and dropd dfs. Appends custom
|
||||
@ -268,9 +270,16 @@ class CurateData:
|
||||
expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True)
|
||||
# expanded_class_training.PictureURL.apply(lambda x: 'c:/users/unknown/
|
||||
expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True)
|
||||
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
|
||||
# expanded_dropd.PictureURL.apply(lambda x: 'c:/users/unknown/
|
||||
|
||||
return expanded_class_training, expanded_dropd, dict_pics# TODO still need to replace source url to destination url in df cols and create custom dict {<source>, <distination>}
|
||||
# curate.make_dict_pics(expanded_class_training) # custom image url dict
|
||||
dict_pics_list = set(expanded_class_training.PictureURL.to_list()) # probably need to create this set long before the df, immediately after the Shopping or Trading call
|
||||
dict_pics = {k:destination+re.search(r'\w+(?=/\$_)', a).group()+'.jpg' for k in dict_pics_list} # TODO determine how to implement destination variable
|
||||
|
||||
# re.search(r'\w+(?=/\$_)', a).group()
|
||||
|
||||
return expanded_class_training, expanded_dropd # TODO still need to replace source url with destination url in df cols and create custom dict {<source>, <destination>}
|
||||
|
||||
def dl_pictures(self):
|
||||
'''
|
||||
|
Loading…
Reference in New Issue
Block a user