"""Compile methods from ebay_api.py for curating training data."""

import numpy as np

import ebay_api

curate = ebay_api.CurateData()
raw_data = curate.import_raw()
# NOTE: the PictureURL list has to be referenced here if you want to expand
# it; per the original author the column is a string in the subsequent dfs
# (alternatively use dropd.PictureURL.split(' ')).
training = curate.to_training(raw_data)
class_training = curate.class_training(training)
nvl_training = curate.nvl_training(training)
extracted_df = curate.extract_contents(nvl_training)
dropd = curate.drop_nvl_cols(extracted_df)


def expand_nvlclass(class_training, dropd):
    """Expand each row into one row per image URL, in both dataframes.

    ``class_training.PictureURL`` is exploded so that every element of a
    list-like cell gets its own row. ``dropd`` is expanded by repeating each
    of its rows the same number of times, so row ``i`` of both results
    originates from the same input row. The inputs are not modified.

    Args:
        class_training: DataFrame with a ``PictureURL`` column holding
            list-like cells. Rows are matched to ``dropd`` by position.
        dropd: DataFrame with the same number of rows as ``class_training``.

    Returns:
        A tuple ``(expanded_class_training, expanded_dropd)``, both with a
        fresh 0..n-1 index.

    Raises:
        ValueError: if the two dataframes have different lengths, since the
            positional row matching would then be meaningless.
    """
    if len(class_training) != len(dropd):
        raise ValueError(
            "class_training and dropd must have the same number of rows "
            f"({len(class_training)} != {len(dropd)})"
        )

    # explode() duplicates the index label of the originating row for every
    # emitted row. With a positional index those labels are exactly the
    # source row positions, including the single row explode() keeps for an
    # empty list or a scalar cell, so dropd cannot drift out of alignment.
    exploded = class_training.reset_index(drop=True).explode('PictureURL')
    source_positions = exploded.index.to_numpy()

    expanded_class_training = exploded.reset_index(drop=True)
    expanded_dropd = dropd.iloc[source_positions].reset_index(drop=True)

    # TODO: switch dropd to explode() as well. The picture list needs to be
    # preserved prior to creating dropd and extracted_df; maybe run
    # extracted_df after dropd or after running nvl_training.
    # TODO: append the custom image url dict {'source': 'destination'}
    # described in the original notes; it is not implemented yet.
    return expanded_class_training, expanded_dropd