diff --git a/curate.py b/curate.py
index 241525c..a1a8950 100644
--- a/curate.py
+++ b/curate.py
@@ -1,20 +1,25 @@
 import ebay_api
+import json
+import pdb
 
 '''
 file used to compile methods from ebay_api.py for curating training data
 '''
-
 curate = ebay_api.CurateData()
 raw_data = curate.import_raw()
-training = curate.to_training(raw_data)
-class_training = curate.class_training(training)
-nvl_training = curate.nvl_training(training)
-dropd = curate.drop_nvl_cols(nvl_training)
+training = curate.to_training(raw_data) # creates raw_df
+class_training = curate.class_training(training) # creates initial class_training df
+nvl_training = curate.nvl_training(training) # creates initial nvl_training
+dropd = curate.drop_nvl_cols(nvl_training) # label mask
 
-expanded_dfs = curate.expand_nvlclass(class_training, dropd)
+expanded_dfs = curate.expand_nvlclass(class_training, dropd) # pulls values out of lists
 
-expanded_class = expanded_dfs[0]
-expanded_dropd = expanded_dfs[1]
-# dict_pics = expanded_dfs[2]
+expanded_class = expanded_dfs[0] # TODO still having problems with Unnamed: 0 col
+expanded_dropd = expanded_dfs[1] # TODO incorrect df. Look at nvl_training func. Specifically "reindex" usage
 
-# TODO # still have the problem of duplicate listings. Possibly take care of this before you run curate
+# Any reply containing "y" or "Y" starts the image download; any other reply skips it.
+download = input('download images?: ')
+if 'y' in download.lower():
+    with open('temp_pics_source_list.txt') as f:
+        test_list = json.load(f)
+    curate.dl_pictures(test_list)