From 6bdc94c8c64659a22917564543291af5666684d1 Mon Sep 17 00:00:00 2001 From: scott Date: Wed, 10 Nov 2021 18:01:09 -0700 Subject: [PATCH] fixing call limit errors and duplicate ids --- curate.py | 52 +++++++++++++++++++++++++-------------------------- nv_columns.py | 42 +++++++++++++++++++++++++++++++++++++++++ shopping.py | 7 +++++++ 3 files changed, 75 insertions(+), 26 deletions(-) create mode 100644 nv_columns.py create mode 100644 shopping.py diff --git a/curate.py b/curate.py index a1a8950..5be0685 100644 --- a/curate.py +++ b/curate.py @@ -1,26 +1,26 @@ -import ebay_api -import json -import pdb - -''' -file used to compile methods from ebay_api.py for curating training data -''' -curate = ebay_api.CurateData() -raw_data = curate.import_raw() -training = curate.to_training(raw_data) # creates raw_df -class_training = curate.class_training(training) # creates initial class_training df -nvl_training = curate.nvl_training(training) # creates initial nvl_training -dropd = curate.drop_nvl_cols(nvl_training) # label mask - -expanded_dfs = curate.expand_nvlclass(class_training, dropd) # pulls values out of lists - -expanded_class = expanded_dfs[0] # TODO still having problems with Unnamed: 0 col -expanded_dropd = expanded_dfs[1] # TODO incorrect df. Look at nvl_training func. Specifically "reindex" usage - -download = input('download images?: ') -if ('y' or 'Y') in download: - with open('temp_pics_source_list.txt') as f: - test_list = json.load(f) - curate.dl_pictures(test_list) -else: - pass +import ebay_api +import json +import pdb + +''' +file used to compile methods from ebay_api.py for curating training data +''' +curate = ebay_api.CurateData() +raw_data = curate.import_raw() +training = curate.to_training(raw_data) # creates raw_df +class_training = curate.class_training(training) # creates initial class_training df +nvl_training = curate.nvl_training(training) # creates initial nvl_training +dropd = curate.drop_nvl_cols(nvl_training) # label mask + +expanded_dfs = curate.expand_nvlclass(class_training, dropd) # pulls values out of lists for both dfs + +expanded_class = expanded_dfs[0] # TODO still having problems with Unnamed: 0 col +expanded_dropd = expanded_dfs[1] # TODO incorrect df. Look at nvl_training func. Specifically "reindex" usage + +download = input('download images?: ') +if ('y' or 'Y') in download: + with open('temp_pics_source_list.txt') as f: + test_list = json.load(f) + curate.dl_pictures(test_list) +else: + pass diff --git a/nv_columns.py b/nv_columns.py new file mode 100644 index 0000000..d2018cf --- /dev/null +++ b/nv_columns.py @@ -0,0 +1,42 @@ +''' +Retrieves eBay recommended Category Specifics for each category present in +cat_list.txt file. Used to drop crappy user-submitted specifics and values +from training datasets. +''' +import ebaysdk +import json +import requests +import concurrent.futures +import config as cfg +from ebaysdk.shopping import Connection as Shopping +from ebaysdk.trading import Connection as Trading +sapi = Shopping(config_file = 'ebay.yaml') +tapi = Trading(config_file='ebay.yaml') + +def get_cat_specs(cat): + + response = tapi.execute('GetCategorySpecifics', + {'CategoryID':cat}) + cat_spacs =[name['Name'] for name in response.dict()['Recommendations']['NameRecommendation']] + + return cat_spacs + +with open('cat_list.txt') as f: + cat_list = json.load(f) + +def threadd_cat_spacs(): + + cat_spacs = [] + + with concurrent.futures.ThreadPoolExecutor() as executor: + for future in executor.map(get_cat_specs, cat_list): + cat_spacs.extend(future) + + cat_spacs = list(set(cat_spacs)) + + return cat_spacs + +if __name__=='__main__': + cat_spacs = threadd_cat_spacs() + with open('cat_spacs.txt', 'w') as f: + json.dump(cat_spacs, f) diff --git a/shopping.py b/shopping.py new file mode 100644 index 0000000..4008658 --- /dev/null +++ b/shopping.py @@ -0,0 +1,7 @@ +''' +Initial download and write of raw data from ebay +''' +import ebay_api + +shopping = ebay_api.ShoppingApi() +data = shopping.conky()