diff --git a/ebay_api.py b/ebay_api.py
index 59a47de..15665ae 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -11,7 +11,10 @@
 import shutil
 import re
 class FindingApi:
-    '''Methods for accessing eBays FindingApi services'''
+    '''
+    Methods for accessing eBay's FindingApi services
+    '''
+
     def __init__(self, service, pageNumber):
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
@@ -20,13 +23,45 @@ class FindingApi:
             ][service]
         self.pageNumber = list(range(1, pageNumber)) # 77 pgs will give equal weights to cats given call constraints

-        # departments = ["3034","93427"] (womens and mens)
-
         # examples of additional params you may want to add:
         # 'itemFilter(0).value':'Used'
         # 'itemFilter(1).name':'ListingType'
         # 'itemFilter(1).value':'AuctionWithBIN'

+    def update_cats(self):
+
+        parent_cats = ['3034', '93427']
+        cat_list = []
+
+        for department in parent_cats:
+
+            params = {
+                "callname":"GetCategoryInfo",
+                "appid":cfg.sec['SECURITY-APPNAME'],
+                "version":"671",
+                "responseencoding":"JSON",
+                "CategoryID":department,
+                "IncludeSelector":"ChildCategories",
+                }
+
+            try:
+                response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
+                response.raise_for_status()
+
+            except requests.exceptions.RequestException:
+                print('connection error')
+
+            response = response.json()
+            response = response['CategoryArray']['Category'][1:]
+            temp_cat_list = [cat['CategoryID'] for cat in response]
+            cat_list.extend(temp_cat_list)
+
+        with open('cat_list.txt', 'w') as f:
+            json.dump(cat_list, f)
+
+        # leaf_list = [node['LeafCategory'] for node in response]
+        return cat_list
+
     def get_data(self, category_id, i):
         '''
@@ -127,7 +162,7 @@ class ShoppingApi:
             response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
             response.raise_for_status()

-        except requests.exceptions.RequestException:
+        except requests.exceptions.RequestException: # TODO need better handling
            print('connection error')

        response = response.json()
@@ -137,8 +172,8 @@ class ShoppingApi:

     def conky(self):
         '''
-        For some reason item_id_results can only be passed as argument in executor.map
-        if the variable is made within function
+        Runs get_item_from_findItemsByCategory in multiple threads to get relevant
+        data for creating training sets
         '''
         data = []
         finding = FindingApi(4, 2) # TODO replace these test values before production
@@ -157,7 +192,7 @@ class ShoppingApi:

 class CurateData:
     '''
-    Contains functions for curating data for machine learning training sets;
+    Contains methods for curating data for machine learning training sets;
     Takes item in data from ShoppingApi request as argument and extracts/
     creates key value pairs that gets updated to custom dataframe used in Ml training sets.
     '''
@@ -220,7 +255,7 @@ class CurateData:

         return extracted_df

-    def drop_nvl_cols(self, nvl_training):
+    def drop_nvl_cols(self, nvl_training): # NOTE this is wonky
         col_drop = [
             'Fabric Type', 'Type of Sport', 'Mid Sole', 'Modified Item',
             'Modification Description', 'Article Type', 'Customized',
@@ -281,15 +316,16 @@ class CurateData:
         try:
             with open('temp_pics_source_list.txt') as f:
-                temp_pics_source_list = json.load(f)
-                temp_pics_source_list.append(temp_pics_source_list)
-                temp_pics_source_list = list(set(temp_pics_source_list))
+                tpsl = json.load(f)
+                tpsl.extend(temp_pics_source_list)
+                temp_pics_source_list = list(set(tpsl))
             with open('temp_pics_source_list.txt', 'w') as f:
                 json.dump(temp_pics_source_list, f)

         except (ValueError, FileNotFoundError):
             with open('temp_pics_source_list.txt', 'w') as f:
                 json.dump(temp_pics_source_list, f)

+        # TODO still need to save these as csv files
         return expanded_class, expanded_dropd

     def dl_pictures(self, *args):
@@ -301,16 +337,16 @@ class CurateData:

         try:
             with open('target_dirs.txt', 'r+') as f: # TODO you can add option to change directory here, too. Look up how to have optional arguments
                 target_dir = json.load(f)
-
         except (ValueError, FileNotFoundError):
-            target_dir = input('No target dirctory found. Create One? [y] or [n]:')
+            target_dir = input('No target dirctory found. Create One? [y] or [n]:')
             if target_dir == ('y' or 'Y'):
-                target_dir = input('Please provide full URL to destination folder:')
-                with open('target_dirs.txt','w+') as f:
+                target_dir = input('Please provide full URL to destination folder:') # TODO need to catch human syntax errors here
+                with open('target_dirs.txt','w') as f:
                     json.dump(target_dir, f)
             else:
-                target_dir = os.mkdir(os.getcwd()+os.sep+'training_images')
-                with open('target_dirs.txt','w+') as f:
+                os.mkdir(os.getcwd()+os.sep+'training_images')
+                target_dir = os.getcwd()+os.sep+'training_images'
+                with open('target_dirs.txt','w') as f:
                     json.dump(target_dir, f)
                 print('Creating default folder in current directory @ ' + target_dir)
@@ -332,7 +368,7 @@ class CurateData:
         try:
             with open('dict_pics.txt') as f:
                 dict_pics = json.load(f)
-                dict_pics.update(temp_dict_pics)
+                dict_pics.update(temp_dict_pics) # TODO This still creates duplicates
             with open('dict_pics.txt', 'w') as f:
                 json.dump(dict_pics, f)

@@ -344,7 +380,8 @@ class CurateData:
         def dl_pic(dict_pics, pic):

             if os.path.exists(dict_pics[pic]): # or call temp_dict_pics[pic] can work
-                pass
+                pass # TODO This is not catching duplicates for some reason....possibly not? Upon inspection, files aren't duplicates...but why?
+                #TODO it would mean that temp_pics_source_list is changing for some reason?

             else:
                 r = requests.get(pic, stream=True)
@@ -352,16 +389,31 @@ class CurateData:
                 with open(temp_dict_pics[pic], 'wb') as f: # Or call dict_pics[pic] can work
                     shutil.copyfileobj(r.raw, f)

-        breakpoint()
         bargs = [(dict_pics, pic) for pic in temp_pics_source_list]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(lambda p: dl_pic(*p), bargs):
                 future

-        with open('temp_pics_source_list.txt','w') as f: # Overwrites old when complete
-            temp_pics_source_list = []
-            json.dump(temp_pics_source_list, f)
+        os.remove('temp_pics_source_list.txt') # Deletes file after downloads complete successfully
+
+class PreProcessing:
+    '''
+    Includes methods for pre-processing training set input and labels in the
+    training set created from CurateData class. Whereas CurateData training
+    sets provided trimmed down data from the raw json response from the
+    ShoppingApi call and provided a bare minimum format for the dataframe to be
+    used in training, PreProcessing optimizes that dataframe for training and
+    includes methods for image manipulation, creating test/train/validation
+    splits, etc.
+    '''
+
+    def stt_training(self, dict_pics, expanded_class, expanded_dropd):
+        '''
+        Source to target training. Replaces source image URL with target URL
+        determined by values in dict_pics variable.
+        '''
+        pass

 # TODO pipeline gameplan: 5 files: master img download dict,raw_json.txt, raw_json.csv, master_class_training.csv, master_nvl_training.csv
 # cont... open raw_json.txt and append, same with csv --> process new data --> pull out image source+dest and expand new dfs for the additional pictures
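
The stt_training method added above is still a stub. A minimal sketch of how its body could be filled in, based only on the docstring, is shown below; it is not part of the diff. It assumes expanded_class and expanded_dropd are pandas DataFrames carrying a 'PictureURL' column (the column name is an assumption) and that dict_pics maps each source image URL to the local path dl_pictures saved it to.

    def stt_training(self, dict_pics, expanded_class, expanded_dropd):
        '''
        Source to target training. Replaces source image URL with target URL
        determined by values in dict_pics variable.
        '''
        expanded_class = expanded_class.copy()
        expanded_dropd = expanded_dropd.copy()
        # Map each source URL to its downloaded file path; URLs missing from
        # dict_pics are left unchanged.
        for df in (expanded_class, expanded_dropd):
            df['PictureURL'] = df['PictureURL'].map(dict_pics).fillna(df['PictureURL'])
        return expanded_class, expanded_dropd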