Added method to update category IDs var and txt file
parent 6cfa798902
commit c8ab1b13d9

ebay_api.py: 98 lines changed

@@ -11,7 +11,10 @@ import shutil
 import re
 
 class FindingApi:
-    '''Methods for accessing eBays FindingApi services'''
+    '''
+    Methods for accessing eBay's FindingApi services
+    '''
+
     def __init__(self, service, pageNumber):
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
@@ -20,13 +23,45 @@ class FindingApi:
             ][service]
        self.pageNumber = list(range(1, pageNumber)) # 77 pgs will give equal weights to cats given call constraints
 
        # departments = ["3034","93427"] (womens and mens)
 
        # examples of additional params you may want to add:
        # 'itemFilter(0).value':'Used'
        # 'itemFilter(1).name':'ListingType'
        # 'itemFilter(1).value':'AuctionWithBIN'
 
+    def update_cats(self):
+
+        parent_cats = ['3034', '93427']
+        cat_list = []
+
+        for department in parent_cats:
+
+            params = {
+                "callname":"GetCategoryInfo",
+                "appid":cfg.sec['SECURITY-APPNAME'],
+                "version":"671",
+                "responseencoding":"JSON",
+                "CategoryID":department,
+                "IncludeSelector":"ChildCategories",
+            }
+
+            try:
+                response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
+                response.raise_for_status()
+
+            except requests.exceptions.RequestException:
+                print('connection error')
+
+            response = response.json()
+            response = response['CategoryArray']['Category'][1:]
+            temp_cat_list = [cat['CategoryID'] for cat in response]
+            cat_list.extend(temp_cat_list)
+
+        with open('cat_list.txt', 'w') as f:
+            json.dump(cat_list, f)
+
+        # leaf_list = [node['LeafCategory'] for node in response]
+        return cat_list
+
     def get_data(self, category_id, i):
 
         '''
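The new update_cats parses the Shopping API response as CategoryArray.Category with the parent category sitting in the first slot, which is why it slices with [1:]. A minimal sketch of that extraction against a stubbed payload; the payload shape is an assumption inferred from the code above, and the child category IDs here are made up:

    stub = {
        "CategoryArray": {
            "Category": [
                {"CategoryID": "3034"},    # parent category, dropped by the [1:] slice
                {"CategoryID": "11111"},   # hypothetical child category IDs
                {"CategoryID": "22222"},
            ]
        }
    }

    children = stub["CategoryArray"]["Category"][1:]
    cat_list = [cat["CategoryID"] for cat in children]
    print(cat_list)  # ['11111', '22222']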
@@ -127,7 +162,7 @@ class ShoppingApi:
             response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
             response.raise_for_status()
 
-        except requests.exceptions.RequestException:
+        except requests.exceptions.RequestException: # TODO need better handling
            print('connection error')
 
        response = response.json()
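The new TODO points at a real gap: when the request fails, the code only prints 'connection error' and still falls through to response.json() on a failed or missing response. One possible direction, shown only as a sketch with a hypothetical get_with_retries helper and not what this commit implements, is to retry with a short backoff and re-raise on the final failure:

    import time
    import requests

    def get_with_retries(url, params, attempts=3, timeout=4):
        for attempt in range(attempts):
            try:
                response = requests.get(url, params=params, timeout=timeout)
                response.raise_for_status()
                return response
            except requests.exceptions.RequestException:
                if attempt == attempts - 1:
                    raise                  # give up after the last attempt
                time.sleep(2 ** attempt)   # 1s, 2s, ... between attempts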
@@ -137,8 +172,8 @@ class ShoppingApi:
 
     def conky(self):
         '''
-        For some reason item_id_results can only be passed as argument in executor.map
-        if the variable is made within function
+        Runs get_item_from_findItemsByCategory in multiple threads to get relevant
+        data for creating training sets
         '''
         data = []
         finding = FindingApi(4, 2) # TODO replace these test values before production
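The rewritten conky docstring describes fanning get_item_from_findItemsByCategory out over threads. A rough sketch of that pattern, assuming item_id_results is an iterable of ID strings built inside the calling function and that each call returns a list of item dicts; both are assumptions, since the method body is not part of this hunk and conky_sketch is just an illustrative name:

    import concurrent.futures

    def conky_sketch(shopping_api, item_id_results):
        data = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = executor.map(shopping_api.get_item_from_findItemsByCategory, item_id_results)
            for result in results:
                data.extend(result)  # assumed: each call returns a list of item dicts
        return data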
@@ -157,7 +192,7 @@
 
 class CurateData:
     '''
-    Contains functions for curating data for machine learning training sets;
+    Contains methods for curating data for machine learning training sets;
     Takes item in data from ShoppingApi request as argument and extracts/ creates key
     value pairs that gets updated to custom dataframe used in Ml training sets.
     '''
@@ -220,7 +255,7 @@
 
         return extracted_df
 
-    def drop_nvl_cols(self, nvl_training):
+    def drop_nvl_cols(self, nvl_training): # NOTE this is wonky
        col_drop = [
            'Fabric Type', 'Type of Sport', 'Mid Sole', 'Modified Item',
            'Modification Description', 'Article Type', 'Customized',
@@ -281,15 +316,16 @@
 
         try:
             with open('temp_pics_source_list.txt') as f:
-                temp_pics_source_list = json.load(f)
-                temp_pics_source_list.append(temp_pics_source_list)
-                temp_pics_source_list = list(set(temp_pics_source_list))
+                tpsl = json.load(f)
+                tpsl.extend(temp_pics_source_list)
+                temp_pics_source_list = list(set(tpsl))
             with open('temp_pics_source_list.txt', 'w') as f:
                 json.dump(temp_pics_source_list, f)
         except (ValueError, FileNotFoundError):
             with open('temp_pics_source_list.txt', 'w') as f:
                 json.dump(temp_pics_source_list, f)
 
        # TODO still need to save these as csv files
        return expanded_class, expanded_dropd
 
     def dl_pictures(self, *args):
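The replaced block is worth a second look. The old code appended the loaded list to itself, which leaves a list nested inside itself and makes list(set(...)) raise TypeError, and by rebinding the name on load it also discarded whatever URLs had just been collected in memory. The new extend-then-set version is the usual merge-and-dedupe, illustrated here with throwaway values:

    old = ["url_a", "url_b"]
    old.append(old)              # the list now contains a reference to itself
    # list(set(old))             # would raise TypeError: unhashable type: 'list'

    on_disk = ["url_a", "url_b"]
    in_memory = ["url_b", "url_c"]
    on_disk.extend(in_memory)
    merged = list(set(on_disk))  # note: set() does not preserve order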
@@ -301,16 +337,16 @@
         try:
             with open('target_dirs.txt', 'r+') as f: # TODO you can add option to change directory here, too. Look up how to have optional arguments
                 target_dir = json.load(f)
 
         except (ValueError, FileNotFoundError):
             target_dir = input('No target dirctory found. Create One? [y] or [n]:')
             if target_dir == ('y' or 'Y'):
-                target_dir = input('Please provide full URL to destination folder:')
-                with open('target_dirs.txt','w+') as f:
+                target_dir = input('Please provide full URL to destination folder:') # TODO need to catch human syntax errors here
+                with open('target_dirs.txt','w') as f:
                     json.dump(target_dir, f)
             else:
-                target_dir = os.mkdir(os.getcwd()+os.sep+'training_images')
-                with open('target_dirs.txt','w+') as f:
+                os.mkdir(os.getcwd()+os.sep+'training_images')
+                target_dir = os.getcwd()+os.sep+'training_images'
+                with open('target_dirs.txt','w') as f:
                     json.dump(target_dir, f)
                 print('Creating default folder in current directory @ ' + target_dir)
 
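One way to approach the new TODO about catching human syntax errors in the typed destination path, sketched under the assumption that a plain filesystem path is wanted and using a hypothetical resolve_target_dir helper rather than anything this commit implements: normalize the input and create the folder if it is missing. As an aside, the ('y' or 'Y') comparison in the existing code only ever tests against 'y', since that expression evaluates to 'y'.

    import os

    def resolve_target_dir(raw_path):
        path = os.path.abspath(os.path.expanduser(raw_path.strip()))
        os.makedirs(path, exist_ok=True)  # tolerate an already-existing folder
        return path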
@@ -332,7 +368,7 @@
         try:
             with open('dict_pics.txt') as f:
                 dict_pics = json.load(f)
-                dict_pics.update(temp_dict_pics)
+                dict_pics.update(temp_dict_pics) # TODO This still creates duplicates
             with open('dict_pics.txt', 'w') as f:
                 json.dump(dict_pics, f)
 
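On the new TODO next to dict_pics.update: dict.update overwrites existing keys rather than duplicating them, so any duplicate downloads are more likely to come from the source list than from this dict. If the goal is to skip work for sources already recorded, filtering before downloading is one option; this is only a sketch, not part of the commit, and the missing name is made up:

    missing = {src: dest for src, dest in temp_dict_pics.items() if src not in dict_pics}
    dict_pics.update(missing)
    # only the URLs in `missing` would then need to be downloaded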
@@ -344,7 +380,8 @@
         def dl_pic(dict_pics, pic):
 
             if os.path.exists(dict_pics[pic]): # or call temp_dict_pics[pic] can work
-                pass
+                pass # TODO This is not catching duplicates for some reason....possibly not? Upon inspection, files aren't duplicates...but why?
+                # TODO it would mean that temp_pics_source_list is changing for some reason?
 
             else:
                 r = requests.get(pic, stream=True)
@@ -352,16 +389,31 @@
                 with open(temp_dict_pics[pic], 'wb') as f: # Or call dict_pics[pic] can work
                     shutil.copyfileobj(r.raw, f)
 
-        breakpoint()
         bargs = [(dict_pics, pic) for pic in temp_pics_source_list]
 
         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(lambda p: dl_pic(*p), bargs):
                 future
 
-        with open('temp_pics_source_list.txt','w') as f: # Overwrites old when complete
-            temp_pics_source_list = []
-            json.dump(temp_pics_source_list, f)
+        os.remove('temp_pics_source_list.txt') # Deletes file after downloads complete successfully
+
+class PreProcessing:
+    '''
+    Includes methods for pre-processing training set input and labels in the
+    training set created from CurateData class. Whereas CurateData training
+    sets provided trimmed down data from the raw json response from the
+    ShoppingApi call and provided a bare minimum format for the dataframe to be
+    used in training, PreProcessing optimizes that dataframe for training and
+    includes methods for image manipulation, creating test/train/validation
+    splits, etc.
+    '''
+
+    def stt_training(self, dict_pics, expanded_class, expanded_dropd):
+        '''
+        Source to target training. Replaces source image URL with target URL
+        determined by values in dict_pics variable.
+        '''
+        pass
 
 # TODO pipeline gameplan: 5 files: master img download dict,raw_json.txt, raw_json.csv, master_class_training.csv, master_nvl_training.csv
 # cont... open raw_json.txt and append, same with csv --> process new data --> pull out image source+dest and expand new dfs for the additional pictures
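A small note on the download loop kept by this hunk: executor.map yields the functions' return values directly, not Future objects, so the bare future statement in the loop body is a no-op that simply drives the iterator. An equivalent that makes the intent explicit, written only as a sketch reusing the dl_pic and bargs names from the diff:

    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(dl_pic, *args) for args in bargs]
        for fut in concurrent.futures.as_completed(futures):
            fut.result()  # re-raises any exception that dl_pic hit in its thread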