spbeach46 2021-06-01 22:28:54 -07:00
parent 9fc00f4eab
commit 5582cd29ef


@@ -35,6 +35,10 @@ class FindingApi:
get itemIDs from categories;
'''
'''
consider using the sortOrder param to update by the latest listings first.
Also consider using the exclude duplicates param (see the sketch below)
'''
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -79,7 +83,8 @@ class FindingApi:
for category_id in cat_list:
args = [(category_id, i) for i in pages]
args = [(category_id, i) for i in pages] # NOTE alternatively you can extend a master args list with every category's tuples
# instead of running concurrent.futures.ThreadPoolExecutor in a loop. Might be faster (see the sketch below)
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(lambda p: self.get_data(*p), args):
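A sketch of the alternative in the NOTE above: extend one master list of (category_id, page) tuples and run a single ThreadPoolExecutor over it instead of one executor per category (self.get_data, cat_list and pages assumed from the surrounding method):

# Sketch only: single executor over a master args list covering all categories
args = []
for category_id in cat_list:
    args.extend((category_id, i) for i in pages)

with concurrent.futures.ThreadPoolExecutor() as executor:
    for future in executor.map(lambda p: self.get_data(*p), args):
        pass  # process each response here as in the per-category loop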
@@ -101,7 +106,7 @@ class FindingApi:
item_id_results = list(set(itemid_results_list))
item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
len(itemid_results_list), 20))]
len(itemid_results_list), 20))] # lists of 20 ItemIDs created to maximize data per call / decrease call count given call constraints (see example below)
return item_id_results
# TODO during your try/except conditionals just check the csv files. At the end you can create sets. You can create another condition that says if the final set is smaller than 100k then you can call the Finding api again
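For illustration, the 20-ID batching above turns the deduplicated flat list into comma-joined strings of at most 20 ItemIDs each, so every call carries as many items as the API allows:

# e.g. 45 ItemIDs -> 3 comma-separated strings of at most 20 IDs each
itemid_results_list = [str(n) for n in range(45)]
item_id_results = [','.join(itemid_results_list[n:n+20])
                   for n in range(0, len(itemid_results_list), 20)]
# len(item_id_results) == 3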
@@ -339,10 +344,19 @@ class CurateData:
user_input = input('drop or keep cols?:')
'''
dropping and/or keeping/masking functions to create your filtered df below are
producing errors due to some column labels in your predefined lists not being present.
Look at the documentation to see if an option exists to ignore labels that aren't present
(see the sketch below). The keep col option is ideal due to users inputting crappy
custom fields in item specifics. Use this if you can
'''
if 'keep' in user_input:
dropd = nvl_training.reindex([col_keep])
dropd = nvl_training.reindex([col_keep]) # TODO ERRORS HERE USING LOC OR REINDEX WITH MULTIPLE COL LABELS
else:
dropd = nvl_training.drop(col_drop, axis=1)
dropd = nvl_training#.drop(col_drop, errors='ignore', axis=1) # errors='ignore' for non-existent labels
return dropd
# for future reference, to deal with inconsistent values in the nvl (due to sellers inputting custom values in the fields) you can drop either listings or k/v pairs that are unique which
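A sketch of how the keep/drop branch could tolerate missing labels, assuming col_keep and col_drop are flat lists of column names: pandas reindex(columns=...) turns absent labels into all-NaN columns, and drop(..., errors='ignore') simply skips them:

# Sketch only: label-tolerant keep/drop (col_keep/col_drop assumed flat lists of column names)
if 'keep' in user_input:
    dropd = nvl_training.reindex(columns=col_keep)                # absent labels become NaN columns
else:
    dropd = nvl_training.drop(columns=col_drop, errors='ignore')  # absent labels are skipped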
@@ -355,6 +369,8 @@ class CurateData:
takes the image url list from each cell and expands it into separate/duplicate
instances (one row per url). Modifies both the class training and dropd dfs. Appends custom
image url dict {'source':'target'}.
* consider applying this function to other cells that have multiple values in their lists (see the sketch below)
'''
expanded_class = class_training.explode('PictureURL').reset_index(drop=True)
expanded_class = expanded_class.dropna(subset=['PictureURL'])
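As the docstring suggests, the same explode pattern could be reused for any other list-valued column; 'SomeListColumn' and df below are hypothetical stand-ins:

# Sketch only: reuse the explode/dropna pattern on another list-valued column
expanded = df.explode('SomeListColumn').reset_index(drop=True)
expanded = expanded.dropna(subset=['SomeListColumn'])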
@@ -366,8 +382,7 @@ class CurateData:
expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list())) # probably need to create the set long before the df... immediately after the Shopping or Trading call
#: defined in the download function
temp_pics_source_list = list(set(expanded_class.PictureURL.to_list()))
try:
@@ -445,7 +460,7 @@ class CurateData:
dict_pics = temp_dict_pics
def dl_pic(dict_pics, pic):
if os.path.exists(dict_pics[pic]): # or calling temp_dict_pics[pic] would also work
pass # TODO this is not catching duplicates for some reason... or possibly there are none? Upon inspection, files aren't duplicates... but why?
# TODO that would mean temp_pics_source_list is changing for some reason?
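For context, a minimal sketch of the skip-if-present download the TODOs are probing, assuming dict_pics maps each source url to a local target path and that requests handles the download:

import os
import requests

def dl_pic(dict_pics, pic):
    target = dict_pics[pic]      # local path mapped from the source url
    if os.path.exists(target):   # skip files that already exist on disk
        return
    r = requests.get(pic, timeout=30)
    with open(target, 'wb') as f:
        f.write(r.content)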