Added some exception handling, but coverage is still likely incomplete.

This commit is contained in:
spbeach46 2021-02-14 01:34:56 -07:00
parent 816fb797fa
commit 6abf695908

View File

@ -3,6 +3,7 @@ import numpy as np
import concurrent.futures
import json

import pandas as pd
import requests
from requests.exceptions import ConnectionError, HTTPError, Timeout

import config as cfg
@ -40,9 +41,13 @@ class FindingApi:
"paginationInput.PageNumber":i
}
response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
params=params)
# TODO add try excepts here
try:
response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
params=params, timeout=1)
except HTTPError:
print('connection error')
data = response.json()
return data
@ -106,14 +111,18 @@ class ShoppingApi:
'''
params = {
"callname":"GetMultipleItems",
"appid":config.cfg['SECURITY-APPNAME'],
"appid":cfg.sec['SECURITY-APPNAME'],
"version":"671",
"responseencoding":"JSON",
"ItemID":twenty_id,
"IncludeSelector":"ItemSpecifics",
}
response = requests.get("https://open.api.ebay.com/shopping?", params=params)
# TODO Add try excepts here
try:
response = requests.get("https://open.api.ebay.com/shopping?", params=params, timeout=1)
except HTTPError:
print('connection error') #TODO figure out how to pick up where left off or loop until it works
response = response.json()
response = response['Item']
return response
@ -172,7 +181,8 @@ class CurateData:
return nvl_training
def extract_contents(self, df):
    '''
    Joins list-valued cells (e.g., lists of URL strings) into single
    space-separated strings, leaving missing values as np.nan.

    df: DataFrame whose cells may hold lists of strings or NaN.
    Returns a new DataFrame of the same shape with joined strings.
    '''
    # np.any(pd.notnull(x)) handles both scalar NaN and list cells:
    # pd.notnull on a list returns an element-wise array, which cannot
    # be used directly in a boolean context. The earlier variant that
    # tested pd.notnull(x) bare raised ValueError on list cells and has
    # been removed.
    extracted_df = df.applymap(
        lambda x: ' '.join(x) if np.any(pd.notnull(x)) else np.nan
    )
    return extracted_df
def drop_nvl_cols(self, nvl_training):
col_drop = [
@ -193,7 +203,7 @@ class CurateData:
'Calf Width', 'Insole Material', 'UPC', 'Size Type'
]
col_keep = [
'Picture URL', 'Style', 'Department', 'Type', 'Gender', 'Closure', 'Performance/Activity',
'PictureURL', 'Style', 'Department', 'Type', 'Gender', 'Closure', 'Performance/Activity',
'Accents', 'Occasion', 'Toe Shape', 'Pattern', 'Activity',
'Heel Style', 'Fastening', 'Heel Type', 'Toe Type', 'Departement',
'Product Type', 'Sub Style', 'Season', 'Theme', 'Upper Material',
@ -202,9 +212,10 @@ class CurateData:
# closure; toe shape and toe type; occasion and performance/activity;
# see if you can combine these somehow (you may not want this though).
# Also consider keeping only cols that have plenty of values
user_input = input('drop or keep cols?:')
if 'keep' in user_input:
dropd_nvl_training = nvl_training.loc[:,[col_keep]]
dropd_nvl_training = nvl_training.loc[:,col_keep]
else:
dropd_nvl_training = nvl_training.drop(col_drop, axis=1)
return dropd_nvl_training
@ -213,6 +224,13 @@ class CurateData:
final_training = pd.concat([class_training, dropd_nvl_training], axis=1)
return final_training
def dl_pictures(self, expand=1):
    '''
    Placeholder: will download listing pictures from the api to local
    storage and expand the url list to a user-specified count.

    expand: target number of pictures per listing (default 1).
    Currently unimplemented; returns None.
    '''
    return None
# TODO Still need to to extract strings from list of strings and then drop which ones you don't want or vice versa. You may have to avoid using cells with lists of strings longer than one (e.g., 'Features')
# TODO Also need to expand photo list from PictureURL. Decide how many or which photos to use. You may even want to use a pretrained model to decide whether or not the photos are of shoes or not to filter#
# it might be that only the first picture is reliable enough to use in the dataset.
@ -267,22 +285,4 @@ if __name__ == "__main__":
# TO AVOID HICCUPS WHEN CREATING DATASET
# TODO YOU WILL HAVE TO FIND A WAY OF COLLECTING DATA FOR IMAGES OF TAGS EITHER USING YOUR OWN TAGS OR SOMEHOW FIND A WAY TO FIND TAGS ON OTHERS LISTINGS. CRUCIAL FOR THE LISTINGS PROCESS. May be as simple as adding a def to one of the apis to extract only the picture if it can identify what a tag looks like. So, it may actually be a good thing to include all the pictures in a training set but then when you're ready to begin training you'll have a data cleaning pipeline specific to training a model to either learn shoe features or information on tags.
'''
List of columns from nvl_list that I want to drop before training:
['Fabric Type', 'Type of Sport', 'Mid Sole', 'Modified Item', 'Modification Description', 'Article Type', 'Customized', 'Character', 'Features', 'Colors', 'Shade', 'Product ID', 'Personlized', 'Platform Height',
'Year Manufactured', 'Trim Material', 'Fashion Element', 'Shaft Material', 'Character Family', 'Heel to Toe Drop', 'Custom Bundle', 'Califormnia Prop 65 Warning', 'Manufacture Color', 'Main Color',
'Collection', 'Mid Sole Type', 'Signed', 'US Shoe Size (Men's)', 'Calf Circumference', 'Hand Made', 'Safety Standards', 'Customised', 'Cleat Type', 'Cushioning Level', 'AU Shoe Size', 'Country/Region of Manufacture',
'Type of Sport', 'Main Colour', 'Look']
'''
'''
list of columns from nvl_list that I want to keep before training:
[
'Picture URL', 'Style', 'Department', 'Type', 'Gender', 'Closure', 'Performance/Activity',
'Accents', 'Occasion', 'Toe Shape', 'Pattern', 'Activity',
'Heel Style', 'Fastening', 'Heel Type', 'Toe Type', 'Closure Type', 'Departement',
'Product Type', 'Sub Style', 'Season', 'Theme', 'Material', 'Upper Material',
]
'''
# Check the above list of cols I want to keep to see if there are duplicates with diff spelling and phrasing (e.g., Departement and Department, or Fastening and Closure Type)