added CurateData class for preprocessing pipeline functionality

spbeach46 2020-10-17 16:21:11 -07:00
parent 58d8c8cda7
commit ae5a4e92bb


@@ -41,31 +41,18 @@ class FindingApi:
get_ids_from_cats creates a 20-itemId list to use for the ShoppingApi
call
'''
data = self.get_data()
itemid_results_list = []
for category_id in cat_list:
for i in self.pageNumber:
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
"paginationInput.entriesPerPage":"100",
"paginationInput.PageNumber":i
}
response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
params=params)
data = response.json()
for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
if item not in big_data.values:
itemid_results_list.append(item['itemId'][0]) # itemId
# values are in lists for some reason
item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
len(itemid_results_list), 20))]
return item_id_results
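
For reference, a standalone sketch of the chunking above using made-up itemIds; joining in groups of 20 presumably matches the Shopping API's GetMultipleItems limit of 20 ItemIDs per call.

# Hypothetical itemIds, just to show the shape of the output
itemid_results_list = [str(110000000000 + n) for n in range(45)]
item_id_results = [','.join(itemid_results_list[n:n + 20])
                   for n in range(0, len(itemid_results_list), 20)]
print(len(item_id_results))   # 3 strings: 20 + 20 + 5 itemIds
print(item_id_results[-1])    # the last string carries the 5 leftover itemIds
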
class ShoppingApi(FindingApi):
class ShoppingApi():
'''
Creates objects from ShoppingApi service calls that can interact with
pandas dataframes
@@ -86,11 +73,11 @@ class ShoppingApi(FindingApi):
names = []
values = []
nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
nvl = data['Item'][0]['ItemSpecifics']['NameValueList'][0]
for nvl_dict in nvl:
names.append(nvl_dict['Name'])
values.append(nvl_dict['Value'])
values.append(nvl_dict['Value']) # Try to extract value from list here
nvl_dict = dict(zip(names, values))
data.update(nvl_dict)
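
A minimal sketch of the flattening this loop performs, with a made-up ItemSpecifics payload shaped like the NameValueList above (names and values are illustrative, not from an actual response):

# Made-up NameValueList; the Shopping API wraps each Value in a list
nvl = [
    {'Name': 'Brand', 'Value': ['Patagonia']},
    {'Name': 'Color', 'Value': ['Blue']},
    {'Name': 'Size', 'Value': ['M']},
]
names = [d['Name'] for d in nvl]
values = [d['Value'][0] for d in nvl]   # index [0] pulls the value out of its list
print(dict(zip(names, values)))         # {'Brand': 'Patagonia', 'Color': 'Blue', 'Size': 'M'}
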
@@ -102,6 +89,7 @@ class ShoppingApi(FindingApi):
# to divide these up into the categories. This will leave you with about 6.25K results per cat.
# More than enough data for your dataset.
# Need to make sure dataframe gets important stuff outside of nvl. Also need to
# change init method in findingapi to have variable pages and possibly variable
# services.
# Need to make sure the dataframe keeps the important fields outside of nvl in order to
# access values for cross-referencing itemIds from calls
# Need to decide if the list gets accessed from the df or if you're just going to have
# the list contents extracted and possibly placed into separate cells/labels
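
One possible way to handle that last point, sketched below with pandas and made-up item dicts (keys and values are illustrative, not taken from an actual response): each extracted name/value pair becomes its own DataFrame column.

import pandas as pd

# Hypothetical flattened items, mimicking the output of data.update(nvl_dict) above
items = [
    {'ItemID': '110000000001', 'Title': 'Fleece Jacket', 'Brand': 'Patagonia', 'Color': 'Blue'},
    {'ItemID': '110000000002', 'Title': 'Rain Shell', 'Brand': 'Marmot', 'Color': 'Red'},
]
df = pd.json_normalize(items)      # each ItemSpecifics name gets its own column/label
print(df.columns.tolist())         # ['ItemID', 'Title', 'Brand', 'Color']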