2020-10-12 07:53:29 +00:00
|
|
|
import json
|
|
|
|
import requests
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
class FindingApi:
    '''Wraps eBay Finding API service calls used to collect listing itemIds.'''

    def __init__(self, service, pageNumber):
        # `service` is an integer index into the supported Finding API
        # operation names, not the operation name itself.
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
        ][service]
        # Pages 1 .. pageNumber-1 are requested per category.
        # 64 pages is recommended
        # this will give equal weights to cats given call constraints
        self.pageNumber = list(range(1, pageNumber))

    # departments = ["3034","93427"] (womens and mens)

    def get_data(self):
        '''
        Gets raw JSON data from a FindingApi service call for every category
        id listed in cat_list.txt.

        NOTE(review): `data` is rebound on every loop pass, so only the JSON
        for the LAST category/page requested is returned; every earlier
        response is discarded. TODO: accumulate responses if all pages are
        actually needed downstream.
        '''
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)

        for category_id in cat_list:
            for i in self.pageNumber:
                params = {
                    "OPERATION-NAME":self.service,
                    "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
                    "SERVICE-VERSION":"1.13.0",
                    "RESPONSE-DATA-FORMAT":"JSON",
                    "categoryId":category_id,
                    "paginationInput.entriesPerPage":"100",
                    "paginationInput.PageNumber":i
                }
                # timeout keeps a hung connection from stalling the whole scrape
                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                                        params=params, timeout=30)
                data = response.json()
        return data

    def get_ids_from_cats(self):
        '''
        Creates a list of comma-joined 20-itemId strings to use for the
        ShoppingApi call (GetMultipleItems accepts at most 20 ids per call).
        '''
        data = self.get_data()
        itemid_results_list = []

        try:  # TODO run pdb here to see how to extract itemId before update_df
            training = pd.read_csv('training.csv')
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                # NOTE(review): `item` is a dict here, so `item not in
                # training.values` is unlikely to ever match a csv cell;
                # presumably item['itemId'][0] should be compared — confirm.
                if (item not in training.values) and (item not in itemid_results_list):
                    itemid_results_list.append(item['itemId'][0])  # itemId
                    # values are in lists for some reason
        except (pd.errors.EmptyDataError, FileNotFoundError):
            # No usable training set yet: keep every item not already seen.
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                if item not in itemid_results_list:
                    itemid_results_list.append(item['itemId'][0])

        # Chunk ids into comma-joined batches of 20 for GetMultipleItems.
        item_id_results = [
            ','.join(itemid_results_list[n:n + 20])
            for n in range(0, len(itemid_results_list), 20)
        ]
        return item_id_results
|
|
|
|
|
2020-10-18 07:08:04 +00:00
|
|
|
class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''

    def get_item_from_findItemsByCategory(self, item_id_results):
        '''
        Gets raw JSON data from multiple live listings given multiple itemIds.

        `item_id_results` is a list of comma-joined strings of up to 20
        itemIds each (GetMultipleItems maxes out at 20 ids per call).

        NOTE(review): `data` is rebound on every loop pass, so only one
        batch's JSON survives to the return; the other responses are
        discarded. TODO: accumulate responses per batch.
        Returns None when `item_id_results` is empty.
        '''
        data = None
        for twenty_id in item_id_results:
            params = {
                "callname":"GetMultipleItems",
                "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
                "version":"671",
                "responseencoding":"JSON",
                "ItemID":twenty_id,
                "IncludeSelector":"ItemSpecifics",
            }

            # timeout keeps a hung connection from stalling the whole scrape
            response = requests.get("https://open.api.ebay.com/shopping?",
                                    params=params, timeout=30)
            data = response.json()
        return data  # TODO save data as file??
|
2020-10-18 20:56:16 +00:00
|
|
|
|
2020-10-18 00:22:45 +00:00
|
|
|
class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''

    def update_df(self, data):
        '''
        Extracts the ItemSpecifics name-value list from a GetMultipleItems
        response dict, flattens it into columns alongside the raw response
        keys, and appends the resulting row to training.csv.

        NOTE(review): only data['Item'][0] is read — confirm whether every
        item in the 20-item batch should be curated instead of just the first.
        '''
        # Fix: the original looped `for item in data:` while calling
        # data.update(...) inside the loop, mutating the dict it was
        # iterating (RuntimeError) and rebuilding the same name/value
        # lists once per key. One pass over the NameValueList suffices.
        names = []
        values = []
        nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
        for spec in nvl:
            names.append(spec['Name'])
            values.append(spec['Value'])

        # TODO Also append itemId and value to the dictionary somewhere
        nvl_dict = dict(zip(names, values))

        # Merge into a copy instead of mutating the caller's dict.
        row = {**data, **nvl_dict}
        df = pd.json_normalize(row)
        df.to_csv('training.csv', mode='a')
|
2020-10-12 07:53:29 +00:00
|
|
|
|
2020-10-18 00:22:45 +00:00
|
|
|
def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    # Both values arrive as strings from stdin; convert before use.
    raw_service, raw_pages = input('service and pageNumber:').split()
    finding = FindingApi(int(raw_service), int(raw_pages))

    # Collect itemId batches, fetch the listings, then persist to csv.
    batches = finding.get_ids_from_cats()
    listings = ShoppingApi().get_item_from_findItemsByCategory(batches)
    CurateData().update_df(listings)
|
2020-10-12 07:53:29 +00:00
|
|
|
|
2020-10-18 00:22:45 +00:00
|
|
|
# Script entry point: run the scrape/curate pipeline only when executed
# directly, not when imported as a module.
if __name__ == "__main__":
    main()
|
2020-10-12 07:53:29 +00:00
|
|
|
# Limited to 5000 calls to shopping api per day, and the GetMultipleItems service maxes out at 20 items
# per call, leaving you 100,000 items per day for your pandas dataframe initially. So you'll have
# to divide these up into the categories. This will leave you with about 6.25K results per cat.
# More than enough data for your dataset.
|
|
|
|
|
2020-10-17 23:21:11 +00:00
|
|
|
# Need to make sure dataframe gets important stuff outside of nvl in order to
|
|
|
|
# access values for cross referencing itemIds from calls
|
|
|
|
# Need to decide if list gets accessed from df or if you're just going to have
|
|
|
|
# list contents extracted and possibly placed into separate cells/labels
|