Changed the data variable returned in ShoppingApi to data['Item'], and vice versa in the update_data function, so that data.update works correctly.
This commit is contained in:
parent
97c5900a5b
commit
bcb11de855
@ -95,9 +95,10 @@ class ShoppingApi:
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
for future in executor.map(self.get_item_from_findItemsByCategory(), item_id_results):
|
||||
data.update(future)
|
||||
return data # TODO save data as file??
|
||||
return data
|
||||
|
||||
# TODO the structure of data as is (ie, as an updated dict) means CurateData class methods are going to have trouble running
|
||||
|
||||
# TODO CONSIDER IMPLEMENTING MULTITHREADING AROUND HERE TOO.
|
||||
class CurateData:
|
||||
'''
|
||||
Contains functions for curating data for machine learning training sets;
|
||||
|
@ -1,43 +0,0 @@
|
||||
import requests
|
||||
import json
|
||||
from bs4 import BeautifulSoup as b
|
||||
import pandas as p
|
||||
|
||||
# keywords = input('keyword search: ')
|
||||
|
||||
# Category IDs to query, read from the JSON document in cat_list.txt.
with open('cat_list.txt') as jf:
    cat_list = json.loads(jf.read())

# Operation names of the Finding service; get_ids uses index 4.
finding_service = [
    'findItemsAdvanced',
    'findCompletedItems',
    'findItemsByKeywords',
    'findItemsIneBayStores',
    'findItemsByCategory',
    'findItemsByProduct',
]

# Page numbers 1 through 62.
pageNumber = [*range(1, 63)]
|
||||
|
||||
# departments = ["3034","93427"]
|
||||
|
||||
def get_ids():
    '''Fetch one page of findItemsByCategory results from the Finding service.

    Requests the page given by ``pageNumber[0]`` (100 entries per page) for a
    category taken from the first two entries of the module-level
    ``cat_list`` and returns the decoded JSON body.

    NOTE(review): the indentation of this function was lost, so it is not
    certain whether the request and the ``return`` belong inside the category
    loop.  This version assumes they do, i.e. it returns after the first
    category -- confirm against the intended behaviour.

    Returns:
        The decoded JSON response, or ``None`` when ``cat_list`` is empty.
    '''
    for category_id in cat_list[0:2]:
        params = {
            "OPERATION-NAME": finding_service[4],
            # NOTE(review): the application ID is hard-coded in source;
            # consider loading it from configuration instead.
            "SECURITY-APPNAME": "scottbea-xlister-PRD-6796e0ff6-14862949",
            "SERVICE-VERSION": "1.13.0",
            "RESPONSE-DATA-FORMAT": "JSON",
            "categoryId": category_id,
            "paginationInput.entriesPerPage": "100",
            "paginationInput.PageNumber": pageNumber[0],
        }
        response = requests.get(
            "https://svcs.ebay.com/services/search/FindingService/v1",
            params=params,
        )
        # Item IDs can be extracted from this response for piping into
        # shopping_test.py.
        return response.json()
    return None
|
||||
# can use pandas.json_normalize(custom dict cobbled from response.json())
|
||||
|
||||
|
||||
# An additional problem you will run into when getting labeled data is shoe types and features that are not listed in the features, accents, styles, categories or subcategories.
|
||||
|
||||
# also limited to 5000 calls per day. This leaves you with 500k listings
|
||||
|
||||
# If you want to split up each cat equally with their respective maxes then use 62 pages with 100
|
||||
# Entries per page. At this amount you'll have the max number of calls you can make on the
|
||||
# shopping api.
|
@ -1,79 +0,0 @@
|
||||
import json
|
||||
import requests
|
||||
import pandas as pd
|
||||
|
||||
# OPEN CSV AS VARIABLE RIGHT HERE
|
||||
# Category IDs to query, read from the JSON document in cat_list.txt.
with open('cat_list.txt') as jf:
    cat_list = json.loads(jf.read())

# Item data loaded from big_data.csv; FindingApi consults it when collecting IDs.
big_data = pd.read_csv('big_data.csv')
|
||||
|
||||
class FindingApi:
    '''Collect item IDs from the Finding service, category by category.'''

    def __init__(self):
        # Finding service operation names; get_ids_from_cats uses index 4
        # ('findItemsByCategory').
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
        ]
        # Result pages requested for every category (1 through 62).
        self.pageNumber = list(range(1, 63))

    # departments = ["3034","93427"] (womens and mens)

    def get_ids_from_cats(self):
        '''Return new item IDs for every category, grouped twenty per string.

        Requests each page in ``self.pageNumber`` for each category in the
        module-level ``cat_list`` and keeps the IDs of items that do not
        already appear in the module-level ``big_data`` frame.

        Returns:
            A list of strings, each holding up to 20 comma-separated item IDs.
        '''
        # Build the lookup once instead of scanning the frame for every item.
        # NOTE(review): the original tested ``item not in big_data.values``,
        # comparing a whole item dict against individual cells, which never
        # filters anything.  Matching the ID against any cell is presumably
        # the intent -- confirm which column of big_data holds the IDs.
        known_values = set(big_data.astype(str).values.ravel())

        itemid_results_list = []
        for category_id in cat_list:
            # Iterate over the page numbers themselves.  Indexing
            # self.pageNumber with a page number skipped page 1 and raised
            # IndexError on the last page.
            for page in self.pageNumber:
                params = {
                    "OPERATION-NAME": self.service[4],
                    # NOTE(review): the application ID is hard-coded in
                    # source; consider loading it from configuration.
                    "SECURITY-APPNAME": "scottbea-xlister-PRD-6796e0ff6-14862949",
                    "SERVICE-VERSION": "1.13.0",
                    "RESPONSE-DATA-FORMAT": "JSON",
                    "categoryId": category_id,
                    "paginationInput.entriesPerPage": "100",
                    "paginationInput.PageNumber": page,
                }
                response = requests.get(
                    "https://svcs.ebay.com/services/search/FindingService/v1",
                    params=params,
                )
                data = response.json()
                search_result = data['findItemsByCategoryResponse'][0]['searchResult'][0]
                # A page without results may carry no 'item' key at all.
                for item in search_result.get('item', []):
                    item_id = item['itemId']
                    # The response wraps its other fields in one-element
                    # lists; presumably itemId is wrapped the same way, so
                    # unwrap it before joining -- TODO confirm.
                    if isinstance(item_id, list):
                        item_id = item_id[0]
                    item_id = str(item_id)
                    if item_id not in known_values:
                        itemid_results_list.append(item_id)

        return self._batch_ids(itemid_results_list)

    @staticmethod
    def _batch_ids(item_ids, size=20):
        '''Join item_ids into comma-separated strings of at most size IDs.'''
        return [
            ','.join(item_ids[start:start + size])
            for start in range(0, len(item_ids), size)
        ]
|
||||
|
||||
class ShoppingApi(FindingApi):
    '''Fetch item details with GetMultipleItems and save them as CSV.'''

    def get_item_from_findItemsByCategory(self, item_id_results):
        '''Download item specifics for every ID batch and write big_data.csv.

        Args:
            item_id_results: iterable of strings, each passed as the
                ``ItemID`` parameter of one GetMultipleItems call (the
                sibling FindingApi builds them as up to 20 comma-separated
                IDs).

        Every item of every response becomes one row, with its
        ItemSpecifics name/value pairs added as extra columns.  The original
        looked only at the first item of each response and rewrote the CSV
        on every batch, so only the last batch survived.

        Raises:
            KeyError: if a response has no ``'Item'`` entry; rows collected
                before the failure are still written.

        NOTE(review): 'big_data.csv' is overwritten, not appended to, as in
        the original.
        '''
        rows = []
        try:
            for twenty_id in item_id_results:
                params = {
                    "callname": "GetMultipleItems",
                    # NOTE(review): the application ID is hard-coded in
                    # source; consider loading it from configuration.
                    "appid": "scottbea-xlister-PRD-6796e0ff6-14862949",
                    "version": "671",
                    "responseencoding": "JSON",
                    "ItemID": twenty_id,  # comma-separated string of item IDs
                    "IncludeSelector": "ItemSpecifics",
                }
                response = requests.get("https://open.api.ebay.com/shopping?", params=params)
                data = response.json()
                rows.extend(self._flatten_item(item) for item in data['Item'])
        finally:
            # Write once, after all batches, and keep whatever was collected
            # if a request or response fails part-way through.
            if rows:
                pd.json_normalize(rows).to_csv('big_data.csv')

    @staticmethod
    def _flatten_item(item):
        '''Return a copy of item with its ItemSpecifics pairs as top-level keys.'''
        flat = dict(item)
        # Items without specifics simply contribute no extra columns.
        name_value_list = item.get('ItemSpecifics', {}).get('NameValueList', [])
        flat.update({pair['Name']: pair['Value'] for pair in name_value_list})
        return flat
|
||||
|
||||
# Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems service maxes out at 20 items
|
||||
# per call, leaving you 100,000 items per day for your pandas dataframe initially. So you'll have
|
||||
# to divide these up into the categories. This will leave you with about 6.25K results per cat.
|
||||
# More than enough data for your dataset. Consider
|
Loading…
Reference in New Issue
Block a user