import json
import requests
import pandas as pd

class FindingApi:
    '''Methods for making eBay FindingApi service calls'''

    def __init__(self, service, pageNumber):
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
        ][service]
        self.pageNumber = list(range(1, pageNumber))  # 64 pages is recommended
        # this will give equal weights to cats given call constraints

        # departments = ["3034", "93427"] (womens and mens)

    def get_data(self):
        '''
        Gets raw JSON data from the FindingApi service call
        '''
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)

        for category_id in cat_list:
            for i in self.pageNumber:
                params = {
                    "OPERATION-NAME": self.service,
                    "SECURITY-APPNAME": "scottbea-xlister-PRD-6796e0ff6-14862949",
                    "SERVICE-VERSION": "1.13.0",
                    "RESPONSE-DATA-FORMAT": "JSON",
                    "categoryId": category_id,
                    "paginationInput.entriesPerPage": "100",
                    "paginationInput.PageNumber": i
                }
                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                                        params=params)
                data = response.json()
        return data  # note: only the last response from the loops is returned here

    def get_ids_from_cats(self):
        '''
        Creates a 20-itemId list to use for the ShoppingApi call
        '''
        data = self.get_data()
        itemid_results_list = []
        try:  # TODO run pdb here to see how to extract itemId before update_df
            training = pd.read_csv('training.csv')
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                item_id = item['itemId'][0]  # itemId values are in lists for some reason
                # skip ids already in the training set and ids already collected this run
                if item_id not in training.values and item_id not in itemid_results_list:
                    itemid_results_list.append(item_id)
        except (pd.errors.EmptyDataError, FileNotFoundError):
            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                item_id = item['itemId'][0]
                if item_id not in itemid_results_list:
                    itemid_results_list.append(item_id)

        item_id_results = [','.join(itemid_results_list[n:n + 20]) for n in
                           range(0, len(itemid_results_list), 20)]
        return item_id_results


class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''
    def get_item_from_findItemsByCategory(self, item_id_results):
        '''
        Gets raw JSON data from multiple live listings given multiple itemIds
        '''
        for twenty_id in item_id_results:
            params = {
                "callname": "GetMultipleItems",
                "appid": "scottbea-xlister-PRD-6796e0ff6-14862949",
                "version": "671",
                "responseencoding": "JSON",
                "ItemID": twenty_id,
                "IncludeSelector": "ItemSpecifics",
            }
            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
            data = response.json()
        return data  # TODO save data as file??


class CurateData:
    '''
    Contains functions for curating data for machine learning training sets
    '''
    def update_df(self, data):
        '''
        Extracts itemIds and name-value lists, creates a new dict and appends the df
        '''
        names = []
        values = []
        nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
        for nvl_dict in nvl:
            names.append(nvl_dict['Name'])
            values.append(nvl_dict['Value'])
        # TODO Also append itemId and value to the dictionary somewhere

        nvl_dict = dict(zip(names, values))
        data.update(nvl_dict)
        df = pd.json_normalize(data)
        df.to_csv('training.csv', mode='a')


def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    service, pageNumber = input('service and pageNumber: ').split()
    service = int(service)
    pageNumber = int(pageNumber)
    finding = FindingApi(service, pageNumber)
    item_id_results = finding.get_ids_from_cats()
    shopping = ShoppingApi()
    data = shopping.get_item_from_findItemsByCategory(item_id_results)
    curate = CurateData()
    curate.update_df(data)


if __name__ == "__main__":
    main()

# Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems service maxes out
# at 20 items per call, leaving you 100,000 items per day for your pandas dataframe initially.
# So you'll have to divide these up into the categories. This will leave you with about 6.25K
# results per cat. More than enough data for your dataset.
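
# The arithmetic above, as a small sketch. The figures (5,000 calls/day, 20 items per
# GetMultipleItems call, ~16 categories) are assumptions pulled from these notes, not
# values fetched from eBay at runtime.
def daily_item_budget(calls_per_day=5000, items_per_call=20, n_categories=16):
    '''Returns (total items per day, items per category per day) under the assumed limits'''
    total_items = calls_per_day * items_per_call       # 5,000 * 20 = 100,000 items/day
    return total_items, total_items // n_categories    # ~6,250 items per category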

# Need to make sure the dataframe gets important stuff outside of nvl in order to
# access values for cross referencing itemIds from calls.
# Need to decide if the list gets accessed from the df or if you're just going to have
# list contents extracted and possibly placed into separate cells/labels.
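
# One possible shape for those rows, sketched as a hypothetical helper (not called anywhere
# yet): keep ItemID as its own column so rows can be cross referenced against later calls,
# and spill each NameValueList entry into its own cell rather than leaving them nested.
def item_to_row(item):
    '''Flattens one GetMultipleItems item into a dict: ItemID plus its ItemSpecifics'''
    row = {'ItemID': item.get('ItemID')}
    for nvl in item.get('ItemSpecifics', {}).get('NameValueList', []):
        row[nvl['Name']] = nvl['Value']  # Value may come back as a list in the raw JSON
    return row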