added notes and changed security_appname to var

This commit is contained in:
spbeach46 2021-01-30 03:08:10 -07:00
parent 72db161858
commit 4f46741a0f

View File

@ -4,6 +4,7 @@ import concurrent.futures
import json
import requests
import pandas as pd
import os
class FindingApi:
'''Methods for accessing eBays FindingApi services'''
@ -15,9 +16,15 @@ class FindingApi:
][service]
self.pageNumber = list(range(1, pageNumber)) # 64 pages is recommended
# as this will give equal weights to cats given call constraints
self.security_appname = os.environ.get('SECURITY-APPNAME')
# departments = ["3034","93427"] (women's and men's)
# examples of additional params you may want to add:
# 'itemFilter(0).value':'Used'
# 'itemFilter(1).name':'ListingType'
# 'itemFilter(1).value':'AuctionWithBIN'
def get_data(self, category_id, i):
'''
@ -27,7 +34,7 @@ class FindingApi:
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
"SECURITY-APPNAME":self.security_appname,
"SERVICE-VERSION":"1.13.0",
"RESPONSE-DATA-FORMAT":"JSON",
"categoryId":category_id,
@ -41,6 +48,13 @@ class FindingApi:
data = response.json()
return data
# TODO: Add more options to the Finding API call, such as filtering for used items only. This might give you a better dataset for training; a mixture of new and used items is another option.
# Also try to come up with a way to mathematically estimate your odds of maximizing the number of pictures in your training set while reducing the number of useless images. For example, if you took a
# random sample of 3 of the 8 pictures in each listing, you might have a better chance of getting 3 good pictures while also increasing the size of your training set. Alternatively, you might have better luck limiting
# the selection to the first 5 pictures instead of sampling at random.
# You may even get more consistency with used shoes, since they are "one-off" items without confusing multiple variations and colors. You could also run small training sets on both new and used items
# to see which is more accurate, or whether a combination of both is more accurate.
def get_ids_from_cats(self):
'''
@ -77,6 +91,9 @@ class FindingApi:
len(itemid_results_list), 20))]
return item_id_results
# TODO: During your try/except conditionals, just check the CSV files. At the end you can create sets. You can create another condition that says if the final set is smaller than 100k, then call the Finding
# service on more pages (but only pages you haven't tried) and repeat the search process.
# TODO: Instead of running through multiple try/except loops, try to implement set methods for efficiency and ease. Remember symmetric_difference, difference, intersection, set()
# for category_id in cat_list:
@ -89,9 +106,10 @@ class ShoppingApi:
'''
Gets raw JSON data from multiple live listings given multiple itemIds
'''
finding = FindingApi(4,2)
params = {
"callname":"GetMultipleItems",
"appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
"appid":finding.security_appname,
"version":"671",
"responseencoding":"JSON",
"ItemID":twenty_id,
@ -156,6 +174,9 @@ class CurateData:
nvl_training = pd.concat([pd.Series(training.PictureURL), nvl_df], axis=1)
return nvl_training
# TODO: Still need to extract strings from the lists of strings and then drop the ones you don't want, or vice versa. You may have to avoid using cells with lists of strings longer than one (e.g., 'Features').
# TODO: Also need to expand the photo list from PictureURL. Decide how many or which photos to use. You may even want to use a pretrained model to decide whether or not the photos are of shoes, to filter them;
# it might be that only the first picture is reliable enough to use in the dataset.
# TODO: Also need to decide which features are going to be relevant. For example, is color really necessary for finding features? Is it necessary to train your model on this, or can you find color an easier way?
def update_df(self, data):
'''
Creates training instances for dataset. picture_url_list expanded to
@ -163,6 +184,7 @@ class CurateData:
in common with same listing (i.e., because there are multiple pictures
per listing, each picture will be its own training instance.
'''
pass
# Ultimately you need each record to be one picture URL as input, plus the relevant columns determined from the custom nvl_dicts. You can figure out how to address the multiple values in the lists when you make the df just before the final df (that intermediate df may include the multiple pictures from each list in the original records). This should be your next step.