spbeach46 2020-12-12 18:56:51 -07:00
parent bcb11de855
commit d9781579cd


@@ -16,13 +16,17 @@ class FindingApi:
# departments = ["3034","93427"] (women's and men's)
def get_data(self, i):
def get_data(self, i): # TODO you're going to have to use nested functions or lambda functions here somewhere (see the sketch below this hunk)
'''
Gets raw JSON data from FindingApi service call
Currently being used to get itemIDs from categories
'''
with open('cat_list.txt') as jf:
cat_list = json.load(jf)
for category_id in cat_list:
params = {
"OPERATION-NAME":self.service,
"SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
@@ -45,11 +49,13 @@ class FindingApi:
with open('cat_list.txt') as jf:
cat_list = json.load(jf)
pages = self.pageNumber
itemid_results_list = []
# TODO instead of running through multiple try/except loops, try implementing set methods for efficiency and ease; remember symmetric_difference, difference, intersection, and set() (see the sketch after this hunk)
for category_id in cat_list:
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(self.get_data(), self.pageNumber):
for future in executor.map(self.get_data, pages):
data = future
try:
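
Two notes on this hunk. The corrected map call passes the bound method itself (self.get_data) rather than its return value (self.get_data()); executor.map needs a callable to fan out over the pages. And on the set-methods TODO, a minimal sketch, assuming the usual Finding API JSON shape where itemId arrives as a one-element list (worth verifying against a live response):

# page_results stands in for the parsed page responses collected above (hypothetical name)
itemid_results = set()
for page in page_results:
    items = page['findItemsByCategoryResponse'][0]['searchResult'][0].get('item', [])
    page_ids = {item['itemId'][0] for item in items}
    # difference() keeps only IDs not seen in earlier pages/categories,
    # replacing per-append try/except bookkeeping with one set operation
    itemid_results.update(page_ids.difference(itemid_results))
item_id_list = list(itemid_results)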
@@ -88,17 +94,22 @@ class ShoppingApi:
response = requests.get("https://open.api.ebay.com/shopping?", params=params)
response = response.json()
response = response['Item']
return response
def conky(self):
'''
For some reason item_id_results can only be passed as an argument
to executor.map if the variable is created within this function
'''
data = {}
finding = FindingApi(4, 2)
item_id_results = finding.get_ids_from_cats()
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(self.get_item_from_findItemsByCategory(), item_id_results):
for future in executor.map(self.get_item_from_findItemsByCategory, item_id_results):
data.update(future)
return data
# TODO the structure of data as is (i.e., as an updated dict) means the CurateData class methods are going to have trouble running (see the sketch after this hunk)
class CurateData:
'''
Contains functions for curating data for machine learning training sets;
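
On the TODO flagging data's structure: dict.update overwrites any keys the responses share, so later items can silently clobber earlier ones, and if the Shopping API returns 'Item' as a list, update would fail outright. A hedged alternative sketch of conky, accumulating a flat list of item dicts that a plain for-item-in-data loop can consume:

def conky(self):
    data = []  # a list of per-item dicts instead of one merged dict
    finding = FindingApi(4, 2)
    item_id_results = finding.get_ids_from_cats()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for future in executor.map(self.get_item_from_findItemsByCategory, item_id_results):
            data.extend(future)  # assumes each response is a list of item dicts
    return data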
@@ -142,12 +153,11 @@ class CurateData:
in common with the same listing (i.e., because there are multiple pictures
per listing, each picture will be its own training instance).
'''
for item in data['item']:
training = {} # TODO something funky going on here
# NEED TO CREATE EMPTY DICT OUTSIDE OF FOR LOOP?
training = {}
for item in data:
# TODO MAY HAVE TO DISCARD THIS IDEA DUE TO CRAPPY PICTURES OF CLOSEUPS AND TAGS. May have to settle for the first picture, which is likely to contain a more accurate representation of the item.
picture_url_list = self.extract_picture_url(item)
'''
Creates a training instance per photo for the same listing
'''
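
A minimal sketch of the one-instance-per-picture idea, with the accumulator created outside the loop as this commit does, but swapped from a dict to a list so instances cannot overwrite one another; common_fields() is a hypothetical helper standing in for whatever listing-level fields should be copied into each instance:

def to_training_instances(self, data):
    training = []  # built once, outside the loop, per the fix above
    for item in data:
        shared = self.common_fields(item)  # hypothetical: listing-level fields
        for url in self.extract_picture_url(item):
            instance = dict(shared)  # copy so instances don't share state
            instance['PictureURL'] = url
            training.append(instance)
    return training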
@@ -174,9 +184,9 @@ def main():
service, pageNumber = input('service and pageNumber:').split()
service = int(service)
pageNumber = int(pageNumber)
finding = FindingApi(service, pageNumber)
# finding = FindingApi(service, pageNumber)
item_id_results = finding.get_ids_from_cats()
# item_id_results = finding.get_ids_from_cats()
shopping = ShoppingApi()
data = shopping.conky()
curate = CurateData()
@@ -197,3 +207,4 @@ if __name__ == "__main__":
# TODO NEED TO ADD TRY/EXCEPT CONDITIONS FOR EVERY CALL MADE TO API SERVICES
# TO AVOID HICCUPS WHEN CREATING THE DATASET
# TODO YOU WILL HAVE TO FIND A WAY OF COLLECTING DATA FOR IMAGES OF TAGS, EITHER USING YOUR OWN TAGS OR BY FINDING TAGS ON OTHERS' LISTINGS. CRUCIAL FOR THE LISTING PROCESS. May be as simple as adding a def to one of the APIs to extract only the picture if it can identify what a tag looks like. So it may actually be a good thing to include all the pictures in a training set, but then when you're ready to begin training you'll need a data-cleaning pipeline specific to training a model to learn either shoe features or information on tags.
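
On the try/except TODO, a minimal sketch of a wrapper every API call could route through; the names and retry policy are assumptions, not part of this codebase:

import time

import requests

def safe_get(url, params, retries=3, backoff=2):
    # Retry transient failures with exponential backoff; return None instead
    # of crashing a long dataset-building run.
    for attempt in range(retries):
        try:
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            return response.json()
        except (requests.ConnectionError, requests.Timeout, requests.HTTPError):
            time.sleep(backoff ** attempt)
    return None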