From 1faa4e86fd626c744f54832759dbf4c5850265fb Mon Sep 17 00:00:00 2001
From: scott
Date: Mon, 15 Nov 2021 21:01:02 -0700
Subject: [PATCH] duplicate id fixes. full data not getting fetched

---
 ebay_api.py | 73 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 26 deletions(-)

diff --git a/ebay_api.py b/ebay_api.py
index 5b6183d..16aa3ac 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -20,13 +20,13 @@ class FindingApi:
     Methods for accessing eBay's FindingApi services
     '''
 
-    def __init__(self, service, pageNumber):
+    def __init__(self, service, target_idspc): # target ids per cat
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
             'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
             'findItemsByProduct'
         ][service] # Currently using only index 4, i.e., service = 4
-        self.pageNumber = list(range(1, pageNumber)) # 77 pgs will give equal weights to cats given call constraints
+        self.target_idspc = target_idspc
 
     # examples of additional params you may want to add:
     # 'itemFilter(0).value':'Used' consider using this with findCompletedItems call
@@ -35,7 +35,7 @@ class FindingApi:
     # 'StartTimeNewest'
     # HideDuplicateItems
 
-    def get_data(self, category_id, i):
+    def get_data(self, category_id, target_idspc):
         '''
         Gets raw JSON data from FindingApi service call. Currently being used to
@@ -47,6 +47,8 @@ class FindingApi:
         Also consider using the exclude-duplicates param and possibly others. Research
         eBay API docs to find candidates
         '''
+        i = 1
+        ids = []
         params = {
             "OPERATION-NAME":self.service,
             "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
@@ -63,16 +65,40 @@ class FindingApi:
             "itemFilter.value":'true'
         }
 
-        # TODO add try/excepts here
-        try:
-            response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
-                    params=params, timeout=4)
-            response.raise_for_status()
-
-        except requests.exceptions.RequestException:
-            print('connection error') #TODO DECIDE HOW TO HANDLE EXCEPTION
-        data = response.json()
-        return data
+        while len(ids) < target_idspc:
+            try:
+                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
+                        params=params, timeout=7)
+                response.raise_for_status()
+
+            except requests.exceptions.RequestException: # appears this works; need to be able to continue where you left off, or use a better timeout?
+                print('connection error') #TODO DECIDE HOW TO HANDLE EXCEPTION
+                return ids
+            data = response.json()
+
+            for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
+                if item not in ids:
+                    ids.append(item['itemId'][0])
+
+            ids = list(set(ids))
+            i += 1
+            params = {
+                "OPERATION-NAME":self.service,
+                "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
+                "SERVICE-VERSION":"1.13.0",
+                "RESPONSE-DATA-FORMAT":"JSON",
+                "categoryId":category_id,
+                "paginationInput.entriesPerPage":"100",
+                "paginationInput.PageNumber":i,
+                "itemFilter(0).name":"Condition",
+                "itemFilter(0).value":"Used", # conditionId is recommended instead, but for some reason that doesn't work either;
+                # may not be necessary anyway if you can eliminate dupes. TODO: still need to get this working. Used results are likely better than new items with no-name brands and poor pics
+                "sortOrder":"StartTimeNewest",
+                "itemFilter.name":"HideDuplicateItems", # this isn't working, or is only working per page
+                "itemFilter.value":'true'
+            }
+
+        return ids
 
     # TODO add some other options to the finding call API, such as filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
    # try and come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say for example, if you took a
@@ -87,27 +113,22 @@ class FindingApi:
         Creates a 20-itemId list to use for the ShoppingApi call
         '''
 
-        pages = self.pageNumber
+        target_idspc = self.target_idspc
+        itemid_results_list = []
 
         with open('cat_list.txt') as jf:
             cat_list = json.load(jf)
 
-        args = []
-        for category_id in cat_list:
-
-            bargs = [(category_id, i) for i in pages]
-            args.extend(bargs)
+        args = [(cat, target_idspc) for cat in cat_list]
 
         with concurrent.futures.ThreadPoolExecutor() as executor:
             for future in executor.map(lambda p: self.get_data(*p), args):
-                data = future
+                itemid_results_list.extend(future)
 
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if item not in itemid_results_list:
-                        itemid_results_list.append(item['itemId'][0])
+        with open('raw_ids.txt', 'w') as f:
+            json.dump(itemid_results_list, f)
 
-        item_id_results = list(set(itemid_results_list))
         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0, len(itemid_results_list), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints
 
         return item_id_results
@@ -204,7 +225,7 @@ class ShoppingApi:
         service_dict
 
         fnd_srvc = input(str(service_dict) + "choose Finding call: (press 'enter' for default(4))")
-        pg_num = int(input('how many ids per cat? (7692 max)'))
+        target_idspc = int(input('how many ids per cat? (7692 max)'))
 
         optional_params = {
             "itemFilter(0).name":"Condition",
@@ -213,10 +234,10 @@ class ShoppingApi:
 
         if fnd_srvc != '':
             fnd_srvc = int(fnd_srvc)
-            finding = FindingApi(fnd_srvc, pg_num)
+            finding = FindingApi(fnd_srvc, target_idspc)
         else:
             fnd_srvc = 4
-            finding = FindingApi(fnd_srvc, pg_num)
+            finding = FindingApi(fnd_srvc, target_idspc)
 
         item_id_results = finding.get_ids_from_cats()
         with concurrent.futures.ThreadPoolExecutor() as executor:
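
Review note on the new while loop in get_data: it rebuilds the entire params dict on every pass just to bump the page number; the "if item not in ids" test compares a full item dict against a list of id strings, so it never fires and the later list(set(ids)) is doing all of the dedup work; and the loop has no exit when a category holds fewer than target_idspc ids. Two parameter names are also worth checking against eBay's Finding API docs: the documented pagination field is paginationInput.pageNumber (lowercase p), and when more than one itemFilter is sent every filter must carry an index, so the unindexed itemFilter.name/HideDuplicateItems pair sitting next to itemFilter(0) is probably being ignored, which would explain the "only working per page" comment. Below is a minimal standalone sketch of how the loop could look with those points addressed. It is not this repo's code: fetch_category_ids, FINDING_URL, and the app_id parameter are illustrative names, and the pageNumber/itemFilter(1) changes are assumptions to verify against the docs.

import requests

FINDING_URL = "https://svcs.ebay.com/services/search/FindingService/v1"

def fetch_category_ids(app_id, category_id, target_idspc):
    '''Collects up to target_idspc unique itemIds for one category.

    Assumes the findItemsByCategory response shape this patch already parses.
    '''
    ids = set()
    page = 1
    while len(ids) < target_idspc:
        params = {
            "OPERATION-NAME": "findItemsByCategory",
            "SECURITY-APPNAME": app_id,
            "SERVICE-VERSION": "1.13.0",
            "RESPONSE-DATA-FORMAT": "JSON",
            "categoryId": category_id,
            "paginationInput.entriesPerPage": "100",
            "paginationInput.pageNumber": str(page),  # documented name; the patch's PageNumber may be ignored
            "itemFilter(0).name": "Condition",
            "itemFilter(0).value": "Used",
            "itemFilter(1).name": "HideDuplicateItems",  # indexed, unlike the patch's bare itemFilter.name
            "itemFilter(1).value": "true",
            "sortOrder": "StartTimeNewest",
        }
        try:
            response = requests.get(FINDING_URL, params=params, timeout=7)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            break  # connection trouble: keep whatever was collected so far
        result = response.json()['findItemsByCategoryResponse'][0]['searchResult'][0]
        items = result.get('item', [])
        if not items:
            break  # category exhausted before reaching target_idspc
        before = len(ids)
        ids.update(item['itemId'][0] for item in items)  # compare ids, not whole item dicts
        if len(ids) == before:
            break  # page added nothing new; stop instead of spinning forever
        page += 1
    return list(ids)[:target_idspc]

With a helper shaped like this, get_ids_from_cats() keeps its executor.map fan-out unchanged and simply extends itemid_results_list with each returned list, as the patch already does.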