replaced FindingApi for easier refining of category search

scott 2021-12-31 15:30:06 -07:00
parent 5ec46ae0c7
commit 6650468756


@ -22,6 +22,140 @@ from ebaysdk.trading import Connection as Trading
from ebaysdk.finding import Connection as Finding
from ebaysdk.shopping import Connection as Shopping
class FindingApi:
    '''
    Methods for accessing eBay's FindingApi services
    '''
    def __init__(self, service, idspc):
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
        ][service]  # currently using only index 4, i.e., service = 4
        self.idspc = idspc  # target number of itemIds per category
        # Examples of additional params you may want to add:
        #   'itemFilter(0).value':'Used'           # consider using with a findCompletedItems call
        #   'itemFilter(1).name':'ListingType'
        #   'itemFilter(1).value':'AuctionWithBIN'
        #   'sortOrder':'StartTimeNewest'
        #   'itemFilter(n).name':'HideDuplicateItems'
    def get_data(self, category_id, idspc):
        '''
        Gets raw JSON data from a FindingApi service call. Currently being used
        to get itemIds from categories.
        '''
        # startTime = dateutil.parser.isoparse(startTime)
        # now = datetime.datetime.now(tz=pytz.UTC)
        # days_on_site = (now - startTime).days  # as int
        ids = []
        data = {}             # initialized so every return path shares one shape
        itemSearchURL = None
        modTimeFrom = datetime.now() - timedelta(seconds=5)  # initialize modTimeFrom value
        i = 1
        params = {
            "OPERATION-NAME": self.service,
            "SECURITY-APPNAME": cfg.sec['SECURITY-APPNAME'],
            "SERVICE-VERSION": "1.13.0",
            "RESPONSE-DATA-FORMAT": "JSON",
            "categoryId": category_id,
            "paginationInput.entriesPerPage": "20",
            "paginationInput.pageNumber": i,  # lowercase 'p': the Finding API param is pageNumber
            "itemFilter(0).name": "Condition",
            "itemFilter(0).value": "Used",
            "itemFilter(1).name": "HideDuplicateItems",  # item filters must be indexed: itemFilter(n).name/.value
            "itemFilter(1).value": "true",
            "sortOrder": "StartTimeNewest",
        }
        # "itemFilter(2).name": "modTimeFrom",
        # "itemFilter(2).value": modTimeFrom,
        while len(ids) < idspc:
            try:
                response = requests.get(
                    "https://svcs.ebay.com/services/search/FindingService/v1",
                    params=params, timeout=24)
                response.raise_for_status()
            except requests.exceptions.RequestException:
                # TODO: resume where we left off instead of bailing, or use a
                # better timeout/retry strategy
                print('connection error')
                return ids, data, modTimeFrom, itemSearchURL
            try:
                data = response.json()
                itemSearchURL = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
                modTimeFrom = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item'][-1]['listingInfo'][0]['startTime'][0]
                modTimeFrom = dateutil.parser.isoparse(modTimeFrom)
                modTimeFrom = modTimeFrom - timedelta(seconds=5)  # TODO: convert back to GMT format
                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                    # if item not in ids:
                    ids.append(item['itemId'][0])
                # ids = list(set(ids))
            except (AttributeError, KeyError):
                print('AttributeError or KeyError. Exiting')
                print(response.json())
                input('press enter to continue')  # moved before return; it was unreachable after it
                return ids, data, modTimeFrom, itemSearchURL
            i += 1
            params["paginationInput.pageNumber"] = i  # only the page number changes between requests
        return ids, data, modTimeFrom, itemSearchURL
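    # Hypothetical single-category usage (not in this commit; '177831' is a
    # made-up categoryId for illustration):
    #
    #   api = FindingApi(4, 40)  # index 4 -> 'findItemsByCategory'
    #   ids, data, mod_time, search_url = api.get_data('177831', 40)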
    # TODO: add other options to the Finding API call, such as filtering for
    # used items only. That might give a better dataset for training, or maybe
    # a mixture of new and used would. Try to come up with a way to
    # mathematically determine the odds of maximizing the number of useful
    # pictures in the training set while reducing the number of useless images.
    # For example, taking a random set of 3 of 8 total pictures from each
    # listing might give a better chance of getting 3 good pictures while also
    # growing the training set; alternatively, limiting it to the first 5
    # pictures instead of a random sample may work better. Used shoes may even
    # be more consistent since they are "one-off" listings without confusing
    # multiple variations and colors. Another option is to run small training
    # sets on both new and used to see which one is more accurate, or whether
    # a combination of both is.
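    # A sketch of the sampling idea above (hypothetical helper, not part of
    # this commit; 'pic_urls' is assumed to be a listing's list of picture URLs):
    #
    #   import random
    #
    #   def sample_pics(pic_urls, k=3, first_n=None):
    #       '''Return the first first_n pictures, or a random k-subset.'''
    #       if first_n is not None:
    #           return pic_urls[:first_n]
    #       return random.sample(pic_urls, min(k, len(pic_urls)))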
    def get_ids_from_cats(self):
        # TODO: resolve duplicates here to maximize unique ids/data for the ShoppingApi call
        '''
        Creates 20-itemId strings to use for the ShoppingApi call
        '''
        # target_idspc = self.target_idspc
        idspc = self.idspc
        itemid_results_list = []
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)
        # the redundant outer 'for cat in cat_list' loop is gone; the
        # comprehension already covers every category
        args = [(cat, idspc) for cat in cat_list]
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for ids, _data, _mod_time, _search_url in executor.map(
                    lambda p: self.get_data(*p), args):
                itemid_results_list.extend(ids)  # get_data returns a 4-tuple; only the ids are collected
        print(len(itemid_results_list))
        a = list(set(itemid_results_list))
        print(len(a))
        input('press enter to continue')
        with open('raw_ids.txt', 'w') as f:
            json.dump(itemid_results_list, f)
        # 20-itemId strings created to maximize the dataset and minimize calls
        # given the ShoppingApi call constraints
        item_id_results = [','.join(itemid_results_list[n:n + 20])
                           for n in range(0, len(itemid_results_list), 20)]
        return item_id_results, itemid_results_list
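# Hypothetical driver (not part of this commit) showing how the pieces fit
# together; the 100-ids-per-category target is made up for illustration:
#
#   finding = FindingApi(4, 100)  # service index 4 -> 'findItemsByCategory'
#   twenty_id_strs, raw_ids = finding.get_ids_from_cats()
#   for id_str in twenty_id_strs:
#       ...  # each 'id1,...,id20' string feeds one ShoppingApi request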
class ShoppingApi:
'''