modTimeFrom w dateutil datetime timedelta parser integration
parent 7cee062b78
commit eb780faf40

ebay_api.py (104 lines changed)
@@ -1,4 +1,8 @@
 import os
+from datetime import datetime, timedelta
+import dateutil
+from dateutil import parser
+import pytz
 import pdb
 from io import StringIO
 import numpy as np
@@ -21,56 +25,53 @@ class FindingApi:
     Methods for accessing eBay's FindingApi services
     '''

-    def __init__(self, service, target_idspc): # target ids per cat
+    def __init__(self, service, idspc):
         self.service = [
             'findItemsAdvanced', 'findCompletedItems',
             'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
             'findItemsByProduct'
             ][service] # Currently using only index 4, i.e., service = 4
-        self.target_idspc = target_idspc

-        # examples of additional params you may want to add:
+        self.idspc = idspc # examples of additional params you may want to add:
         # 'itemFilter(0).value':'Used' consider using this with findCompletedItems call
         # 'itemFilter(1).name':'ListingType'
         # 'itemFilter(1).value':'AuctionWithBIN'
         # 'StartTimeNewest'
         # HideDuplicateItems

-    def get_data(self, category_id, target_idspc):
+    def get_data(self, category_id, idspc):

         '''
         Gets raw JSON data from the FindingApi service call. Currently being used to
         get itemIDs from categories;
         '''
+        # startTime = dateutil.parser.isoparse( startTime )
+        # now = datetime.datetime.now(tz=pytz.UTC)
+        # days_on_site = (now - startTime).days # as int

         '''
         Consider using the sortOrder param to update by the latest listings first.
         Also consider using the exclude-duplicates param and possibly others;
         research the eBay API docs to find candidates.
         '''
-        i = 1
         ids = []
+        modTimeFrom = datetime.now() - timedelta(seconds=5) # initialize modTimeFrom value
+        i = 1
         params = {
             "OPERATION-NAME":self.service,
             "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
             "SERVICE-VERSION":"1.13.0",
             "RESPONSE-DATA-FORMAT":"JSON",
             "categoryId":category_id,
-            "paginationInput.entriesPerPage":"100",
+            "paginationInput.entriesPerPage":"20",
             "paginationInput.PageNumber":i,
             "itemFilter(0).name":"Condition",
-            "itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
-            # but may not be necessary anyway if you can eliminate dupes. TODO: still need to fix this to work. Results are likely better than new items w/ shitty brands and pics
+            "itemFilter(0).value":"Used",
+            "itemFilter.name":"HideDuplicateItems",
+            "itemFilter.value":"true",
             "sortOrder":"StartTimeNewest",
-            "itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
-            "itemfilter.value":'true'
             }

-        while len(ids) < target_idspc: # target ids (per category)
+        # "itemFilter.name(2)":"modTimeFrom",
+        # "itemFilter.value(2)":modTimeFrom,
+
+        while len(ids) < idspc:

             try:
                 print(len(ids))
                 print(params['paginationInput.PageNumber'])
                 input('press enter to continue')
                 response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                                         params=params, timeout=24)
                 response.raise_for_status()
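Note on the duplicate-hiding filter above: the params dict mixes the indexed form already used for the Condition filter ("itemFilter(0).name" / "itemFilter(0).value") with un-indexed "itemFilter.name" / "itemFilter.value" keys, which is the likely reason the in-code comment reports HideDuplicateItems as "not working". Below is a minimal sketch of building several filters with the indexed form; the helper name build_item_filters and the example filter values are assumptions for illustration, not part of this commit.

# Sketch only: express each item filter with the itemFilter(N).name /
# itemFilter(N).value keys the Condition filter already uses.
def build_item_filters(filters):
    """Flatten [('Condition', 'Used'), ...] into indexed query params."""
    indexed = {}
    for n, (name, value) in enumerate(filters):
        indexed[f"itemFilter({n}).name"] = name
        indexed[f"itemFilter({n}).value"] = value
    return indexed

extra = build_item_filters([
    ("Condition", "Used"),
    ("HideDuplicateItems", "true"),
    # ("ListingType", "AuctionWithBIN"),  # candidate mentioned in the comments above
])
# params.update(extra)  # merge into the Finding API request params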
@@ -80,33 +81,39 @@ class FindingApi:
                 return ids
             try:
                 data = response.json()

+                itemSearchURL = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
+                modTimeFrom = data['findItemsByCategoryResponse'][0]['searchResult'][0]['item'][-1]['listingInfo'][0]['startTime'][0]
+                modTimeFrom = dateutil.parser.isoparse(modTimeFrom)
+                modTimeFrom = modTimeFrom - timedelta(seconds=5) # TODO: needs converting back to GMT format
                 for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
                     # if item not in ids:
                     ids.append(item['itemId'][0])

+                # ids = list(set(ids))
-                i += 1
-                params = {
-                    "OPERATION-NAME":self.service,
-                    "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
-                    "SERVICE-VERSION":"1.13.0",
-                    "RESPONSE-DATA-FORMAT":"JSON",
-                    "categoryId":category_id,
-                    "paginationInput.entriesPerPage":"100",
-                    "paginationInput.PageNumber":i,
-                    "itemFilter(0).name":"Condition",
-                    "itemFilter(0).value":"Used", # recommended is conditionId instead but for some reason that doesn't work either
-                    # but may not be necessary anyway if you can eliminate dupes. TODO: still need to fix this to work. Results are likely better than new items w/ shitty brands and pics
-                    "sortOrder":"StartTimeNewest",
-                    "itemFilter.name":"HideDuplicateItems", # this isn't working or is only working per page
-                    "itemfilter.value":'true'
-                    }
-                #ids = list(set(ids))

             except (AttributeError, KeyError):
                 print('AttributeError or KeyError. Exiting')
                 print(response.json())
                 return ids

-        return ids, data
+            input('press enter to continue')
+            i += 1
+            params = {
+                "OPERATION-NAME":self.service,
+                "SECURITY-APPNAME":cfg.sec['SECURITY-APPNAME'],
+                "SERVICE-VERSION":"1.13.0",
+                "RESPONSE-DATA-FORMAT":"JSON",
+                "categoryId":category_id,
+                "paginationInput.entriesPerPage":"20",
+                "paginationInput.PageNumber":i,
+                "itemFilter(0).name":"Condition",
+                "itemFilter(0).value":"Used",
+                "itemFilter.name":"HideDuplicateItems",
+                "itemFilter.value":"true",
+                "sortOrder":"StartTimeNewest",
+                }

+        return ids, data, modTimeFrom, itemSearchURL

     # TODO: add some other options to the finding call api, such as filtering for used items only. This might give you a better dataset for training. Or maybe a mixture of new and used. Maybe
     # try and come up with a way to mathematically determine your odds of maximizing the number of pictures in your training set while reducing the number of useless images. Say for example, if you took a
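On the "needs converting back to GMT format" TODO above: startTime comes back as an ISO-8601 UTC string, so after parsing it with dateutil and subtracting five seconds, the adjusted value has to be serialized back into that string form before it can be sent as a modTimeFrom filter value. A minimal sketch follows, assuming the same millisecond-precision "Z" format the responses use; the example timestamp and the ModTimeFrom filter name (suggested by the commented-out filter above) are illustrative, not taken from this commit.

# Sketch only: round-trip a startTime string through dateutil and back to
# the ISO-8601 UTC ("GMT") form expected in a modTimeFrom item filter.
import pytz
from datetime import timedelta
from dateutil import parser

start = parser.isoparse("2021-06-15T18:29:01.000Z")  # example startTime value
mod_time_from = start - timedelta(seconds=5)
mod_time_from_str = mod_time_from.astimezone(pytz.UTC).strftime("%Y-%m-%dT%H:%M:%S.000Z")
print(mod_time_from_str)  # 2021-06-15T18:28:56.000Z
# e.g. "itemFilter(2).name": "ModTimeFrom", "itemFilter(2).value": mod_time_from_str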
@@ -121,25 +128,32 @@ class FindingApi:
         Creates a 20-itemId list to use for the ShoppingApi
         call
         '''
-        target_idspc = self.target_idspc
+        # target_idspc = self.target_idspc
+        idspc = self.idspc

         itemid_results_list = []

         with open('cat_list.txt') as jf:
             cat_list = json.load(jf)

-        args = [(cat, target_idspc) for cat in cat_list]
-        for cat in cat_list:
+        args = [(cat, idspc) for cat in cat_list]

-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                for future in executor.map(lambda p: self.get_data(*p), args):
-                    itemid_results_list.extend(future)
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            for future in executor.map(lambda p: self.get_data(*p), args):
+                itemid_results_list.extend(future)

+        print(len(itemid_results_list))
+        a = list(set(itemid_results_list))
+        print(len(a))
+        input('press enter to continue')

+        with open('raw_ids.txt', 'w') as f:
+            json.dump(itemid_results_list, f)

         item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
             len(itemid_results_list), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints
-        return item_id_results
+        return item_id_results, itemid_results_list

     # TODO: during your try/except conditionals, just check the csv files. At the end you can create sets. You can create another condition that says if the final set is smaller than 100k, then you can call the finding
     # service on more pages (but only pages you haven't tried) and repeat the search process.
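Since HideDuplicateItems is reported above as not working (or only working per page), duplicates are currently checked after the fact with list(set(...)), which drops the StartTimeNewest ordering. A minimal sketch of an order-preserving de-duplication before the 20-ID chunking the method already performs; the sample IDs and variable names are illustrative only.

# Sketch only: de-duplicate itemIds while keeping their original order,
# then join them into comma-separated groups of up to 20 for the ShoppingApi call.
itemid_results_list = ['111', '222', '111', '333', '444', '222']

deduped = list(dict.fromkeys(itemid_results_list))  # order-preserving, unlike list(set(...))
item_id_results = [','.join(deduped[n:n + 20]) for n in range(0, len(deduped), 20)]

print(deduped)          # ['111', '222', '333', '444']
print(item_id_results)  # ['111,222,333,444']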