Changed the data variable returned in ShoppingApi to data['Item'], and vice versa in the update_data function, so that data.update works correctly.

2020-11-12 14:46:37 -07:00
parent 97c5900a5b
commit bcb11de855
3 changed files with 3 additions and 124 deletions
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -95,9 +95,10 @@ class ShoppingApi:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for future in executor.map(self.get_item_from_findItemsByCategory(), item_id_results):
                data.update(future)
-        return data # TODO save data as file??
+        return data
+
+# TODO: the current structure of data (i.e., a single dict merged via data.update) means the CurateData class methods will have trouble running.

-            # TODO CONSIDER IMPLEMENTING MULTITHREADING AROUND HERE TOO. 
 class CurateData:
    '''
    Contains functions for curating data for machine learning training sets;
--- a/finding_api.py
+++ b/finding_api.py
@@ -1,43 +0,0 @@
-import requests
-import json
-from bs4 import BeautifulSoup as b
-import pandas as p
-
-# keywords = input('keyword search: ')
-
-with open('cat_list.txt') as jf:
-    cat_list = json.load(jf)
-finding_service = ['findItemsAdvanced', 'findCompletedItems', 'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory', 'findItemsByProduct']
-
-pageNumber = list(range(1, 63))
-
-# departments = ["3034","93427"]
-
-def get_ids():
-    itemid_results_list = []
-    for categoryID in cat_list[0:2]:
-        params = {
-            "OPERATION-NAME":finding_service[4],
-            "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
-            "SERVICE-VERSION":"1.13.0",
-            "RESPONSE-DATA-FORMAT":"JSON",
-            "categoryId":categoryID ,
-            "paginationInput.entriesPerPage":"100",
-            "paginationInput.PageNumber":pageNumber[0]
-            }
-        # extract item id here for piping into shopping_test.py
-
-        response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1", params=params)
-        data = response.json()
-        pretty_data = json.dumps(data, indent=2)
-        return data
-# can use pandas.json_normalize(custom dict cobbled from respons.json())
-
-
-# Additional problem you will run into when getting labeled data is shoe types and features not in features, accents, styles, categories or subcategories.
-
-# also limited to 5000 calls per day. This leaves you with 500k listings
-
-# If you want to split up each cat equally with their respective maxes then use 62 pages with 100
-# Entries per page. At this amount you'll have the max number of calls you can make on the 
-# shopping api. 
--- a/shopping_api.py
+++ b/shopping_api.py
@@ -1,79 +0,0 @@
-import json
-import requests
-import pandas as pd
-
-# OPEN CSV AS VARIALBE RIGHT HERE
-with open('cat_list.txt') as jf:
-    cat_list = json.load(jf)
-
-big_data = pd.read_csv('big_data.csv')
-
-class FindingApi:
-    '''Some docstring to get rid of linting errors'''
-    def __init__(self):
-        self.service = [
-            'findItemsAdvanced', 'findCompletedItems',
-            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
-            'findItemsByProduct'
-            ]
-        self.pageNumber = list(range(1, 63))
-
-    # departments = ["3034","93427"] (womens and mens)
-
-    def get_ids_from_cats(self):
-        '''Stop bothering me for docstrings.'''
-        itemid_results_list = []
-        for category_id in cat_list:
-            for i in self.pageNumber:
-                params = {
-                    "OPERATION-NAME":self.service[4],
-                    "SECURITY-APPNAME":"scottbea-xlister-PRD-6796e0ff6-14862949",
-                    "SERVICE-VERSION":"1.13.0",
-                    "RESPONSE-DATA-FORMAT":"JSON",
-                    "categoryId":category_id,
-                    "paginationInput.entriesPerPage":"100",
-                    "paginationInput.PageNumber":self.pageNumber[i]
-                    }
-                response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
-                        params=params)
-                data = response.json()
-                for item in data['findItemsByCategoryResponse'][0]['searchResult'][0]['item']:
-                    if item not in big_data.values:
-                        itemid_results_list.append(item['itemId'])
-        item_id_results = [','.join(itemid_results_list[n:n+20]) for n in list(range(0,
-            len(itemid_results_list), 20))]
-
-        return item_id_results
-
-class ShoppingApi(FindingApi):
-    def get_item_from_findItemsByCategory(self, item_id_results):
-        for twenty_id in item_id_results:
-            params = {
-                "callname":"GetMultipleItems",
-                "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
-                "version":"671",
-                "responseencoding":"JSON",
-                "ItemID":twenty_id, # you pass in a list? If not then maybe a comma-separated
-                "IncludeSelector":"ItemSpecifics",
-                }
-
-            response = requests.get("https://open.api.ebay.com/shopping?", params=params)
-            data = response.json()
-
-            names = []
-            values = []
-            nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
-
-            for nvl_dict in nvl:
-                names.append(nvl_dict['Name'])
-                values.append(nvl_dict['Value'])
-
-            nvl_dict = dict(zip(names, values))
-            data.update(nvl_dict)
-            df = pd.json_normalize(data)
-            df.to_csv('big_data.csv')
-
-# Limited to 5000 calls to shopping api per day, and getMultpileitems service maxes out at 20 items
-# per call leaving you 100,000 items per day for you pandas dataframe initially. So you'll have
-# to divide these up into the categories. This will leave you with about 6.25K results per cat.
-# More than enough data for your dataset. Consider