Attempt curation by starting with a pandas DataFrame first

2021-01-22 23:21:56 -07:00
parent b4e1961ace
commit fd6fd04ecd
1 changed files with 23 additions and 15 deletions
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -1,3 +1,5 @@
+import importlib
+import numpy as np
 import concurrent.futures
 import json
 import requests
@@ -117,7 +119,7 @@ class ShoppingApi:
                # data.update(future)
        return data # TODO each future is a list of dictionaries because the output of any multithreader in this method is a list. 
    # data dictionary can't update from list of dicts unless iterated over. Might need a different way to update. 
-
+# TODO: The problem with updating the dictionary/CSV file possibly starts here; the item data seems to be appended out of order relative to the item itself.
 class CurateData:
    '''
    Contains functions for curating data for machine learning training sets;
@@ -169,21 +171,27 @@ class CurateData:
            '''
            Creates same training instance per photo for
            '''
-            for url in picture_url_list:
-                remote_url = {'PictureURL':url}
-                training.update(remote_url)
-                item_id = self.extract_itemId(item)
-                training.update(item_id)
-                catId = self.extract_catId(item)
-                training.update(catId)
-                prime_cat_name = self.extract_prime_cat_name(item)
-                training.update(prime_cat_name)
-                nvl_dict = self.extract_nvl(item)
-                training.update(nvl_dict)
+           # for url in picture_url_list: # maybe try removing for loop to see if csv updates correctly here
+           # remote_url = {'PictureURL':url}
+           # training.update(remote_url)
+            item_id = self.extract_itemId(item)
+            training.update(item_id)
+            catId = self.extract_catId(item)
+            training.update(catId)
+            prime_cat_name = self.extract_prime_cat_name(item)
+            training.update(prime_cat_name)
+            nvl_dict = self.extract_nvl(item)
+            training.update(nvl_dict)
+
+        df = pd.json_normalize(training) # TODO FIX INDENT HERE?
+        #df.to_csv('training.csv', mode='a')
+        print(training) # After looking at the training output, everything appears to be out of order, possibly due to multithreading issues. If so, a more fine-grained
+        # multithreading module may be needed.
+    def data_frame(self, data):
+        to_json = json.dumps(data)
+        raw_df = pd.read_json(to_json)
+        return raw_df

-                df = pd.json_normalize(training) # TODO FIX INDENT HERE?
-                df.to_csv('training.csv', mode='a')
-        return training

 def main():
    '''