From fd6fd04ecd21c32299d8866974ea6a0fcd16a2a1 Mon Sep 17 00:00:00 2001 From: spbeach46 Date: Fri, 22 Jan 2021 23:21:56 -0700 Subject: [PATCH] attempting curate by starting with pandas df first --- ebay_api.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/ebay_api.py b/ebay_api.py index 92d394c..2ceeffb 100644 --- a/ebay_api.py +++ b/ebay_api.py @@ -1,3 +1,5 @@ +import importlib +import numpy as np import concurrent.futures import json import requests @@ -117,7 +119,7 @@ class ShoppingApi: # data.update(future) return data # TODO each future is a list of dictionaries because the output of any multithreader in this method is a list. # data dictionary can't update from list of dicts unless iterated over. Might need a different way to update. - +# TODO It seems like the problem with updating the dictionary/csv file is starting here possibly; I think the item data is getting appended out of order from the item itself. class CurateData: ''' Contains functions for curating data for machine learning training sets; @@ -169,21 +171,27 @@ class CurateData: ''' Creates same training instance per photo for ''' - for url in picture_url_list: - remote_url = {'PictureURL':url} - training.update(remote_url) - item_id = self.extract_itemId(item) - training.update(item_id) - catId = self.extract_catId(item) - training.update(catId) - prime_cat_name = self.extract_prime_cat_name(item) - training.update(prime_cat_name) - nvl_dict = self.extract_nvl(item) - training.update(nvl_dict) + # for url in picture_url_list: # maybe try removing for loop to see if csv updates correctly here + # remote_url = {'PictureURL':url} + # training.update(remote_url) + item_id = self.extract_itemId(item) + training.update(item_id) + catId = self.extract_catId(item) + training.update(catId) + prime_cat_name = self.extract_prime_cat_name(item) + training.update(prime_cat_name) + nvl_dict = self.extract_nvl(item) + training.update(nvl_dict) + + df = 
pd.json_normalize(training) # TODO FIX INDENT HERE? + #df.to_csv('training.csv', mode='a') + print(training) # after looking at the training output it looks like everything might be out of order possibly due to multithreading issues. Due to this you may have to use a more fine-grained + # multithreading module + def data_frame(self, data): + to_json = json.dumps(data) + raw_df = pd.read_json(to_json) + return raw_df - df = pd.json_normalize(training) # TODO FIX INDENT HERE? - df.to_csv('training.csv', mode='a') - return training def main(): '''