From 167f1f29ec3d120f3ed2102bd39b936dad649656 Mon Sep 17 00:00:00 2001
From: spbeach46
Date: Fri, 2 Apr 2021 23:42:31 -0700
Subject: [PATCH] added download fix

---
 curate.py   | 10 +++++++---
 ebay_api.py | 25 ++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/curate.py b/curate.py
index 5cff803..655dbd4 100644
--- a/curate.py
+++ b/curate.py
@@ -11,8 +11,9 @@ training = curate.to_training(raw_data) # NOTE have to reference PictureURL list
 # or use dropd.PictureURL.split(' ')
 class_training = curate.class_training(training)
 nvl_training = curate.nvl_training(training)
-extracted_df = curate.extract_contents(nvl_training)
-dropd = curate.drop_nvl_cols(extracted_df)
+dropd = curate.drop_nvl_cols(nvl_training) # NOTE move this method above extract_contents; this should solve the expand-before-extract problem
+# expand_nvlclass(class_training, dropd)
+# extracted_df = curate.extract_contents(expanded_dropd) # only extract contents after running expand_nvlclass and returning expanded dropd
 
 def expand_nvlclass(class_training, dropd):
     '''
@@ -23,7 +24,10 @@ def expand_nvlclass(class_training, dropd):
     #interm_s = class_training.PictureURL.apply(lambda x: len(x))
     #expanded_class_training = class_training.loc[np.repeat(class_training.index.values, interm_s)].reset_index(drop=True)
     expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True)
-    expanded_dropd = dropd.loc[np.repeat(dropd.index.values, interm_s)].reset_index(drop=True) # TODO CHANGE this to use explode(). picture list needs preservation
+    # expanded_class_training.PictureURL.apply(lambda x: 'c:/users/unknown/
+    expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True)
+    #expanded_dropd = dropd.loc[np.repeat(dropd.index.values, interm_s)].reset_index(drop=True) # TODO CHANGE this to use explode(). picture list needs preservation
 # prior to creating dropd and extracted. maybe run extracted_df after dropd or after running nvl_training
     #interm_s = interm_s.astype(str).applymap(lambda x: x.split(',')*4)
+    return expanded_class_training, expanded_dropd
 # TODO still need to replace source url with destination url in df cols and create custom dict {'source':'destination'}

diff --git a/ebay_api.py b/ebay_api.py
index 54b007d..9b6fac5 100644
--- a/ebay_api.py
+++ b/ebay_api.py
@@ -5,7 +5,8 @@ import json
 import requests
 import pandas as pd
 import config as cfg
-import wget
+import wget # NOTE may not need this
+import shutil
 
 class FindingApi:
     '''Methods for accessing eBays FindingApi services'''
@@ -256,13 +257,31 @@ class CurateData:
         instances. Modifies both class_training and dropd dfs. Appends custom
         image url dict {'source':'destination'}.
         '''
+        #interm_s = class_training.PictureURL.apply(lambda x: len(x))
+        #expanded_class_training = class_training.loc[np.repeat(class_training.index.values, interm_s)].reset_index(drop=True)
+        expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True)
+        expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True)
+        #expanded_dropd = dropd.loc[np.repeat(dropd.index.values, interm_s)].reset_index(drop=True) # TODO CHANGE this to use explode(). picture list needs preservation
+        # prior to creating dropd and extracted. maybe run extracted_df after dropd or after running nvl_training
-        pass
+        #interm_s = interm_s.astype(str).applymap(lambda x: x.split(',')*4)
+        return expanded_class_training, expanded_dropd
 
-    def dl_pictures(self, expand=1):
+    def dl_pictures(self, dict_pic, expand=1):
         '''
         Downloads pictures from api to local storage using custom master dict
         '''
+
+        with open('dict_pic.txt', 'r') as jf: # TODO requires cleaning up
+            dict_pics = json.load(jf)
+
+        for source, destination in dict_pics.items(): # dict_pics maps source url to local destination path
+            r = requests.get(source, stream=True)
+            r.raw.decode_content = True
+            with open(destination, 'wb') as f:
+                shutil.copyfileobj(r.raw, f)
+        # PictureURL in PictureURL list can't be downloaded... have to use indirect address in the form https://i.ebayimg.com/images/g//s-l.jpg
+        # in place of https://i.ebayimg.com/00/s/ODQwWDE2MDA=/z//$_1.JPG, or use requests methods instead of wget with the original PictureURL? yes, use requests
 
 # TODO pipeline gameplan: 5 files: master img download dict, raw_json.txt, raw_json.csv, master_class_training.csv, master_nvl_training.csv
 # cont... open raw_json.txt and append, same with csv --> process new data --> pull out image source+dest and expand new dfs for the additional pictures
 # if not exists and append to master img download dict
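
Note on the pipeline gameplan TODO above: the "if not exists and append to master img download dict" step is not implemented in this patch. A minimal sketch of what it could look like follows, assuming the master dict lives in dict_pic.txt as JSON mapping source url to destination path (per the docstring in expand_nvlclass); update_master_dict is a hypothetical helper name, not part of the patch.

    import json
    import os

    def update_master_dict(new_pairs, path='dict_pic.txt'):
        '''Merge new {'source':'destination'} pairs into the master img
        download dict, adding only keys that don't already exist ("if not
        exists" per the TODO), then write the dict back to disk.'''
        if os.path.exists(path):
            with open(path, 'r') as jf:
                master = json.load(jf)
        else:
            master = {}  # first run: no master dict yet
        for source, destination in new_pairs.items():
            if source not in master:
                master[source] = destination
        with open(path, 'w') as jf:
            json.dump(master, jf)
        return master

dl_pictures could then iterate the returned dict, downloading only pictures whose destination file does not yet exist locally.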