From b205b44b5063733a7e04d6aea3a9fef20d7845a9 Mon Sep 17 00:00:00 2001 From: spbeach46 Date: Sun, 4 Apr 2021 14:38:04 -0700 Subject: [PATCH] added function to create custom image url dictionary --- curate.py | 6 ++++-- ebay_api.py | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/curate.py b/curate.py index ed31371..22c6db5 100644 --- a/curate.py +++ b/curate.py @@ -12,7 +12,9 @@ class_training = curate.class_training(training) nvl_training = curate.nvl_training(training) dropd = curate.drop_nvl_cols(nvl_training) -# expand_nvlclass(class_training, dropd) -# extracted_df = curate.extract_contents(expanded_dropd) # only extract contents after running expand_nvlclass and returning expanded dropd +expanded_dfs = curate.expand_nvlclass(class_training, dropd) +expanded_class = expanded_dfs[0] +expanded_dropd = expanded_dfs[1] # TODO # need to replace expanded df's PictureURL col values with destination urls +# TODO # still have the problem of duplicate listings. 
Possibly take care of this before you run curate diff --git a/ebay_api.py b/ebay_api.py index 0d6bddf..c576c29 100644 --- a/ebay_api.py +++ b/ebay_api.py @@ -7,6 +7,7 @@ import pandas as pd import config as cfg import wget # NOTE may not need this import shutil +import re class FindingApi: '''Methods for accessing eBays FindingApi services''' @@ -204,11 +205,11 @@ class CurateData: return nvl_training - def extract_contents(self, df): + def extract_df(self, df): ''' converts single-value lists of strings of any df to string if not null ''' - extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isull(x) else x) + extracted_df = df.applymap(lambda x: ' '.join(x) if isinstance(x, list) else np.nan if pd.isnull(x) else x) return extracted_df @@ -257,9 +258,10 @@ class CurateData: def make_dict_pics(self, expanded_class_training): with open('dict_pic.txt', 'w+') as jf: # TODO requires cleaning up dict_pics = json.load(jf) -# dict_pics.extend('< + dict_pics.extend('<') - def expand_nvlclass(class_training, dropd): + + def expand_nvlclass(self, class_training, dropd): ''' takes image url list from each cell and expands them into separate/duplicate instances. Modifies both class training and dropd dfs. 
Appends custom @@ -268,9 +270,16 @@ class CurateData: expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True) # expanded_class_training.PictureURL.apply(lambda x: 'c:/users/unknown/ expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True) + expanded_dropd = self.extract_df(expanded_dropd) # convert lists to values # expanded_dropd.PictureURL.apply(lambda x: 'c:/users/unknown/ - return expanded_class_training, expanded_dropd, dict_pics# TODO still need to replace source url to destination url in df cols and create custom dict {, } + # curate.make_dict_pics(expanded_class_training) # custom image url dict + dict_pics_list = set(expanded_class_training.PictureURL.to_list()) # probably need to create set long before df... immediately after Shopping or trading call + dict_pics = {k:destination+re.search(r'\w+(?=/\$_)', k).group()+'.jpg' for k in dict_pics_list} # TODO determine how to implement destination variable + + # re.search(r'\w+(?=/\$_)', k).group() + + return expanded_class_training, expanded_dropd # TODO still need to replace source url to destination url in df cols and create custom dict {, } def dl_pictures(self): '''