import ebay_api
import numpy as np
'''
File used to compile methods from ebay_api.py for curating training data.
'''
curate = ebay_api.CurateData()
raw_data = curate.import_raw()
training = curate.to_training(raw_data)  # NOTE the PictureURL list has to be referenced here if you want to expand; the column is a string in subsequent dfs
# or use dropd.PictureURL.split(' ')
class_training = curate.class_training(training)
nvl_training = curate.nvl_training(training)
dropd = curate.drop_nvl_cols(nvl_training)  # NOTE move this method above extracted; that should solve the expand-before-extract problem
# expand_nvlclass(class_training, dropd)
# extracted_df = curate.extract_contents(expanded_dropd)  # only extract contents after running expand_nvlclass and returning the expanded dropd
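# Intended end-to-end order once the ordering note above is resolved (a sketch;
# extract_contents is assumed to accept the exploded dropd df):
# expanded_class_training, expanded_dropd = expand_nvlclass(class_training, dropd)
# extracted_df = curate.extract_contents(expanded_dropd)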
def expand_nvlclass(class_training, dropd):
    '''
    Takes the image URL list from each cell and expands it into separate/duplicate
    instances. Modifies both the class_training and dropd dfs. Appends a custom
    image URL dict {'source': 'destination'}.
    '''
    # interm_s = class_training.PictureURL.apply(lambda x: len(x))
    # expanded_class_training = class_training.loc[np.repeat(class_training.index.values, interm_s)].reset_index(drop=True)
    expanded_class_training = class_training.explode('PictureURL').reset_index(drop=True)
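    # NOTE explode() emits one row per element of each PictureURL list and
    # duplicates the remaining columns, superseding the np.repeat version above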
    # expanded_class_training.PictureURL.apply(lambda x: 'c:/users/unknown/
    expanded_dropd = dropd.explode('PictureURL').reset_index(drop=True)
    # expanded_dropd = dropd.loc[np.repeat(dropd.index.values, interm_s)].reset_index(drop=True)  # TODO CHANGE this to use explode(); the picture list needs preservation
    # prior to creating dropd and extracted. Maybe run extracted_df after dropd or after running nvl_training
    # interm_s = interm_s.astype(str).applymap(lambda x: x.split(',')*4)
    return expanded_class_training, expanded_dropd  # TODO still need to replace source url with destination url in df cols and create the custom dict {<source>: <destination>}
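
# A minimal sketch of the TODO above, not the final implementation: build the
# custom {'source': 'destination'} dict from an exploded df and swap the column
# values over to local paths. The helper name, the dest_root default, and the
# basename naming scheme are all assumptions for illustration.
def make_url_dict(expanded_df, dest_root='images/'):
    '''
    Maps each source PictureURL to a destination path under dest_root and
    replaces the column values with those paths. Returns (modified df, url dict).
    '''
    # one destination path per unique source url, keyed for downloading later
    url_dict = {url: dest_root + url.split('/')[-1]
                for url in expanded_df.PictureURL.dropna().unique()}
    # swap source urls for their destination paths in the df column
    expanded_df.PictureURL = expanded_df.PictureURL.map(url_dict)
    return expanded_df, url_dict
# e.g. expanded_dropd, url_dict = make_url_dict(expanded_dropd)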