Adding functions to the CurateData class

This commit is contained in:
spbeach46 2020-11-07 10:39:49 -07:00
parent cd21c98e54
commit 5965f19d2a

View File

@ -87,22 +87,45 @@ class CurateData:
'''
Contains functions for curating data for machine learning training sets
'''
def extract_itemid(self, data):
    '''
    Collects the 'ItemID' value of every entry in data['Item'].

    The earlier body bound the literal list ['ItemID'] on each pass instead
    of indexing the item, and returned nothing, so no id ever reached the
    caller.

    :param data: mapping whose 'Item' key holds an iterable of item dicts
        (presumably a parsed eBay item response -- confirm against caller)
    :return: list of item['ItemID'] values, in input order
    :raises KeyError: if 'Item' is missing or an item has no 'ItemID'
    '''
    return [item['ItemID'] for item in data['Item']]
def extract_prime_cat(self, data):
    '''
    Collects the 'PrimaryCategory' value of every entry in data['Item'].

    The earlier body bound the literal list ['PrimaryCategory'] on each pass
    instead of indexing the item, and returned nothing.

    :param data: mapping whose 'Item' key holds an iterable of item dicts
        (presumably a parsed eBay item response -- confirm against caller)
    :return: list of item['PrimaryCategory'] values, in input order
    :raises KeyError: if 'Item' is missing or an item has no
        'PrimaryCategory'
    '''
    return [item['PrimaryCategory'] for item in data['Item']]
def extract_picture_url(self, data):
    '''
    Collects the 'PictureURL' value of every entry in data['Item'].

    The earlier body bound the literal list ['PictureURL'] on each pass
    instead of indexing the item, and returned nothing.

    :param data: mapping whose 'Item' key holds an iterable of item dicts
        (presumably a parsed eBay item response -- confirm against caller)
    :return: list with one entry per item holding that item's 'PictureURL'
        value unchanged (whatever shape the response gives it)
    :raises KeyError: if 'Item' is missing or an item has no 'PictureURL'
    '''
    return [item['PictureURL'] for item in data['Item']]
def extract_nvl(self, data):
    '''
    Builds one name -> value dict per item from its item-specifics list.

    For every entry in data['Item'] the pairs found under
    ['ItemSpecifics']['NameValueList'] are folded into a dict keyed by each
    pair's 'Name'. When a name repeats within one item the last value wins.

    Changes from the earlier body:
    - the per-item dict was built and then discarded; it is now returned
    - keys were all lower-case ('item', 'itemspecifics', ...) while the
      sibling extract_* methods read data['Item']; they now use the same
      CamelCase spelling (NOTE(review): confirm against the real response)
    - the unused 'training' dict and the reuse of one name for both the
      loop variable and the result are gone

    :param data: mapping whose 'Item' key holds an iterable of item dicts
    :return: list of dicts, one per item, in input order
    :raises KeyError: if any of the expected keys is missing
    '''
    # TODO also append ItemID and value to the dictionary somewhere
    specifics = []
    for item in data['Item']:
        pairs = item['ItemSpecifics']['NameValueList']
        specifics.append({pair['Name']: pair['Value'] for pair in pairs})
    return specifics
def update_df(self, data):
'''
Extracts itemIds and name-value list , creates new dict and appends df
'''
# NOTE(review): this span reads like a rendered diff whose +/- markers and
# indentation were stripped: statements appear in near-duplicate pairs
# (CamelCase keys next to lower-case keys, `data` next to `training`).
# Presumably the first of each pair is the removed line and the second the
# added one -- confirm against the repository before treating this as runnable.
# Loop header, first variant: iterates `data` itself.
for item in data:
# Loop header, second variant: iterates the sequence under data['item'].
for item in data['item']:
training = {}
names = []
values = []
# First variant always reads the specifics of data['Item'][0], independent of
# the loop variable.
nvl = data['Item'][0]['ItemSpecifics']['NameValueList']
# Second variant reads the specifics of the current `item`.
nvl = item['itemspecifics']['namevaluelist']
for nvl_dict in nvl:
names.append(nvl_dict['Name'])
values.append(nvl_dict['Value'])
# TODO Also append itemId and value to the dictionary somewhere
names.append(nvl_dict['name'])
values.append(nvl_dict['value'])
# todo also append itemid and value to the dictionary somewhere
# Rebinds nvl_dict (the loop variable above) to the names -> values mapping.
nvl_dict = dict(zip(names, values))
data.update(nvl_dict) # TODO this changes iterable so you get error
df = pd.json_normalize(data)
training.update(nvl_dict) # todo just creating a training variable will not include itemid, picture urls, or categories which you will still need for your df so you can either extract them and append them to training or you can solely modify the data variable but you will have to deal with runtime error.
# probably best to extract and making custom df
df = pd.json_normalize(training)
# mode='a' appends to training.csv rather than overwriting it.
df.to_csv('training.csv', mode='a')
def main():