ebay-ml-lister/Shoe Classifier_VGG16.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "572dc7fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "from matplotlib.image import imread\n",
    "import pandas as pd\n",
    "from collections import Counter\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "import tempfile\n",
    "import numpy as np\n",
    "from os.path import exists\n",
    "from imblearn.under_sampling import RandomUnderSampler\n",
    "from PIL import ImageFile\n",
    "import sklearn as sk\n",
    "from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n",
    "import tensorflow as tf\n",
    "import tensorflow.keras\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
    "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Activation\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.optimizers import Adam\n",
    "# custom modules\n",
    "import image_faults\n",
    "\n",
    "ImageFile.LOAD_TRUNCATED_IMAGES = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_regularization(model, regularizer=tf.keras.regularizers.l2(0.0001)):\n",
    "    '''\n",
    "    Attach `regularizer` as the kernel_regularizer of every layer that has one.\n",
    "\n",
    "    model: Keras model whose layers are updated and whose weights are carried over.\n",
    "    regularizer: tf.keras.regularizers.Regularizer instance (default: L2 with factor 0.0001).\n",
    "\n",
    "    Returns a model rebuilt from the JSON config with the original weights reloaded by\n",
    "    layer name. model_from_json returns an uncompiled model, so compile it before training.\n",
    "    If `regularizer` is not a Regularizer, a message is printed and `model` is returned unchanged.\n",
    "    '''\n",
    "    if not isinstance(regularizer, tf.keras.regularizers.Regularizer):\n",
    "        print(\"Regularizer must be a subclass of tf.keras.regularizers.Regularizer\")\n",
    "        return model\n",
    "\n",
    "    for layer in model.layers:\n",
    "        if hasattr(layer, 'kernel_regularizer'):\n",
    "            layer.kernel_regularizer = regularizer\n",
    "\n",
    "    # When we change the layers attributes, the change only happens in the model config file\n",
    "    model_json = model.to_json()\n",
    "\n",
    "    # Round-trip the weights through a private temp directory: a fixed file name in the shared\n",
    "    # temp dir could collide with another run, and the directory is removed once we are done.\n",
    "    with tempfile.TemporaryDirectory() as tmp_dir:\n",
    "        tmp_weights_path = os.path.join(tmp_dir, 'tmp_weights.h5')\n",
    "        model.save_weights(tmp_weights_path)\n",
    "\n",
    "        # load the model from the config, then restore the saved weights by layer name\n",
    "        model = tf.keras.models.model_from_json(model_json)\n",
    "        model.load_weights(tmp_weights_path, by_name=True)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a5c72863",
   "metadata": {},
   "outputs": [],
   "source": [
    "# image_faults.faulty_images() # removes faulty images\n",
    "# Load expanded_class.csv into `df`, using the first CSV column as the index.\n",
    "# Later cells read the PictureURL and PrimaryCategoryID columns from this frame.\n",
    "df = pd.read_csv('expanded_class.csv', index_col=[0], low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1057a442",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def dict_pics_jup():\n",
    "    '''\n",
    "    Build the {source_url: local_file_path} lookup used to swap image URLs for file paths.\n",
    "\n",
    "    URLs are read from temp_pics_source_list.txt (a JSON list). Each target path is\n",
    "    <cwd>/training_images/<name><lower-cased extension>. URLs for which either regex\n",
    "    finds nothing are left out of the returned dict.\n",
    "    '''\n",
    "    target_dir = os.getcwd() + os.sep + \"training_images\"\n",
    "    with open('temp_pics_source_list.txt') as source_file:\n",
    "        source_urls = json.load(source_file)\n",
    "\n",
    "    dict_pics = {}\n",
    "    for url in source_urls:\n",
    "        stem = re.search(r'[^/]+(?=/\\$_|.(\\.jpg|\\.jpeg|\\.png))', url, re.IGNORECASE)\n",
    "        extension = re.search(r'(\\.jpg|\\.jpeg|\\.png)', url, re.IGNORECASE)\n",
    "        # both pieces are required to form a file name\n",
    "        if stem is None or extension is None:\n",
    "            continue\n",
    "        dict_pics[url] = target_dir + os.sep + stem.group() + extension.group().lower()\n",
    "    print(\"{source:target} dictionary created @ \" + target_dir)\n",
    "    return dict_pics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7a6146e6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{source:target} dictionary created @ /tf/training_images\n"
     ]
    }
   ],
   "source": [
    "dict_pics = dict_pics_jup()\n",
    "\n",
    "# women_cats is loaded alongside men_cats but only men_cats is used for filtering in this cell\n",
    "with open('women_cat_list.txt') as f:\n",
    "    women_cats = json.load(f)\n",
    "with open('men_cat_list.txt') as f:\n",
    "    men_cats = json.load(f)\n",
    "\n",
    "with open('temp_pics_source_list.txt') as f:\n",
    "    tempics = json.load(f)\n",
    "# list of image urls that did not get named properly which will be removed from the dataframe\n",
    "drop_row_vals = [pic for pic in tempics if pic not in dict_pics]\n",
    "\n",
    "df['PrimaryCategoryID'] = df['PrimaryCategoryID'].astype(str) # pandas thinks ids are ints\n",
    "ddf = df[~df.PictureURL.isin(drop_row_vals)] # remove improperly named image files\n",
    "df = ddf[~ddf.PrimaryCategoryID.isin(men_cats)] # drops rows in the men's categories; the rest are kept\n",
    "\n",
    "# swap each source url for its local image path, keeping PictureURL as the first column\n",
    "local_paths = df.PictureURL.apply(lambda x: dict_pics[x])\n",
    "df = pd.concat([local_paths, df.drop(labels=['PictureURL'], axis=1)], axis=1)\n",
    "df = df.groupby('PrimaryCategoryID').filter(lambda x: len(x)>25) # removes cat outliers\n",
    "\n",
    "# seeded shuffle so a fresh Restart & Run All yields the same row order\n",
    "df = df.sample(frac=1, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Counter({'11632': 6505, '45333': 6505, '53548': 6505, '53557': 6505, '55793': 6505, '62107': 6505, '95672': 6505})\n"
     ]
    }
   ],
   "source": [
    "# Balance the categories by randomly undersampling; random_state pins which rows are kept\n",
    "# so the balanced subset is the same on every run.\n",
    "undersample = RandomUnderSampler(sampling_strategy='auto', random_state=42)\n",
    "train, y_under = undersample.fit_resample(df, df['PrimaryCategoryID'])\n",
    "# per-category row counts after resampling\n",
    "print(Counter(train['PrimaryCategoryID']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "506aa5cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the balanced rows as `test`; `train` is rebound to the remaining 80%.\n",
    "# The split is seeded (random_state=42) but not stratified.\n",
    "# NOTE(review): `test` is not referenced by the later cells shown in this notebook - confirm\n",
    "# whether a final evaluation on it is intended.\n",
    "train, test = train_test_split(train, test_size=0.2, random_state=42)\n",
    "# stratify=train['PrimaryCategoryID']\n",
    "# train['PrimaryCategoryID'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4d72eb90",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 29143 validated image filenames belonging to 7 classes.\n",
      "Found 7285 validated image filenames belonging to 7 classes.\n"
     ]
    }
   ],
   "source": [
    "# VGG16 with its stock classifier head (flatten -> fc1 -> fc2) is built for 224x224 inputs\n",
    "IMG_SIZE = (224, 224)\n",
    "\n",
    "# No `rescale` here: ImageDataGenerator applies rescale AFTER preprocessing_function, so a\n",
    "# 1/255 factor would shrink the mean-centred output of vgg16.preprocess_input a second time\n",
    "# and feed the ImageNet weights values on a scale they were not trained with.\n",
    "datagen = ImageDataGenerator(validation_split=.2,\n",
    "                             #samplewise_std_normalization=True,\n",
    "                             #horizontal_flip= True,\n",
    "                             #vertical_flip= True,\n",
    "                             #width_shift_range= 0.2,\n",
    "                             #height_shift_range= 0.2,\n",
    "                             #rotation_range= 90,\n",
    "                             preprocessing_function=tf.keras.applications.vgg16.preprocess_input)\n",
    "\n",
    "# Both generators read the same dataframe; validation_split + subset decide which rows each\n",
    "# one receives, so passing `train` to the validation generator is intended.\n",
    "flow_kwargs = dict(\n",
    "    dataframe=train,\n",
    "    directory='./training_images',\n",
    "    x_col='PictureURL',\n",
    "    y_col='PrimaryCategoryID',\n",
    "    batch_size=32,\n",
    "    seed=42,\n",
    "    shuffle=True,\n",
    "    target_size=IMG_SIZE,\n",
    ")\n",
    "train_generator = datagen.flow_from_dataframe(subset='training', **flow_kwargs)\n",
    "validation_generator = datagen.flow_from_dataframe(subset='validation', **flow_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7b70f37f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull a single batch from the training generator for visual inspection\n",
    "# (consumed by the commented-out plotImages(imgs) / print(labels) cell).\n",
    "imgs, labels = next(train_generator)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1ed54bf5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plotImages(images_arr):\n",
    "    '''\n",
    "    Draw up to ten images from `images_arr` side by side in one row.\n",
    "\n",
    "    The figure always has ten panels; images beyond the tenth are ignored, and the axis\n",
    "    decorations are switched off on every panel that receives an image.\n",
    "    '''\n",
    "    figure, axis_grid = plt.subplots(nrows=1, ncols=10, figsize=(20,20))\n",
    "    panels = axis_grid.flatten()\n",
    "    for picture, panel in zip(images_arr, panels):\n",
    "        panel.imshow(picture)\n",
    "        panel.axis('off')\n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "85934565",
   "metadata": {},
   "outputs": [],
   "source": [
    "#plotImages(imgs)\n",
    "#print(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6322bcad",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "physical_devices = tf.config.list_physical_devices('GPU')\n",
    "print(len(physical_devices))\n",
    "# Enable on-demand memory growth on every visible GPU. Looping (rather than indexing [0])\n",
    "# also keeps this cell from raising IndexError on a CPU-only machine.\n",
    "for gpu in physical_devices:\n",
    "    tf.config.experimental.set_memory_growth(gpu, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "b31af79e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load VGG16 with ImageNet weights. The classifier head is kept (no include_top override);\n",
    "# the model-building cell reuses every layer except the last one.\n",
    "base_model = tf.keras.applications.vgg16.VGG16(weights='imagenet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fe06f2bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "#model = Sequential()\n",
    "#for layer in base_model.layers[:-1]:\n",
    "#    model.add(layer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "7d3cc82c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebuild VGG16 as a Sequential model: reuse every layer except the final ImageNet\n",
    "# classifier and add Dropout after layers 'fc1' and 'fc2'\n",
    "updated_model = Sequential()\n",
    "for layer in base_model.layers[:-1]:\n",
    "    updated_model.add(layer)\n",
    "    if layer.name in ['fc1', 'fc2']:\n",
    "        updated_model.add(Dropout(.50))\n",
    "\n",
    "model = updated_model\n",
    "\n",
    "# fine-tune the whole network, not only the new head\n",
    "for layer in model.layers:\n",
    "    layer.trainable = True\n",
    "\n",
    "# Size the softmax head from the generator instead of hard-coding it, so it keeps matching\n",
    "# the labels when the category filtering upstream keeps a different number of classes.\n",
    "num_classes = len(train_generator.class_indices)\n",
    "model.add(Dense(units=num_classes, activation='softmax'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c774d787",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      \n",
      "                                                                 \n",
      " block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     \n",
      "                                                                 \n",
      " block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         \n",
      "                                                                 \n",
      " block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     \n",
      "                                                                 \n",
      " block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    \n",
      "                                                                 \n",
      " block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         \n",
      "                                                                 \n",
      " block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    \n",
      "                                                                 \n",
      " block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080    \n",
      "                                                                 \n",
      " block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080    \n",
      "                                                                 \n",
      " block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0         \n",
      "                                                                 \n",
      " block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160   \n",
      "                                                                 \n",
      " block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808   \n",
      "                                                                 \n",
      " block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808   \n",
      "                                                                 \n",
      " block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         \n",
      "                                                                 \n",
      " block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   \n",
      "                                                                 \n",
      " block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   \n",
      "                                                                 \n",
      " block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   \n",
      "                                                                 \n",
      " block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         \n",
      "                                                                 \n",
      " flatten (Flatten)           (None, 25088)             0         \n",
      "                                                                 \n",
      " fc1 (Dense)                 (None, 4096)              102764544 \n",
      "                                                                 \n",
      " dropout (Dropout)           (None, 4096)              0         \n",
      "                                                                 \n",
      " fc2 (Dense)                 (None, 4096)              16781312  \n",
      "                                                                 \n",
      " dropout_1 (Dropout)         (None, 4096)              0         \n",
      "                                                                 \n",
      " dense (Dense)               (None, 7)                 28679     \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 134,289,223\n",
      "Trainable params: 134,289,223\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Optional L2 kernel regularization via add_regularization() is currently switched off.\n",
    "#model = add_regularization(model)\n",
    "# Print the layer table to confirm the Dropout layers and the new softmax head are in place.\n",
    "model.summary()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "fd5d1246",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adam at learning rate 1e-4; categorical cross-entropy pairs with the softmax output layer.\n",
    "# Accuracy is the only metric tracked during training.\n",
    "model.compile(optimizer=Adam(learning_rate=.0001), loss='categorical_crossentropy',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cd2ba27",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/30\n",
      "911/911 [==============================] - 329s 356ms/step - loss: 1.8477 - accuracy: 0.2577 - val_loss: 1.6306 - val_accuracy: 0.3669\n",
      "Epoch 2/30\n",
      "911/911 [==============================] - 322s 353ms/step - loss: 1.4882 - accuracy: 0.4353 - val_loss: 1.4317 - val_accuracy: 0.4784\n",
      "Epoch 3/30\n",
      "911/911 [==============================] - 323s 354ms/step - loss: 1.3046 - accuracy: 0.5158 - val_loss: 1.2747 - val_accuracy: 0.5235\n",
      "Epoch 4/30\n",
      "911/911 [==============================] - 319s 350ms/step - loss: 1.1691 - accuracy: 0.5681 - val_loss: 1.2090 - val_accuracy: 0.5529\n",
      "Epoch 5/30\n",
      "911/911 [==============================] - 317s 348ms/step - loss: 1.0389 - accuracy: 0.6185 - val_loss: 1.1774 - val_accuracy: 0.5682\n",
      "Epoch 6/30\n",
      "911/911 [==============================] - 317s 348ms/step - loss: 0.9125 - accuracy: 0.6656 - val_loss: 1.2237 - val_accuracy: 0.5639\n",
      "Epoch 7/30\n",
      "147/911 [===>..........................] - ETA: 3:39 - loss: 0.7312 - accuracy: 0.7256"
     ]
    }
   ],
   "source": [
    "# Train for up to 30 epochs; each epoch makes one full pass over the training generator and\n",
    "# one over the validation generator (steps are set to len(generator)).\n",
    "# NOTE(review): in the saved output val_loss rises from 1.1774 (epoch 5) to 1.2237 (epoch 6)\n",
    "# while the training loss keeps falling - consider an EarlyStopping callback on val_loss.\n",
    "model.fit(x=train_generator,\n",
    "          steps_per_epoch=len(train_generator),\n",
    "          validation_data=validation_generator,\n",
    "          validation_steps=len(validation_generator),\n",
    "          epochs=30,\n",
    "          verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63f791af",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"id": "572dc7fb",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"from matplotlib import pyplot as plt\n",`
			`"from matplotlib.image import imread\n",`
			`"import pandas as pd\n",`
			`"from collections import Counter\n",`
			`"import json\n",`
			`"import os\n",`
			`"import re\n",`
			`"import tempfile\n",`
			`"import numpy as np\n",`
			`"from os.path import exists\n",`
			`"from imblearn.under_sampling import RandomUnderSampler\n",`
			`"from PIL import ImageFile\n",`
			`"import sklearn as sk\n",`
			`"from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n",`
			`"import tensorflow as tf\n",`
			`"import tensorflow.keras\n",`
			`"from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",`
			`"from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Activation\n",`
			`"from tensorflow.keras.models import Sequential\n",`
			`"from tensorflow.keras.optimizers import Adam\n",`
			`"# custom modules\n",`
			`"import image_faults\n",`
			`"\n",`
			`"ImageFile.LOAD_TRUNCATED_IMAGES = True"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def add_regularization(model, regularizer=tf.keras.regularizers.l2(0.0001)):\n",`
			`"\n",`
			`" if not isinstance(regularizer, tf.keras.regularizers.Regularizer):\n",`
			`" print(\"Regularizer must be a subclass of tf.keras.regularizers.Regularizer\")\n",`
			`" return model\n",`
			`"\n",`
			`" for layer in model.layers:\n",`
			`" for attr in ['kernel_regularizer']:\n",`
			`" if hasattr(layer, attr):\n",`
			`" setattr(layer, attr, regularizer)\n",`
			`"\n",`
			`" # When we change the layers attributes, the change only happens in the model config file\n",`
			`" model_json = model.to_json()\n",`
			`"\n",`
			`" # Save the weights before reloading the model.\n",`
			`" tmp_weights_path = os.path.join(tempfile.gettempdir(), 'tmp_weights.h5')\n",`
			`" model.save_weights(tmp_weights_path)\n",`
			`"\n",`
			`" # load the model from the config\n",`
			`" model = tf.keras.models.model_from_json(model_json)\n",`
			`" \n",`
			`" # Reload the model weights\n",`
			`" model.load_weights(tmp_weights_path, by_name=True)\n",`
			`" return model"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"id": "a5c72863",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# image_faults.faulty_images() # removes faulty images\n",`
			`"df = pd.read_csv('expanded_class.csv', index_col=[0], low_memory=False)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"id": "1057a442",`
			`"metadata": {`
			`"scrolled": true`
			`},`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"outputs": [],`
			`"source": [`
			`"def dict_pics_jup():\n",`
			`" '''\n",`
			`" {source:target} dict used to replace source urls with image location as input\n",`
			`" '''\n",`
			`" target_dir = os.getcwd() + os.sep + \"training_images\"\n",`
			`" with open('temp_pics_source_list.txt') as f:\n",`
			`" temp_pics_source_list = json.load(f)\n",`
			`" \n",`
			`" dict_pics = {}\n",`
			`" for k in temp_pics_source_list:\n",`
			`" patt_1 = re.search(r'[^/]+(?=/\\$_\|.(\\.jpg\|\\.jpeg\|\\.png))', k, re.IGNORECASE)\n",`
			`" patt_2 = re.search(r'(\\.jpg\|\\.jpeg\|\\.png)', k, re.IGNORECASE)\n",`
			`" if patt_1 and patt_2 is not None:\n",`
			`" tag = patt_1.group() + patt_2.group().lower()\n",`
			`" file_name = target_dir + os.sep + tag\n",`
			`" dict_pics.update({k:file_name})\n",`
			`" print(\"{source:target} dictionary created @ \" + target_dir)\n",`
			`" return dict_pics"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"id": "7a6146e6",`
			`"metadata": {},`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"{source:target} dictionary created @ /tf/training_images\n"`
			`]`
			`}`
			`],`
			`"source": [`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"dict_pics = dict_pics_jup()\n",`
			`"\n",`
			`"with open('women_cat_list.txt') as f:\n",`
			`" women_cats = json.load(f)\n",`
			`"with open('men_cat_list.txt') as f:\n",`
			`" men_cats = json.load(f)\n",`
			`" \n",`
			`"with open('temp_pics_source_list.txt') as f:\n",`
			`" tempics = json.load(f)\n",`
			`"# list of image urls that did not get named properly which will be removed from the dataframe\n",`
			`"drop_row_vals = []\n",`
			`"for pic in tempics:\n",`
			`" try:\n",`
			`" dict_pics[pic]\n",`
			`" except KeyError:\n",`
			`" drop_row_vals.append(pic)\n",`
			`"\n",`
			`"df['PrimaryCategoryID'] = df['PrimaryCategoryID'].astype(str) # pandas thinks ids are ints\n",`
			`"ddf = df[df.PictureURL.isin(drop_row_vals)==False] # remove improperly named image files\n",`
			`"df = ddf[ddf.PrimaryCategoryID.isin(men_cats)==False] # removes rows of womens categories\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"\n",`
			`"blah = pd.Series(df.PictureURL)\n",`
			`"df = df.drop(labels=['PictureURL'], axis=1)\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"blah = blah.apply(lambda x: dict_pics[x])\n",`
			`"df = pd.concat([blah, df],axis=1)\n",`
			`"df = df.groupby('PrimaryCategoryID').filter(lambda x: len(x)>25) # removes cat outliers\n",`
			`"\n",`
			`"df=df.sample(frac=1)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 6,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"Counter({'11632': 6505, '45333': 6505, '53548': 6505, '53557': 6505, '55793': 6505, '62107': 6505, '95672': 6505})\n"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`]`
			`}`
			`],`
			`"source": [`
			`"undersample = RandomUnderSampler(sampling_strategy='auto')\n",`
			`"train, y_under = undersample.fit_resample(df, df['PrimaryCategoryID'])\n",`
			`"print(Counter(train['PrimaryCategoryID']))"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 7,`
			`"id": "506aa5cf",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"train, test = train_test_split(train, test_size=0.2, random_state=42)\n",`
			`"# stratify=train['PrimaryCategoryID']\n",`
			`"# train['PrimaryCategoryID'].value_counts()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 8,`
			`"id": "4d72eb90",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"Found 29143 validated image filenames belonging to 7 classes.\n",`
			`"Found 7285 validated image filenames belonging to 7 classes.\n"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`]`
			`}`
			`],`
			`"source": [`
			`"datagen = ImageDataGenerator(rescale=1./255., \n",`
			`" validation_split=.2,\n",`
			`" #samplewise_std_normalization=True,\n",`
			`" #horizontal_flip= True,\n",`
			`" #vertical_flip= True,\n",`
			`" #width_shift_range= 0.2,\n",`
			`" #height_shift_range= 0.2,\n",`
			`" #rotation_range= 90,\n",`
			`" preprocessing_function=tf.keras.applications.vgg16.preprocess_input)\n",`
			`"train_generator=datagen.flow_from_dataframe(\n",`
			`" dataframe=train[:len(train)],\n",`
			`" directory='./training_images',\n",`
			`" x_col='PictureURL',\n",`
			`" y_col='PrimaryCategoryID',\n",`
			`" batch_size=32,\n",`
			`" seed=42,\n",`
			`" shuffle=True,\n",`
			`" target_size=(244,244),\n",`
			`" subset='training'\n",`
			`" )\n",`
			`"validation_generator=datagen.flow_from_dataframe(\n",`
			`" dataframe=train[:len(train)], # is using train right?\n",`
			`" directory='./training_images',\n",`
			`" x_col='PictureURL',\n",`
			`" y_col='PrimaryCategoryID',\n",`
			`" batch_size=32,\n",`
			`" seed=42,\n",`
			`" shuffle=True,\n",`
			`" target_size=(244,244),\n",`
			`" subset='validation'\n",`
			`" )"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 9,`
			`"id": "7b70f37f",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"imgs, labels = next(train_generator)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 10,`
			`"id": "1ed54bf5",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def plotImages(images_arr):\n",`
			`" fig, axes = plt.subplots(1, 10, figsize=(20,20))\n",`
			`" axes = axes.flatten()\n",`
			`" for img, ax in zip( images_arr, axes):\n",`
			`" ax.imshow(img)\n",`
			`" ax.axis('off')\n",`
			`" plt.tight_layout()\n",`
			`" plt.show()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 11,`
			`"id": "85934565",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"#plotImages(imgs)\n",`
			`"#print(labels)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 12,`
			`"id": "6322bcad",`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"1\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"physical_devices = tf.config.list_physical_devices('GPU')\n",`
			`"print(len(physical_devices))\n",`
			`"tf.config.experimental.set_memory_growth(physical_devices[0], True)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 13,`
			`"id": "b31af79e",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"base_model = tf.keras.applications.vgg16.VGG16(weights='imagenet')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 14,`
			`"id": "fe06f2bf",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"#model = Sequential()\n",`
			`"#for layer in base_model.layers[:-1]:\n",`
			`"# model.add(layer)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 15,`
			`"id": "7d3cc82c",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# loop through layers, add Dropout after layers 'fc1' and 'fc2'\n",`
			`"updated_model = Sequential()\n",`
			`"for layer in base_model.layers[:-1]:\n",`
			`" updated_model.add(layer)\n",`
			`" if layer.name in ['fc1', 'fc2']:\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`" updated_model.add(Dropout(.50))\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"\n",`
			`"model = updated_model\n",`
			`"\n",`
			`"for layer in model.layers:\n",`
			`" layer.trainable = True\n",`
			`"\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"model.add(Dense(units=7, activation='softmax'))"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 16,`
			`"id": "c774d787",`
			`"metadata": {`
			`"scrolled": true`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Model: \"sequential\"\n",`
			`"_________________________________________________________________\n",`
			`" Layer (type) Output Shape Param # \n",`
			`"=================================================================\n",`
			`" block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",`
			`" \n",`
			`" block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",`
			`" \n",`
			`" block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",`
			`" \n",`
			`" block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",`
			`" \n",`
			`" block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",`
			`" \n",`
			`" block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",`
			`" \n",`
			`" block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",`
			`" \n",`
			`" block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",`
			`" \n",`
			`" block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",`
			`" \n",`
			`" block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",`
			`" \n",`
			`" block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",`
			`" \n",`
			`" block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",`
			`" \n",`
			`" block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",`
			`" \n",`
			`" block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",`
			`" \n",`
			`" block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",`
			`" \n",`
			`" block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",`
			`" \n",`
			`" block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",`
			`" \n",`
			`" block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",`
			`" \n",`
			`" flatten (Flatten) (None, 25088) 0 \n",`
			`" \n",`
			`" fc1 (Dense) (None, 4096) 102764544 \n",`
			`" \n",`
			`" dropout (Dropout) (None, 4096) 0 \n",`
			`" \n",`
			`" fc2 (Dense) (None, 4096) 16781312 \n",`
			`" \n",`
			`" dropout_1 (Dropout) (None, 4096) 0 \n",`
			`" \n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`" dense (Dense) (None, 7) 28679 \n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`" \n",`
			`"=================================================================\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"Total params: 134,289,223\n",`
			`"Trainable params: 134,289,223\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Non-trainable params: 0\n",`
			`"_________________________________________________________________\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"#model = add_regularization(model)\n",`
			`"model.summary()\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 17,`
			`"id": "fd5d1246",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"model.compile(optimizer=Adam(learning_rate=.0001), loss='categorical_crossentropy',\n",`
			`" metrics=['accuracy'])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "9cd2ba27",`
			`"metadata": {`
			`"scrolled": false`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Epoch 1/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"911/911 [==============================] - 329s 356ms/step - loss: 1.8477 - accuracy: 0.2577 - val_loss: 1.6306 - val_accuracy: 0.3669\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Epoch 2/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"911/911 [==============================] - 322s 353ms/step - loss: 1.4882 - accuracy: 0.4353 - val_loss: 1.4317 - val_accuracy: 0.4784\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Epoch 3/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"911/911 [==============================] - 323s 354ms/step - loss: 1.3046 - accuracy: 0.5158 - val_loss: 1.2747 - val_accuracy: 0.5235\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Epoch 4/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"911/911 [==============================] - 319s 350ms/step - loss: 1.1691 - accuracy: 0.5681 - val_loss: 1.2090 - val_accuracy: 0.5529\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Epoch 5/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"911/911 [==============================] - 317s 348ms/step - loss: 1.0389 - accuracy: 0.6185 - val_loss: 1.1774 - val_accuracy: 0.5682\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Epoch 6/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"911/911 [==============================] - 317s 348ms/step - loss: 0.9125 - accuracy: 0.6656 - val_loss: 1.2237 - val_accuracy: 0.5639\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"Epoch 7/30\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"147/911 [===>..........................] - ETA: 3:39 - loss: 0.7312 - accuracy: 0.7256"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`]`
			`}`
			`],`
			`"source": [`
			`"model.fit(x=train_generator,\n",`
			`" steps_per_epoch=len(train_generator),\n",`
			`" validation_data=validation_generator,\n",`
			`" validation_steps=len(validation_generator),\n",`
			`" epochs=30,\n",`
			`" verbose=1)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"id": "63f791af",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.8.10"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`