ebay-ml-lister/Shoe Classifier_VGG16.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "572dc7fb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-08-01 23:57:09.348119: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n"
     ]
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "from matplotlib.image import imread\n",
    "import pandas as pd\n",
    "from collections import Counter\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "import tempfile\n",
    "import numpy as np\n",
    "from os.path import exists\n",
    "from imblearn.under_sampling import RandomUnderSampler\n",
    "from PIL import ImageFile\n",
    "import sklearn as sk\n",
    "from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n",
    "import tensorflow as tf\n",
    "import tensorflow.keras\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
    "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Activation\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.optimizers import Adam\n",
    "# custom modules\n",
    "import image_faults\n",
    "\n",
    "ImageFile.LOAD_TRUNCATED_IMAGES = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6ea418cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_regularization(model, regularizer=tf.keras.regularizers.l2(0.0001)):\n",
    "\n",
    "    if not isinstance(regularizer, tf.keras.regularizers.Regularizer):\n",
    "      print(\"Regularizer must be a subclass of tf.keras.regularizers.Regularizer\")\n",
    "      return model\n",
    "\n",
    "    for layer in model.layers:\n",
    "        for attr in ['kernel_regularizer']:\n",
    "            if hasattr(layer, attr):\n",
    "              setattr(layer, attr, regularizer)\n",
    "\n",
    "    # When we change the layers attributes, the change only happens in the model config file\n",
    "    model_json = model.to_json()\n",
    "\n",
    "    # Save the weights before reloading the model.\n",
    "    tmp_weights_path = os.path.join(tempfile.gettempdir(), 'tmp_weights.h5')\n",
    "    model.save_weights(tmp_weights_path)\n",
    "\n",
    "    # load the model from the config\n",
    "    model = tf.keras.models.model_from_json(model_json)\n",
    "    \n",
    "    # Reload the model weights\n",
    "    model.load_weights(tmp_weights_path, by_name=True)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a5c72863",
   "metadata": {},
   "outputs": [],
   "source": [
    "image_faults.faulty_images() # removes faulty images\n",
    "df = pd.read_csv('expanded_class.csv', index_col=[0], low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1057a442",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def dict_pics_jup():\n",
    "    '''\n",
    "    {source:target} dict used to replace source urls with image location as input\n",
    "    '''\n",
    "    target_dir = os.getcwd() + os.sep + \"training_images\"\n",
    "    with open('temp_pics_source_list.txt') as f:\n",
    "        temp_pics_source_list = json.load(f)\n",
    "        \n",
    "    dict_pics = {}\n",
    "    for k in temp_pics_source_list:\n",
    "         patt_1 = re.search(r'[^/]+(?=/\\$_|.(\\.jpg|\\.jpeg|\\.png))', k, re.IGNORECASE)\n",
    "         patt_2 = re.search(r'(\\.jpg|\\.jpeg|\\.png)', k, re.IGNORECASE)\n",
    "         if patt_1 and patt_2 is not None:\n",
    "             tag = patt_1.group() + patt_2.group().lower()\n",
    "             file_name = target_dir + os.sep + tag\n",
    "             dict_pics.update({k:file_name})\n",
    "    print(\"{source:target} dictionary created @ \" + target_dir)\n",
    "    return dict_pics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7a6146e6",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "expected string or bytes-like object",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-5-0009b269209e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict_pics_jup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'women_cat_list.txt'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mwomen_cats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'men_cat_list.txt'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-4-4701772f6383>\u001b[0m in \u001b[0;36mdict_pics_jup\u001b[0;34m()\u001b[0m\n\u001b[1;32m      9\u001b[0m     \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_pics_source_list\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m          \u001b[0mpatt_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'[^/]+(?=/\\$_|.(\\.jpg|\\.jpeg|\\.png))'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIGNORECASE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     12\u001b[0m          \u001b[0mpatt_2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'(\\.jpg|\\.jpeg|\\.png)'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIGNORECASE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     13\u001b[0m          \u001b[0;32mif\u001b[0m \u001b[0mpatt_1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mpatt_2\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/lib/python3.8/re.py\u001b[0m in \u001b[0;36msearch\u001b[0;34m(pattern, string, flags)\u001b[0m\n\u001b[1;32m    199\u001b[0m     \"\"\"Scan through string looking for a match to the pattern, returning\n\u001b[1;32m    200\u001b[0m     a Match object, or None if no match was found.\"\"\"\n\u001b[0;32m--> 201\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0m_compile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    202\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    203\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0msub\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrepl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstring\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: expected string or bytes-like object"
     ]
    }
   ],
   "source": [
    "dict_pics = dict_pics_jup()\n",
    "\n",
    "with open('women_cat_list.txt') as f:\n",
    "    women_cats = json.load(f)\n",
    "with open('men_cat_list.txt') as f:\n",
    "    men_cats = json.load(f)\n",
    "    \n",
    "with open('temp_pics_source_list.txt') as f:\n",
    "    tempics = json.load(f)\n",
    "# list of image urls that did not get named properly which will be removed from the dataframe\n",
    "drop_row_vals = []\n",
    "for pic in tempics:\n",
    "    try:\n",
    "        dict_pics[pic]\n",
    "    except KeyError:\n",
    "        drop_row_vals.append(pic)\n",
    "\n",
    "df['PrimaryCategoryID'] = df['PrimaryCategoryID'].astype(str) # pandas thinks ids are ints\n",
    "ddf = df[df.PictureURL.isin(drop_row_vals)==False] # remove improperly named image files\n",
    "df = ddf[ddf.PrimaryCategoryID.isin(men_cats)==False] # removes rows of womens categories\n",
    "\n",
    "blah = pd.Series(df.PictureURL)\n",
    "df = df.drop(labels=['PictureURL'], axis=1)\n",
    "\n",
    "blah = blah.apply(lambda x: dict_pics[x])\n",
    "df = pd.concat([blah, df],axis=1)\n",
    "df = df.groupby('PrimaryCategoryID').filter(lambda x: len(x)>25) # removes cat outliers\n",
    "\n",
    "df=df.sample(frac=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a3a86a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "undersample = RandomUnderSampler(sampling_strategy='auto')\n",
    "train, y_under = undersample.fit_resample(df, df['PrimaryCategoryID'])\n",
    "print(Counter(train['PrimaryCategoryID']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "506aa5cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "train, test = train_test_split(train, test_size=0.2, random_state=42)\n",
    "# stratify=train['PrimaryCategoryID']\n",
    "# train['PrimaryCategoryID'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d72eb90",
   "metadata": {},
   "outputs": [],
   "source": [
    "datagen = ImageDataGenerator(rescale=1./255., \n",
    "                             validation_split=.2,\n",
    "                             #samplewise_std_normalization=True,\n",
    "                             #horizontal_flip= True,\n",
    "                             #vertical_flip= True,\n",
    "                             #width_shift_range= 0.2,\n",
    "                             #height_shift_range= 0.2,\n",
    "                             #rotation_range= 90,\n",
    "                             preprocessing_function=tf.keras.applications.vgg16.preprocess_input)\n",
    "train_generator=datagen.flow_from_dataframe(\n",
    "    dataframe=train[:len(train)],\n",
    "    directory='./training_images',\n",
    "    x_col='PictureURL',\n",
    "    y_col='PrimaryCategoryID',\n",
    "    batch_size=32,\n",
    "    seed=42,\n",
    "    shuffle=True,\n",
    "    target_size=(244,244),\n",
    "    subset='training'\n",
    "    )\n",
    "validation_generator=datagen.flow_from_dataframe(\n",
    "    dataframe=train[:len(train)], # is using train right?\n",
    "    directory='./training_images',\n",
    "    x_col='PictureURL',\n",
    "    y_col='PrimaryCategoryID',\n",
    "    batch_size=32,\n",
    "    seed=42,\n",
    "    shuffle=True,\n",
    "    target_size=(244,244),\n",
    "    subset='validation'\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b70f37f",
   "metadata": {},
   "outputs": [],
   "source": [
    "imgs, labels = next(train_generator)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ed54bf5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plotImages(images_arr):\n",
    "    fig, axes = plt.subplots(1, 10, figsize=(20,20))\n",
    "    axes = axes.flatten()\n",
    "    for img, ax in zip( images_arr, axes):\n",
    "        ax.imshow(img)\n",
    "        ax.axis('off')\n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "85934565",
   "metadata": {},
   "outputs": [],
   "source": [
    "#plotImages(imgs)\n",
    "#print(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6322bcad",
   "metadata": {},
   "outputs": [],
   "source": [
    "physical_devices = tf.config.list_physical_devices('GPU')\n",
    "print(len(physical_devices))\n",
    "tf.config.experimental.set_memory_growth(physical_devices[0], True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b31af79e",
   "metadata": {},
   "outputs": [],
   "source": [
    "base_model = tf.keras.applications.vgg16.VGG16(weights='imagenet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe06f2bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "#model = Sequential()\n",
    "#for layer in base_model.layers[:-1]:\n",
    "#    model.add(layer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d3cc82c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# loop through layers, add Dropout after layers 'fc1' and 'fc2'\n",
    "updated_model = Sequential()\n",
    "for layer in base_model.layers[:-1]:\n",
    "    updated_model.add(layer)\n",
    "    if layer.name in ['fc1', 'fc2']:\n",
    "        updated_model.add(Dropout(.50))\n",
    "\n",
    "model = updated_model\n",
    "\n",
    "for layer in model.layers:\n",
    "    layer.trainable = True\n",
    "\n",
    "model.add(Dense(units=7, activation='softmax'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c774d787",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#model = add_regularization(model)\n",
    "model.summary()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd5d1246",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(optimizer=Adam(learning_rate=.0001), loss='categorical_crossentropy',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cd2ba27",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "model.fit(x=train_generator,\n",
    "          steps_per_epoch=len(train_generator),\n",
    "          validation_data=validation_generator,\n",
    "          validation_steps=len(validation_generator),\n",
    "          epochs=30,\n",
    "          verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63f791af",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"id": "572dc7fb",`
			`"metadata": {},`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"2022-08-01 23:57:09.348119: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n"`
			`]`
			`}`
			`],`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"source": [`
			`"from matplotlib import pyplot as plt\n",`
			`"from matplotlib.image import imread\n",`
			`"import pandas as pd\n",`
			`"from collections import Counter\n",`
			`"import json\n",`
			`"import os\n",`
			`"import re\n",`
			`"import tempfile\n",`
			`"import numpy as np\n",`
			`"from os.path import exists\n",`
			`"from imblearn.under_sampling import RandomUnderSampler\n",`
			`"from PIL import ImageFile\n",`
			`"import sklearn as sk\n",`
			`"from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n",`
			`"import tensorflow as tf\n",`
			`"import tensorflow.keras\n",`
			`"from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",`
			`"from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Activation\n",`
			`"from tensorflow.keras.models import Sequential\n",`
			`"from tensorflow.keras.optimizers import Adam\n",`
			`"# custom modules\n",`
			`"import image_faults\n",`
			`"\n",`
			`"ImageFile.LOAD_TRUNCATED_IMAGES = True"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"id": "6ea418cc",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def add_regularization(model, regularizer=tf.keras.regularizers.l2(0.0001)):\n",`
			`"\n",`
			`" if not isinstance(regularizer, tf.keras.regularizers.Regularizer):\n",`
			`" print(\"Regularizer must be a subclass of tf.keras.regularizers.Regularizer\")\n",`
			`" return model\n",`
			`"\n",`
			`" for layer in model.layers:\n",`
			`" for attr in ['kernel_regularizer']:\n",`
			`" if hasattr(layer, attr):\n",`
			`" setattr(layer, attr, regularizer)\n",`
			`"\n",`
			`" # When we change the layers attributes, the change only happens in the model config file\n",`
			`" model_json = model.to_json()\n",`
			`"\n",`
			`" # Save the weights before reloading the model.\n",`
			`" tmp_weights_path = os.path.join(tempfile.gettempdir(), 'tmp_weights.h5')\n",`
			`" model.save_weights(tmp_weights_path)\n",`
			`"\n",`
			`" # load the model from the config\n",`
			`" model = tf.keras.models.model_from_json(model_json)\n",`
			`" \n",`
			`" # Reload the model weights\n",`
			`" model.load_weights(tmp_weights_path, by_name=True)\n",`
			`" return model"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"id": "a5c72863",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"image_faults.faulty_images() # removes faulty images\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"df = pd.read_csv('expanded_class.csv', index_col=[0], low_memory=False)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"id": "1057a442",`
			`"metadata": {`
			`"scrolled": true`
			`},`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"outputs": [],`
			`"source": [`
			`"def dict_pics_jup():\n",`
			`" '''\n",`
			`" {source:target} dict used to replace source urls with image location as input\n",`
			`" '''\n",`
			`" target_dir = os.getcwd() + os.sep + \"training_images\"\n",`
			`" with open('temp_pics_source_list.txt') as f:\n",`
			`" temp_pics_source_list = json.load(f)\n",`
			`" \n",`
			`" dict_pics = {}\n",`
			`" for k in temp_pics_source_list:\n",`
			`" patt_1 = re.search(r'[^/]+(?=/\\$_\|.(\\.jpg\|\\.jpeg\|\\.png))', k, re.IGNORECASE)\n",`
			`" patt_2 = re.search(r'(\\.jpg\|\\.jpeg\|\\.png)', k, re.IGNORECASE)\n",`
			`" if patt_1 and patt_2 is not None:\n",`
			`" tag = patt_1.group() + patt_2.group().lower()\n",`
			`" file_name = target_dir + os.sep + tag\n",`
			`" dict_pics.update({k:file_name})\n",`
			`" print(\"{source:target} dictionary created @ \" + target_dir)\n",`
			`" return dict_pics"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"id": "7a6146e6",`
			`"metadata": {},`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"outputs": [`
			`{`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"ename": "TypeError",`
			`"evalue": "expected string or bytes-like object",`
			`"output_type": "error",`
			`"traceback": [`
			`"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",`
			`"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",`
			"\u001b[0;32m<ipython-input-5-0009b269209e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict_pics_jup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'women_cat_list.txt'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mwomen_cats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'men_cat_list.txt'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
			"\u001b[0;32m<ipython-input-4-4701772f6383>\u001b[0m in \u001b[0;36mdict_pics_jup\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_pics_source_list\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mpatt_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'[^/]+(?=/\\$_\|.(\\.jpg\|\\.jpeg\|\\.png))'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIGNORECASE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0mpatt_2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'(\\.jpg\|\\.jpeg\|\\.png)'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIGNORECASE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpatt_1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mpatt_2\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
			"\u001b[0;32m/usr/lib/python3.8/re.py\u001b[0m in \u001b[0;36msearch\u001b[0;34m(pattern, string, flags)\u001b[0m\n\u001b[1;32m 199\u001b[0m \"\"\"Scan through string looking for a match to the pattern, returning\n\u001b[1;32m 200\u001b[0m a Match object, or None if no match was found.\"\"\"\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_compile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0msub\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrepl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstring\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
			`"\u001b[0;31mTypeError\u001b[0m: expected string or bytes-like object"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`]`
			`}`
			`],`
			`"source": [`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"dict_pics = dict_pics_jup()\n",`
			`"\n",`
			`"with open('women_cat_list.txt') as f:\n",`
			`" women_cats = json.load(f)\n",`
			`"with open('men_cat_list.txt') as f:\n",`
			`" men_cats = json.load(f)\n",`
			`" \n",`
			`"with open('temp_pics_source_list.txt') as f:\n",`
			`" tempics = json.load(f)\n",`
			`"# list of image urls that did not get named properly which will be removed from the dataframe\n",`
			`"drop_row_vals = []\n",`
			`"for pic in tempics:\n",`
			`" try:\n",`
			`" dict_pics[pic]\n",`
			`" except KeyError:\n",`
			`" drop_row_vals.append(pic)\n",`
			`"\n",`
			`"df['PrimaryCategoryID'] = df['PrimaryCategoryID'].astype(str) # pandas thinks ids are ints\n",`
			`"ddf = df[df.PictureURL.isin(drop_row_vals)==False] # remove improperly named image files\n",`
			`"df = ddf[ddf.PrimaryCategoryID.isin(men_cats)==False] # removes rows of womens categories\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"\n",`
			`"blah = pd.Series(df.PictureURL)\n",`
			`"df = df.drop(labels=['PictureURL'], axis=1)\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"blah = blah.apply(lambda x: dict_pics[x])\n",`
			`"df = pd.concat([blah, df],axis=1)\n",`
			`"df = df.groupby('PrimaryCategoryID').filter(lambda x: len(x)>25) # removes cat outliers\n",`
			`"\n",`
			`"df=df.sample(frac=1)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
			`"id": "8a3a86a1",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"metadata": {},`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"outputs": [],`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"source": [`
			`"undersample = RandomUnderSampler(sampling_strategy='auto')\n",`
			`"train, y_under = undersample.fit_resample(df, df['PrimaryCategoryID'])\n",`
			`"print(Counter(train['PrimaryCategoryID']))"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "506aa5cf",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"train, test = train_test_split(train, test_size=0.2, random_state=42)\n",`
			`"# stratify=train['PrimaryCategoryID']\n",`
			`"# train['PrimaryCategoryID'].value_counts()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "4d72eb90",`
			`"metadata": {},`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"outputs": [],`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"source": [`
			`"datagen = ImageDataGenerator(rescale=1./255., \n",`
			`" validation_split=.2,\n",`
			`" #samplewise_std_normalization=True,\n",`
			`" #horizontal_flip= True,\n",`
			`" #vertical_flip= True,\n",`
			`" #width_shift_range= 0.2,\n",`
			`" #height_shift_range= 0.2,\n",`
			`" #rotation_range= 90,\n",`
			`" preprocessing_function=tf.keras.applications.vgg16.preprocess_input)\n",`
			`"train_generator=datagen.flow_from_dataframe(\n",`
			`" dataframe=train[:len(train)],\n",`
			`" directory='./training_images',\n",`
			`" x_col='PictureURL',\n",`
			`" y_col='PrimaryCategoryID',\n",`
			`" batch_size=32,\n",`
			`" seed=42,\n",`
			`" shuffle=True,\n",`
			`" target_size=(244,244),\n",`
			`" subset='training'\n",`
			`" )\n",`
			`"validation_generator=datagen.flow_from_dataframe(\n",`
			`" dataframe=train[:len(train)], # is using train right?\n",`
			`" directory='./training_images',\n",`
			`" x_col='PictureURL',\n",`
			`" y_col='PrimaryCategoryID',\n",`
			`" batch_size=32,\n",`
			`" seed=42,\n",`
			`" shuffle=True,\n",`
			`" target_size=(244,244),\n",`
			`" subset='validation'\n",`
			`" )"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "7b70f37f",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"imgs, labels = next(train_generator)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "1ed54bf5",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"def plotImages(images_arr):\n",`
			`" fig, axes = plt.subplots(1, 10, figsize=(20,20))\n",`
			`" axes = axes.flatten()\n",`
			`" for img, ax in zip( images_arr, axes):\n",`
			`" ax.imshow(img)\n",`
			`" ax.axis('off')\n",`
			`" plt.tight_layout()\n",`
			`" plt.show()"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "85934565",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"#plotImages(imgs)\n",`
			`"#print(labels)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "6322bcad",`
			`"metadata": {},`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"outputs": [],`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"source": [`
			`"physical_devices = tf.config.list_physical_devices('GPU')\n",`
			`"print(len(physical_devices))\n",`
			`"tf.config.experimental.set_memory_growth(physical_devices[0], True)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "b31af79e",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"base_model = tf.keras.applications.vgg16.VGG16(weights='imagenet')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "fe06f2bf",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"#model = Sequential()\n",`
			`"#for layer in base_model.layers[:-1]:\n",`
			`"# model.add(layer)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "7d3cc82c",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# loop through layers, add Dropout after layers 'fc1' and 'fc2'\n",`
			`"updated_model = Sequential()\n",`
			`"for layer in base_model.layers[:-1]:\n",`
			`" updated_model.add(layer)\n",`
			`" if layer.name in ['fc1', 'fc2']:\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`" updated_model.add(Dropout(.50))\n",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"\n",`
			`"model = updated_model\n",`
			`"\n",`
			`"for layer in model.layers:\n",`
			`" layer.trainable = True\n",`
			`"\n",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"model.add(Dense(units=7, activation='softmax'))"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "c774d787",`
			`"metadata": {`
			`"scrolled": true`
			`},`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"outputs": [],`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"source": [`
			`"#model = add_regularization(model)\n",`
			`"model.summary()\n"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "fd5d1246",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"model.compile(optimizer=Adam(learning_rate=.0001), loss='categorical_crossentropy',\n",`
			`" metrics=['accuracy'])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
included option for variable range of images per listing 2022-01-15 02:53:59 +00:00			`"execution_count": null,`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"id": "9cd2ba27",`
			`"metadata": {`
			`"scrolled": false`
			`},`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"outputs": [],`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"source": [`
			`"model.fit(x=train_generator,\n",`
			`" steps_per_epoch=len(train_generator),\n",`
			`" validation_data=validation_generator,\n",`
			`" validation_steps=len(validation_generator),\n",`
			`" epochs=30,\n",`
			`" verbose=1)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"id": "63f791af",`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"display_name": "Python 3 (ipykernel)",`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
added ML training projects in pytorch 2022-08-03 03:14:38 +00:00			`"version": "3.9.12"`
commit before checkout 428e0379efc3cff3f85154e42ed544a5f982ade3 for ref 2022-01-03 20:32:42 +00:00			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 5`
			`}`