new dl_pic, dict_pics methods and updated dl_pictures. fixed bugs
This commit is contained in:
parent ae25ab88b6
commit 8c41fe1daf
@@ -93,7 +93,7 @@
}
],
"source": [
"def dict_pics():\n",
"def dict_pics_jup():\n",
" target_dir = os.getcwd() + os.sep + \"training_images\"\n",
" with open('temp_pics_source_list.txt') as f:\n",
" temp_pics_source_list = json.load(f)\n",
@@ -101,7 +101,7 @@
" print(\"{source:target} dictionary created @ \" + target_dir)\n",
" return dict_pics\n",
"\n",
"dict_pics = dict_pics()\n",
"dict_pics = dict_pics_jup()\n",
"blah = pd.Series(df.PictureURL)\n",
"df = df.drop(labels=['PictureURL'], axis=1)\n",
"blah = blah.apply(lambda x: dict_pics[x])\n",
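The rename from dict_pics() to dict_pics_jup() in the notebook sidesteps a name collision: `dict_pics = dict_pics()` rebinds the function's own name to its return value, so re-running the cell raises `TypeError: 'dict' object is not callable`. A minimal sketch of the collision with a hypothetical stand-in helper (not the project's code):

def dict_pics():
    # stand-in for the notebook helper; returns a {source: target} mapping
    return {"http://example.com/a.jpg": "training_images/a.jpg"}

dict_pics = dict_pics()    # first run: 'dict_pics' now names the returned dict
# dict_pics = dict_pics()  # a second run would raise TypeError: 'dict' object is not callable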
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"id": "572dc7fb",
"metadata": {},
"outputs": [],
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 8,
"id": "8d94196d",
"metadata": {},
"outputs": [],
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 9,
"id": "a5c72863",
"metadata": {},
"outputs": [],
@@ -79,22 +79,28 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 11,
"id": "1057a442",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{source:target} dictionary created @ /tf/training_images\n"
"ename": "AttributeError",
"evalue": "'NoneType' object has no attribute 'group'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-11-d8afc400d306>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdict_pics\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict_pics_jup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mblah\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPictureURL\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PictureURL'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-11-d8afc400d306>\u001b[0m in \u001b[0;36mdict_pics_jup\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'temp_pics_source_list.txt'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mtemp_pics_source_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mtarget_dir\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msep\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'[^/]+(?=/\\$_|.jpg)'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIGNORECASE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'.jpg'\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_pics_source_list\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"{source:target} dictionary created @ \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtarget_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdict_pics\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-11-d8afc400d306>\u001b[0m in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'temp_pics_source_list.txt'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mtemp_pics_source_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mdict_pics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mtarget_dir\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msep\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'[^/]+(?=/\\$_|.jpg)'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIGNORECASE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'.jpg'\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_pics_source_list\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"{source:target} dictionary created @ \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtarget_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdict_pics\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'group'"
]
}
],
"source": [
"def dict_pics():\n",
"def dict_pics_jup(): # \n",
" target_dir = os.getcwd() + os.sep + \"training_images\"\n",
" with open('temp_pics_source_list.txt') as f:\n",
" temp_pics_source_list = json.load(f)\n",
@@ -102,7 +108,7 @@
" print(\"{source:target} dictionary created @ \" + target_dir)\n",
" return dict_pics\n",
"\n",
"dict_pics = dict_pics()\n",
"dict_pics = dict_pics_jup()\n",
"blah = pd.Series(df.PictureURL)\n",
"df = df.drop(labels=['PictureURL'], axis=1)\n",
"blah = blah.apply(lambda x: dict_pics[x])\n",
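The AttributeError captured in the cell output above comes from the dictionary comprehension: `re.search(r'[^/]+(?=/\$_|.jpg)', k, re.IGNORECASE)` returns None for any source URL the pattern does not match, and calling .group() on None then fails. The ebay_api.py change further down guards for this by testing the match before using it; a minimal sketch of that kind of guard, with hypothetical URLs and target_dir (not the commit's exact code):

import os
import re

target_dir = os.path.join(os.getcwd(), "training_images")  # assumed layout
url_list = ["https://i.ebayimg.com/00/s/abc123/$_1.jpg", "https://example.com/no-match"]  # hypothetical

dict_pics = {}
for k in url_list:
    match = re.search(r'[^/]+(?=/\$_|.jpg)', k, re.IGNORECASE)
    if match:  # skip URLs the pattern cannot name instead of raising AttributeError
        dict_pics[k] = os.path.join(target_dir, match.group() + '.jpg')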
@@ -23,6 +23,6 @@ download = input('download images?: ')
if ('y' or 'Y') in download:
    with open('temp_pics_source_list.txt') as f:
        url_list = json.load(f)
    curate.dl_pictures(url_list)
    curate.dl_pictures()
else:
    pass
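Note that `('y' or 'Y') in download` does not test both cases: `('y' or 'Y')` short-circuits to just `'y'`, so a reply of `Y` alone never matches. A hedged alternative, not part of this commit:

download = input('download images?: ')
if download.strip().lower().startswith('y'):  # True for 'y', 'Y', 'yes', 'Yes'
    print('downloading...')  # the script would call curate.dl_pictures() here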
ebay_api.py
@@ -379,9 +379,13 @@ class CurateData:
with open('temp_pics_source_list.txt') as f:
tpsl = json.load(f)
tpsl.extend(temp_pics_source_list)

# ensures no duplicate source URLs exist
temp_pics_source_list = list(set(tpsl))
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)

# creates file if script is ran for 1st time and file not present
except (ValueError, FileNotFoundError):
with open('temp_pics_source_list.txt', 'w') as f:
json.dump(temp_pics_source_list, f)
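The deduplication above relies on `list(set(tpsl))`, which removes duplicates but does not keep the original URL order. If download order ever matters, an order-preserving variant (a sketch, not what the commit does) is:

tpsl = ["url_a", "url_b", "url_a"]  # hypothetical extended source list
temp_pics_source_list = list(dict.fromkeys(tpsl))  # dedupes like set() but keeps first-seen order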
@@ -392,11 +396,27 @@ class CurateData:

return expanded_class, expanded_dropd

def dl_pictures(self):
'''
Downloads pictures from api to local storage using temp_pics_source_list
and creates custom {source:target} dictionary as dict_pics
'''
def dl_pic(self,dict_pics, pic):

try:

if os.path.exists(dict_pics[pic]):
pass

else:
try:
r = requests.get(pic, stream=True)
r.raw.decode_content = True
with open(dict_pics[pic], 'wb') as f:
shutil.copyfileobj(r.raw, f)
except ConnectionError:

return

except KeyError:
pass

def dict_pics(self):

try:
with open('target_dirs.txt', 'r+') as f: # TODO you can add option to change directory here, too. Look up how to have optional arguments
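One thing to watch in the new dl_pic method: unless ebay_api.py imports ConnectionError from requests.exceptions, a bare `except ConnectionError:` refers to Python's built-in exception, and requests raises `requests.exceptions.ConnectionError`, which is not a subclass of the built-in one, so network failures may still propagate. A minimal sketch of the same streamed download using the requests-specific exception (hypothetical url and path, not the commit's code):

import shutil
import requests

def download_one(url: str, path: str) -> bool:
    """Stream url to path; return False on a network error instead of raising."""
    try:
        r = requests.get(url, stream=True, timeout=10)
        r.raw.decode_content = True  # let urllib3 undo gzip/deflate before copying
        with open(path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        return True
    except requests.exceptions.ConnectionError:
        return False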
@@ -418,44 +438,45 @@ class CurateData:
try:
temp_pics_source_list = json.load(f)
except (ValueError, FileNotFoundError):
print('url list not found. download aborted')
print('url list not found. aborting')
return


dict_pics = {k:target_dir + os.sep + re.search(r'[^/]+(?=/\$_|.jpg)', k, re.IGNORECASE).group() + '.jpg' for k in temp_pics_source_list}
dict_pics = {}
for k in temp_pics_source_list:
if re.search(r'[^/]+(?=/\$_|.(.jpg|.jpeg|.png))', k, re.IGNORECASE) and re.search(r'(.jpg|.jpeg|.png)', k, re.IGNORECASE) is not None:
tag = re.search(r'[^/]+(?=/\$_|.(.jpg|.jpeg|.png))', k, re.IGNORECASE).group() + re.search(r'(.jpg|.jpeg|.png)', k, re.IGNORECASE).group()
file_name = target_dir + os.sep + tag
dict_pics.update({k:file_name})

with open('dict_pics.txt', 'w') as f:
json.dump(dict_pics, f)

def dl_pic(dict_pics, pic):
return dict_pics # TODO still need to find sol to outliers (i.e., naming scheme for unusual source URLs)

def dl_pictures(self, *dict_pics):
'''
Downloads pictures from api to local storage using temp_pics_source_list
and creates custom {source:target} dictionary as dict_pics
'''

if not dict_pics:
with open('dict_pics.txt') as f:
dict_pics = json.load(f)
with open('temp_pics_source_list.txt') as f:
try:

if os.path.exists(dict_pics[pic]):
pass # TODO should catch dupes, but make sure it is

else:
try:
r = requests.get(pic, stream=True)
r.raw.decode_content = True
with open(dict_pics[pic], 'wb') as f: # Or call dict_pics[pic] can work
shutil.copyfileobj(r.raw, f)
except ConnectionError:

return

except KeyError:
pass
temp_pics_source_list = json.load(f)
except (ValueError, FileNotFoundError):
print('url list not found. download aborted')
return

bargs = [(dict_pics, pic) for pic in temp_pics_source_list]
with concurrent.futures.ThreadPoolExecutor() as executor:
for future in executor.map(lambda p: dl_pic(*p), bargs):
for future in executor.map(lambda p: self.dl_pic(*p), bargs):
if future is not None:
future
else:
print('connection error')

os.remove('temp_pics_source_list.txt') # Deletes file after downloads complete successfully

class PreProcessing:
'''
Includes methods for pre-processing training set input and labels in the
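The updated dl_pictures fans the downloads out with concurrent.futures, mapping each (dict_pics, pic) tuple onto self.dl_pic through a lambda; despite the variable name, executor.map yields each call's return value rather than a Future object. A standalone sketch of that pattern with a hypothetical download_one helper and mapping (not the project's code):

import concurrent.futures

def download_one(mapping, url):
    """Stand-in for CurateData.dl_pic: pretend to fetch url to mapping[url]."""
    return mapping.get(url)

dict_pics = {"http://example.com/a.jpg": "training_images/a.jpg"}  # hypothetical mapping
bargs = [(dict_pics, pic) for pic in dict_pics]

with concurrent.futures.ThreadPoolExecutor() as executor:
    # executor.map returns results in input order; the lambda unpacks each argument tuple
    for result in executor.map(lambda p: download_one(*p), bargs):
        if result is None:
            print('connection error')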