Added comments; switched the BeautifulSoup parser from html.parser to lxml.
This commit is contained in:
parent
6fd57ec6b9
commit
9d9d72e8d0
1
bulkpp
Submodule
1
bulkpp
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 32d3febba22b73b37ac188d84330ae789288c535
|
44
posh.py
44
posh.py
@ -4,14 +4,16 @@ import time
|
|||||||
import re
|
import re
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
# import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
def url_base_builder(search_query):
|
def url_base_builder(search_query):
|
||||||
genders = ['Men', 'Women']
|
genders = ['Men', 'Women']
|
||||||
|
|
||||||
posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
|
posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
|
||||||
|
'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
|
||||||
|
|
||||||
for i in range(0,len(posh_colors)):
|
for i in range(0, len(posh_colors)):
|
||||||
if posh_colors[i] in search_query:
|
if posh_colors[i] in search_query:
|
||||||
url_color = '&color[]=' + posh_colors[i]
|
url_color = '&color[]=' + posh_colors[i]
|
||||||
color = posh_colors[i]
|
color = posh_colors[i]
|
||||||
@ -19,7 +21,7 @@ def url_base_builder(search_query):
|
|||||||
else:
|
else:
|
||||||
color = ''
|
color = ''
|
||||||
url_color = ''
|
url_color = ''
|
||||||
for i in range(0,len(genders)):
|
for i in range(0, len(genders)):
|
||||||
if genders[i] in search_query:
|
if genders[i] in search_query:
|
||||||
url_gender = '&department=' + genders[i]
|
url_gender = '&department=' + genders[i]
|
||||||
gender = genders[i]
|
gender = genders[i]
|
||||||
@ -28,23 +30,26 @@ def url_base_builder(search_query):
|
|||||||
gender = ''
|
gender = ''
|
||||||
url_gender = '&department=All'
|
url_gender = '&department=All'
|
||||||
|
|
||||||
sq = search_query.replace(color,'').replace(gender,'').replace('NEW','').replace(' ', '+')
|
sq = search_query.replace(color, '').replace(gender, '').replace('NEW', '').replace(' ', '+')
|
||||||
|
|
||||||
all_sold_url_base = 'https://poshmark.com/search?query=' + sq + "&availability=sold_out" + url_color + url_gender + '&max_id='
|
all_sold_url_base = 'https://poshmark.com/search?query=' + sq + \
|
||||||
|
"&availability=sold_out" + url_color + url_gender + '&max_id='
|
||||||
|
|
||||||
new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + '&condition=nwt_and_ret' + url_color + url_gender + '&max_id='
|
new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + \
|
||||||
|
'&condition=nwt_and_ret' + url_color + url_gender + '&max_id='
|
||||||
|
|
||||||
return all_sold_url_base, new_sold_url_base
|
return all_sold_url_base, new_sold_url_base
|
||||||
|
|
||||||
|
|
||||||
def all_sold_list_builder(i):
|
def all_sold_list_builder(i):
|
||||||
bases = url_base_builder(search_query)
|
bases = url_base_builder(search_query)
|
||||||
all_sold_url_base = bases[0]
|
all_sold_url_base = bases[0]
|
||||||
all_sold_prices = []
|
all_sold_prices = []
|
||||||
url = all_sold_url_base + str(i)
|
url = all_sold_url_base + str(i)
|
||||||
html = requests.get(url).text
|
html = requests.get(url).text
|
||||||
soup = b(html, "html.parser")
|
soup = b(html, "lxml")
|
||||||
#last_page = soup.find(string = re.compile('No Listings Found'))
|
# last_page = soup.find(string = re.compile('No Listings Found'))
|
||||||
for price in soup.find_all('span', {'class':'p--t--1 fw--bold'}):
|
for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
|
||||||
price = price.get_text()
|
price = price.get_text()
|
||||||
dollar_index = price.find('$')
|
dollar_index = price.find('$')
|
||||||
price = price[dollar_index+1:]
|
price = price[dollar_index+1:]
|
||||||
@ -53,16 +58,17 @@ def all_sold_list_builder(i):
|
|||||||
all_sold_prices.append(price)
|
all_sold_prices.append(price)
|
||||||
return all_sold_prices
|
return all_sold_prices
|
||||||
|
|
||||||
|
|
||||||
def new_sold_list_builder(i):
|
def new_sold_list_builder(i):
|
||||||
bases = url_base_builder(search_query)
|
bases = url_base_builder(search_query)
|
||||||
new_sold_url_base = bases[1]
|
new_sold_url_base = bases[1]
|
||||||
new_sold_prices = []
|
new_sold_prices = []
|
||||||
url = new_sold_url_base + str(i)
|
url = new_sold_url_base + str(i)
|
||||||
html = requests.get(url).text
|
html = requests.get(url).text
|
||||||
soup = b(html, "html.parser")
|
soup = b(html, "lxml")
|
||||||
#last_page = soup.find(string = re.compile('No Listings Found'))#this is present in all pages that don't have a full 48 listings on them. So you end up with an empty price list becuase of your conditional statement
|
# last_page = soup.find(string = re.compile('No Listings Found'))  # this is present in all pages that don't have a full 48 listings on them, so you end up with an empty price list because of your conditional statement
|
||||||
|
|
||||||
for price in soup.find_all('span', {'class':'p--t--1 fw--bold'}):
|
for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
|
||||||
price = price.get_text()
|
price = price.get_text()
|
||||||
dollar_index = price.find('$')
|
dollar_index = price.find('$')
|
||||||
price = price[dollar_index+1:]
|
price = price[dollar_index+1:]
|
||||||
@ -71,11 +77,12 @@ def new_sold_list_builder(i):
|
|||||||
new_sold_prices.append(price)
|
new_sold_prices.append(price)
|
||||||
return new_sold_prices
|
return new_sold_prices
|
||||||
|
|
||||||
|
|
||||||
search_query = str(input('Title Search: '))
|
search_query = str(input('Title Search: '))
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
page_list = list(range(1, 13))
|
page_list = list(range(1, 2))
|
||||||
all_sold_list = []
|
all_sold_list = []
|
||||||
new_sold_list = []
|
new_sold_list = []
|
||||||
|
|
||||||
@ -85,17 +92,18 @@ with concurrent.futures.ThreadPoolExecutor() as executor:
|
|||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
for future in executor.map(new_sold_list_builder, page_list):
|
for future in executor.map(new_sold_list_builder, page_list):
|
||||||
new_sold_list.extend(future)
|
new_sold_list.extend(future)  # if you can pull the NWT price simultaneously with the used price, then you won't have to use this
|
||||||
|
|
||||||
|
|
||||||
for element in new_sold_list:
|
for element in new_sold_list:
|
||||||
all_sold_list.remove(element)
|
all_sold_list.remove(element)
|
||||||
used_sold_list = all_sold_list
|
used_sold_list = all_sold_list
|
||||||
|
|
||||||
average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
|
average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
|
||||||
average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))
|
average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))
|
||||||
|
|
||||||
used_sold_results = str(len(used_sold_list)) + ' Results'
|
used_sold_results = str(len(used_sold_list)) + ' Used Results'
|
||||||
new_sold_results = str(len(new_sold_list)) + ' Results'
|
new_sold_results = str(len(new_sold_list)) + ' NWT Results'
|
||||||
total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'
|
total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'
|
||||||
|
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
Loading…
Reference in New Issue
Block a user