Compare commits
3 Commits
faster_req...master
Author | SHA1 | Date
---|---|---
 | 071db4705e |
 | fd590d913f |
 | cd25b046e0 |
pms.py (234 changes)
```diff
@@ -1,172 +1,128 @@
-import seaborn as sns
-import numpy as np
-import matplotlib.pyplot as plt
 import re
-# import bs4
-from bs4 import BeautifulSoup as b
+import numpy as np
 import requests
-import time
+import bs4
+from bs4 import BeautifulSoup as b
 
 # Initial Search Query URL to Find ItemCondition Code
-while True:
-    print("Title Search:")
 
-    start = time.time()
+def url_build(search_query):
 
-    SQ = str(input())
-    SQ_1 = SQ.replace(' ', '+').replace('NEW',
-                      '').replace('men', '').replace('women', '')
-    gender = ['Men', 'Women']
+    SQ = search_query
+    SQ_1 = SQ.replace(' ', '+')
 
     #&color[]=color
-    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
-                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
+    posh_colors = [
+        'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
+        'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan'
+    ]
 
     for i in range(0,len(posh_colors)):
         if posh_colors[i] in SQ:
             url_color = '&color[]=' + posh_colors[i]
             url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
-            url_separator1 = "&availability=sold_out" + url_color + \
-                "&condition=nwt_and_ret&department=All&max_id="
+            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
             SQ_1 = SQ_1.replace(posh_colors[i], '')
             break
 
     else:
         url_separator = "&availability=sold_out&department=All&max_id="
         url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="
 
-    prices = []
-    prices1 = []
     base_url = "https://poshmark.com/search?query="
 
+    url = base_url + SQ_1 + url_separator
+    url_1 = base_url + SQ_1 + url_separator1
+
+    return url, url_1, SQ
+
+
+def get_sold(url, url_1):
+    #TODO Need to add option to avoid irrelevant results using regex and search query
+    # keywords. First search the titles for every listing to see if they contain all or
+    # some of the keywords present in the search query. If they don't then just break
+    # the whole program and say no results
+
+    nau_prices = [] # NEW AND USED PRICES
+    new_prices = []
+
     pg = 1
-    url = base_url + SQ_1 + url_separator + str(pg)
-    url_1 = base_url + SQ_1 + url_separator1 + str(pg)
-    url_a = base_url + SQ_1 + url_separator + str(pg)
-    url_1b = base_url + SQ_1 + url_separator1 + str(pg)
-
-    html = requests.get(url).text
-    html1 = requests.get(url_1).text
-    soup = b(html, "lxml")
-    soup1 = b(html1, 'lxml')
-
-    # Begin new and used condition items price list:
-
-    for listing in soup.findAll("div", {"class": "item__details"}):
-        price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-        indices = price.find('$')
-        price = price[indices + 1:]
-        space = price.find(' ')
-        price = int(price[:space - 1])
-        prices.append(price)
 
     while True:
-        last_page = soup.find_all(string=re.compile('No Listings Found'))
-        if last_page:
+        aurl = url + str(pg)
+        html = requests.get(aurl).text
+        soup = b(html, 'lxml') # NEW+USED SOUP OBJECT
+        temp_prices_nau = soup.find_all("span", {"class": "p--t--1 fw--bold"})
+        temp_prices_nau = [float(thing.text.strip(' $\n')) for thing in temp_prices_nau]
+        nau_prices.extend(temp_prices_nau)
+        pg += 1
+        if len(re.findall(r'display\:\;', html))>0:
             break
-        pg = pg + 1
-        url = base_url + SQ_1 + url_separator + str(pg)
-        html = requests.get(url).text
-        soup = b(html, "lxml")
-
-        for listing in soup.findAll("div", {"class": "item__details"}):
-            price = listing.findAll(
-                "span", {"class": "p--t--1 fw--bold"})[0].text
-            # indices = [i for i, dollars in enumerate(price) if dollars == '$']
-            # price = int(price[1:indices[1]-1])
-            indices = price.find('$')
-            price = price[indices + 1:]
-            space = price.find(' ')
-            price = int(price[:space - 1])
-            prices.append(price)
-
-    # Begin new condition item prices list:
-
-    for listing in soup1.findAll("div", {"class": "item__details"}):
-        price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-        # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
-        # price1 = int(price1[1:indices[1]-1])
-        indices = price1.find('$')
-        price1 = price1[indices + 1:]
-        space = price1.find(' ')
-        price1 = int(price1[:space - 1])
-        prices1.append(price1)
 
+    pg = 1
     while True:
-        last_page = soup1.find_all(string=re.compile('No Listings Found'))
-        if last_page:
+        burl_1 = url_1 + str(pg)
+        html1 = requests.get(burl_1).text
+        soup1 = b(html1,'lxml') # NEW SOUP OBJECT
+        temp_prices_new = soup1.find_all("span", {"class": "p--t--1 fw--bold"})
+        temp_prices_new = [float(thing.text.strip(' $\n')) for thing in temp_prices_new]
+        new_prices.extend(temp_prices_new)
+        pg += 1
+        if len(re.findall(r'display\:\;', html1))>0:
+            break
+        if len(new_prices)>len(nau_prices):
+            new_prices = []
             break
-        pg = pg + 1
-        url_1 = base_url + SQ_1 + url_separator1 + str(pg)
-        html1 = requests.get(url_1).text
-        soup1 = b(html1, "lxml")
-
-        for listing in soup1.findAll("div", {"class": "item__details"}):
-            price1 = listing.findAll(
-                "span", {"class": "p--t--1 fw--bold"})[0].text
-            # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
-            # price1 = int(price1[1:indices[1]-1])
-            indices = price1.find('$')
-            price1 = price1[indices + 1:]
-            space = price1.find(' ')
-            price1 = int(price1[:space - 1])
-            prices1.append(price1)
+
+    return nau_prices, new_prices
 
 # Begin Element-wise substraction of new condition items price list from new&used items price list:
-    print(len(prices), 'NEW & USED')
-    print(len(prices1), 'NEW')
-
-    end = time.time()
-
-    print(end - start)
-
-    for element in prices1:
-        prices.remove(element)
-
-    if 'NEW' in SQ:
-        kde_datapoints = sns.kdeplot(prices1, shade=True).get_lines()[
-            0].get_data()
-        sns.rugplot(prices1)
-        print(str(len(prices1)) + " Results" + "\n")
-        print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n")
-        total_price = np.mean(prices1) + 6.79
-        print("Average Total Price New = $" + str(total_price) + "\n")
-        print("Flat Rate Shipping = $6.79" + "\n")
-
-        kde_x = kde_datapoints[0]
-        kde_y = kde_datapoints[1]
-        optimal_price = kde_x[np.argmax(kde_y)]
-        print("Optimal Price New = $" + str(optimal_price) + "\n")
-        print("Optimal Price Including Shipping New = $" +
-              str(optimal_price + 6.79) + "\n")
-        print("URL Link (New): " + url_1b + "\n")
-        plt.ylabel('KDE')
-        plt.xlabel('Price ($)')
-        plt.show()
-
-    else:
+def avgs(nau_prices, new_prices):
+    for price in new_prices:
         try:
+            nau_prices.remove(price)
+        except ValueError:
+            break
+    used_prices = nau_prices
 
-            kde_datapoints = sns.kdeplot(prices, shade=True).get_lines()[
-                0].get_data()
-            sns.rugplot(prices)
-            print(str(len(prices)) + " Results" + "\n")
-            print("Average Price Sold Used = $" + str(np.mean(prices)) + "\n")
-            total_price = np.mean(prices) + 6.79
-            print("Average Total Price Used = $" + str(total_price) + "\n")
-            print("Flat Rate Shipping = $6.79" + "\n")
-            import winsound
-            winsound.Beep(440, 300)
+    if len(new_prices)>0:
+        avg_new = np.mean(new_prices)
+        avg_used = np.mean(used_prices)
+    else:
+        avg_new = 0
+        avg_used = np.mean(used_prices)
+
+    return avg_used, avg_new, used_prices
+
+
+def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
+
+    used_results = '\n{} total results used\nAverage used price = ${}'.format(len(used_prices),avg_used)
+    nau_link = 'URL new and used: {}\n'.format(nau_url+'1')
+
+    new_results = '\n{} total results new\nAverage new price = ${}'.format(len(new_prices),avg_new)
+    new_link = 'URL new: {}\n'.format(new_url+'1')
+
+    total_results = '{} Total results new and used'.format(nau_prices)
+
+    print(used_results)
+    print(nau_link)
+    print(new_results)
+    print(new_link)
+    print(total_results)
+
+
+def main():
+    search_query = input("\nSearch Title: ")
+    urls = url_build(search_query)
+    nau_url = urls[0]
+    new_url = urls[1]
+    prices = get_sold(nau_url, new_url)
+    nau_prices = prices[0]
+    new_prices = prices[1]
+    averages = avgs(nau_prices, new_prices)
+    avg_used = averages[0]
+    avg_new = averages[1]
+    used_prices = averages[2]
+    nau_prices = len(new_prices)+len(used_prices)
+    display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)
+
+
+if __name__=='__main__':
+    main()
 
-            kde_x = kde_datapoints[0]
-            kde_y = kde_datapoints[1]
-            optimal_price = kde_x[np.argmax(kde_y)]
-            print("Optimal Price Used = $" + str(optimal_price) + "\n")
-            print("Optimal Price Including Shipping Used = $" +
-                  str(optimal_price + 6.79) + "\n")
-            print("URL Link: " + url_a + "\n")
-            plt.ylabel('KDE')
-            plt.xlabel('Price ($)')
-            plt.show()
-        except IndexError:
-            print('\n' + '0 results' + '\n')
-            pass
```
```diff
@@ -1,2 +0,0 @@
-import requests
-import
```
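The core change in this compare is that `get_sold` now pulls every price span from a results page in a single `find_all` pass and pages forward until the raw HTML contains the `display:;` marker, instead of re-parsing each listing's `item__details` block individually. Below is a minimal, offline sketch of that extraction pattern; the inline `html` snippet is hypothetical markup modeled on the spans the diff targets, and `html.parser` is substituted for the `lxml` parser the script itself uses, so the example runs with no extra install.

```python
# Sketch of the bulk price-extraction pattern from get_sold(), run on a
# static HTML snippet so it works offline. The class string
# "p--t--1 fw--bold" matches the spans targeted in the diff.
from bs4 import BeautifulSoup as b

html = """
<div class="item__details"><span class="p--t--1 fw--bold">$25 </span></div>
<div class="item__details"><span class="p--t--1 fw--bold">$40 </span></div>
"""

soup = b(html, "html.parser")  # the script itself passes "lxml" here
spans = soup.find_all("span", {"class": "p--t--1 fw--bold"})
prices = [float(s.text.strip(' $\n')) for s in spans]
print(prices)  # -> [25.0, 40.0]
```

The loop's stop test in the diff is likewise a plain regex search for `display:;` in the fetched page (`re.findall(r'display\:\;', html)`), which avoids building a second soup object just to detect the "No Listings Found" state.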