import re

import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup as b


# Initial search-query URL construction, used to find the item-condition code
def url_build(search_query):
|
|
|
|
SQ = search_query
|
|
SQ_1 = SQ.replace(' ', '+')
|
|
|
|
#&color[]=color
|
|
posh_colors = [
|
|
'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
|
|
'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan'
|
|
]
|
|
|
|
for i in range(0,len(posh_colors)):
|
|
if posh_colors[i] in SQ:
|
|
url_color = '&color[]=' + posh_colors[i]
|
|
url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
|
|
url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
|
|
SQ_1 = SQ_1.replace(posh_colors[i], '')
|
|
break
|
|
|
|
else:
|
|
url_separator = "&availability=sold_out&department=All&max_id="
|
|
url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="
|
|
|
|
base_url = "https://poshmark.com/search?query="
|
|
|
|
url = base_url + SQ_1 + url_separator
|
|
url_1 = base_url + SQ_1 + url_separator1
|
|
|
|
return url, url_1, SQ
|
|
|
|
def get_sold(url, url_1):
|
|
#TODO Need to add option to avoid irrelevant results using regex and search query
|
|
# keywords. First search the titles for every listing to see if they contain all or
|
|
# some of the keywords present in the search query. If they don't then just break
|
|
# the whole program and say no results
|
|
|
|
nau_prices = [] # NEW AND USED PRICES
|
|
new_prices = []
|
|
|
|
pg = 1
|
|
while True:
|
|
aurl = url + str(pg)
|
|
html = requests.get(aurl).text
|
|
soup = b(html, 'lxml') # NEW+USED SOUP OBJECT
|
|
temp_prices_nau = soup.find_all("span", {"class": "p--t--1 fw--bold"})
|
|
temp_prices_nau = [float(thing.text.strip(' $\n')) for thing in temp_prices_nau]
|
|
nau_prices.extend(temp_prices_nau)
|
|
pg += 1
|
|
if len(re.findall(r'display\:\;', html))>0:
|
|
break
|
|
|
|
pg = 1
|
|
while True:
|
|
burl_1 = url_1 + str(pg)
|
|
html1 = requests.get(burl_1).text
|
|
soup1 = b(html1,'lxml') # NEW SOUP OBJECT
|
|
temp_prices_new = soup1.find_all("span", {"class": "p--t--1 fw--bold"})
|
|
temp_prices_new = [float(thing.text.strip(' $\n')) for thing in temp_prices_new]
|
|
new_prices.extend(temp_prices_new)
|
|
pg += 1
|
|
if len(re.findall(r'display\:\;', html1))>0:
|
|
break
|
|
if len(new_prices)>len(nau_prices):
|
|
new_prices = []
|
|
break
|
|
|
|
return nau_prices, new_prices
|
|
|
|
# Element-wise subtraction of the new-condition price list from the
# new-and-used price list:
def avgs(nau_prices, new_prices):
|
|
for price in new_prices:
|
|
try:
|
|
nau_prices.remove(price)
|
|
except ValueError:
|
|
break
|
|
used_prices = nau_prices
|
|
|
|
if len(new_prices)>0:
|
|
avg_new = np.mean(new_prices)
|
|
avg_used = np.mean(used_prices)
|
|
else:
|
|
avg_new = 0
|
|
avg_used = np.mean(used_prices)
|
|
|
|
return avg_used, avg_new, used_prices
|
|
|
|
def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
|
|
|
|
used_results = '\n{} total results used\nAverage used price = ${}'.format(len(used_prices),avg_used)
|
|
nau_link = 'URL new and used: {}\n'.format(nau_url+'1')
|
|
|
|
new_results = '\n{} total results new\nAverage new price = ${}'.format(len(new_prices),avg_new)
|
|
new_link = 'URL new: {}\n'.format(new_url+'1')
|
|
|
|
total_results = '{} Total results new and used'.format(nau_prices)
|
|
|
|
print(used_results)
|
|
print(nau_link)
|
|
print(new_results)
|
|
print(new_link)
|
|
print(total_results)
|
|
|
|
def main():
|
|
search_query = input("\nSearch Title: ")
|
|
urls = url_build(search_query)
|
|
nau_url = urls[0]
|
|
new_url = urls[1]
|
|
prices = get_sold(nau_url, new_url)
|
|
nau_prices = prices[0]
|
|
new_prices = prices[1]
|
|
averages = avgs(nau_prices, new_prices)
|
|
avg_used = averages[0]
|
|
avg_new = averages[1]
|
|
used_prices = averages[2]
|
|
nau_prices = len(new_prices)+len(used_prices)
|
|
display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)
|
|
|
|
if __name__=='__main__':
|
|
main()
|
|
|