# poshmark/pms.py
# 2022-08-02 19:38:50 -07:00
import re
import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup as b
# Initial Search Query URL to Find ItemCondition Code
def url_build(search_query):
    """Build the two Poshmark sold-listing search URLs for a query.

    If the query names one of Poshmark's filterable colors, that word is
    lifted out of the keyword text and applied as a ``&color[]=`` filter
    instead. Matching is case-insensitive and on whole words only, so
    e.g. 'Tan' is not extracted from 'Titanium'.

    Args:
        search_query: raw user search string, e.g. "Red dress".

    Returns:
        Tuple ``(url, url_1, search_query)`` where ``url`` searches all
        sold listings, ``url_1`` additionally restricts to new-with-tags
        condition, and ``search_query`` is the caller's original string.
        Both URLs end in ``max_id=`` so a page number can be appended.
    """
    posh_colors = [
        'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
        'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan',
    ]
    keywords = search_query
    color_filter = ''
    for color in posh_colors:
        pattern = r'\b{}\b'.format(color)
        if re.search(pattern, keywords, flags=re.IGNORECASE):
            # Use the canonical capitalized name Poshmark's filter expects.
            color_filter = '&color[]=' + color
            keywords = re.sub(pattern, '', keywords, flags=re.IGNORECASE)
            break
    # split()/join collapses whitespace left behind by the color removal,
    # so the encoded query never contains empty '+' segments.
    encoded_query = '+'.join(keywords.split())
    base_url = "https://poshmark.com/search?query="
    common = "&availability=sold_out" + color_filter
    url = base_url + encoded_query + common + "&department=All&max_id="
    url_1 = base_url + encoded_query + common + "&condition=nwt_and_ret&department=All&max_id="
    return url, url_1, search_query
def _scrape_prices(base_url, limit=None):
    """Collect listing prices from every result page of ``base_url``.

    ``base_url`` must end in ``max_id=``; the page number is appended
    directly. Pages are fetched until Poshmark's end-of-results marker
    (``display:;`` in the HTML) appears.

    Args:
        base_url: search URL prefix ending in ``max_id=``.
        limit: when given, abort and return ``[]`` as soon as more than
            ``limit`` prices accumulate (sanity check against an
            inconsistent result set).

    Returns:
        List of listing prices as floats.
    """
    prices = []
    page = 1
    while True:
        html = requests.get(base_url + str(page)).text
        soup = b(html, 'lxml')
        tags = soup.find_all("span", {"class": "p--t--1 fw--bold"})
        prices.extend(float(tag.text.strip(' $\n')) for tag in tags)
        page += 1
        # End-of-results marker: Poshmark pages past the last result
        # carry a 'display:;' style in the HTML.
        if re.findall(r'display\:\;', html):
            break
        if limit is not None and len(prices) > limit:
            return []
    return prices


def get_sold(url, url_1):
    """Scrape sold-listing prices for both URLs built by ``url_build``.

    Args:
        url: search URL for all sold listings (new and used).
        url_1: search URL restricted to new-with-tags condition.

    Returns:
        Tuple ``(nau_prices, new_prices)``. If the new-only scrape
        somehow yields more results than the combined scrape,
        ``new_prices`` is returned empty as inconsistent.
    """
    # TODO Need to add option to avoid irrelevant results using regex and
    # search query keywords. First search the titles for every listing to
    # see if they contain all or some of the keywords present in the search
    # query. If they don't then just break the whole program and say no
    # results.
    nau_prices = _scrape_prices(url)
    new_prices = _scrape_prices(url_1, limit=len(nau_prices))
    return nau_prices, new_prices
# Begin element-wise subtraction of new condition items price list from new&used items price list:
def avgs(nau_prices, new_prices):
    """Derive the used-only price list and the new/used averages.

    Each new-condition price is removed once from a copy of the combined
    price list; whatever remains is treated as the used prices.

    Args:
        nau_prices: prices of all sold listings (new and used). Not
            mutated — the subtraction works on a copy.
        new_prices: prices of new-condition sold listings.

    Returns:
        Tuple ``(avg_used, avg_new, used_prices)``; an average is 0 when
        its corresponding price list is empty (avoids ``np.mean([])``
        returning nan with a runtime warning).
    """
    # Copy so the caller's combined list survives intact.
    used_prices = list(nau_prices)
    for price in new_prices:
        try:
            used_prices.remove(price)
        except ValueError:
            # Price absent from the combined list (scrape mismatch);
            # skip it instead of aborting the remaining removals.
            continue
    avg_new = np.mean(new_prices) if new_prices else 0
    avg_used = np.mean(used_prices) if used_prices else 0
    return avg_used, avg_new, used_prices
def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
    """Print a summary of the scraped used/new price statistics.

    Args:
        nau_prices: combined result count (new + used).
        new_prices: list of new-condition prices.
        used_prices: list of used-condition prices.
        avg_new: average new-condition price.
        avg_used: average used-condition price.
        nau_url: combined-search URL prefix ('1' is appended for page one).
        new_url: new-only search URL prefix ('1' is appended for page one).
    """
    summary = [
        f'\n{len(used_prices)} total results used\nAverage used price = ${avg_used}',
        f'URL new and used: {nau_url}1\n',
        f'\n{len(new_prices)} total results new\nAverage new price = ${avg_new}',
        f'URL new: {new_url}1\n',
        f'{nau_prices} Total results new and used',
    ]
    for line in summary:
        print(line)
def main():
    """Interactive entry point: prompt for a query, scrape sold listings,
    and print the used/new price summary."""
    search_query = input("\nSearch Title: ")
    nau_url, new_url, _ = url_build(search_query)
    nau_prices, new_prices = get_sold(nau_url, new_url)
    avg_used, avg_new, used_prices = avgs(nau_prices, new_prices)
    # Combined total is recomputed from the split lists so it reflects
    # the element-wise subtraction performed in avgs().
    combined_total = len(new_prices) + len(used_prices)
    display_results(combined_total, new_prices, used_prices,
                    avg_new, avg_used, nau_url, new_url)


if __name__ == '__main__':
    main()