"""Poshmark sold-listing price scraper.

Builds Poshmark search URLs for a query, scrapes sold-listing prices
(all conditions vs. new-with-tags only), and prints result counts and
average prices for new and used items.
"""
import re

import numpy as np
import requests
import bs4  # NOTE(review): unused alongside the alias import below; kept to preserve the import surface
from bs4 import BeautifulSoup as b


# Initial Search Query URL to find the item-condition code.
def url_build(search_query):
    """Build the sold-listing search URLs for *search_query*.

    Returns a tuple ``(url, url_1, SQ)`` where ``url`` lists sold items in
    any condition, ``url_1`` restricts to new-with-tags (``nwt_and_ret``),
    and ``SQ`` is the original query string.  Both URLs end in ``max_id=``
    so the caller appends a page number to paginate.
    """
    SQ = search_query
    SQ_1 = SQ.replace(' ', '+')

    # Colors Poshmark exposes as a &color[]= filter.  Matching is
    # case-sensitive against the raw query (capitalized names only).
    posh_colors = [
        'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
        'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan',
    ]

    for color in posh_colors:
        if color in SQ:
            # Move the color out of the free-text query and into the filter.
            url_color = '&color[]=' + color
            url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
            SQ_1 = SQ_1.replace(color, '')
            break
    else:  # no color keyword in the query: plain filters
        url_separator = "&availability=sold_out&department=All&max_id="
        url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="

    base_url = "https://poshmark.com/search?query="
    url = base_url + SQ_1 + url_separator
    url_1 = base_url + SQ_1 + url_separator1

    return url, url_1, SQ


def get_sold(url, url_1):
    """Scrape sold-listing prices from both URLs, one page at a time.

    ``url`` covers new-and-used sold listings, ``url_1`` new-only; each is
    paginated by appending an increasing page number.  Returns
    ``(nau_prices, new_prices)`` as lists of floats.
    """
    # TODO: avoid irrelevant results using regex and search-query keywords:
    # first check every listing title for the query keywords; if none match,
    # stop the whole program and report no results.

    nau_prices = []  # NEW AND USED PRICES
    new_prices = []

    pg = 1
    while True:
        aurl = url + str(pg)
        html = requests.get(aurl).text
        soup = b(html, 'lxml')  # NEW+USED SOUP OBJECT
        spans = soup.find_all("span", {"class": "p--t--1 fw--bold"})
        # NOTE(review): assumes the price text is like " $25\n"; prices with
        # thousands separators would make float() raise — TODO confirm.
        nau_prices.extend(float(span.text.strip(' $\n')) for span in spans)
        pg += 1
        # 'display:;' presumably only appears in the markup of the empty
        # results page, acting as a last-page sentinel — TODO confirm it
        # still holds, otherwise this loop never terminates.
        if re.findall(r'display\:\;', html):
            break

    pg = 1
    while True:
        burl_1 = url_1 + str(pg)
        html1 = requests.get(burl_1).text
        soup1 = b(html1, 'lxml')  # NEW SOUP OBJECT
        spans = soup1.find_all("span", {"class": "p--t--1 fw--bold"})
        new_prices.extend(float(span.text.strip(' $\n')) for span in spans)
        pg += 1
        if re.findall(r'display\:\;', html1):
            break
        if len(new_prices) > len(nau_prices):
            # More "new" results than total results means the condition
            # filter was not applied; discard the bogus list.
            new_prices = []
            break

    return nau_prices, new_prices


# Element-wise subtraction of the new-condition price list from the
# new&used price list leaves the used-only prices.
def avgs(nau_prices, new_prices):
    """Split ``nau_prices`` into used-only prices and compute averages.

    Mutates ``nau_prices`` in place, removing one occurrence of each new
    price; what remains is ``used_prices``.  Returns
    ``(avg_used, avg_new, used_prices)``; an average is 0 when the
    corresponding list is empty.
    """
    for price in new_prices:
        try:
            nau_prices.remove(price)
        except ValueError:
            # BUGFIX: was `break`, which abandoned all remaining removals
            # after the first new price missing from the combined list and
            # corrupted used_prices; skip just the missing element instead.
            continue
    used_prices = nau_prices

    avg_new = np.mean(new_prices) if len(new_prices) > 0 else 0
    # BUGFIX: np.mean([]) is nan (with a RuntimeWarning); report 0 instead,
    # matching the existing empty-new_prices convention.
    avg_used = np.mean(used_prices) if len(used_prices) > 0 else 0

    return avg_used, avg_new, used_prices


def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
    """Print result counts, average prices, and first-page URLs.

    ``nau_prices`` is the combined result count (an int), not a list;
    ``new_prices``/``used_prices`` are the price lists.
    """
    used_results = '\n{} total results used\nAverage used price = ${}'.format(len(used_prices), avg_used)
    nau_link = 'URL new and used: {}\n'.format(nau_url + '1')

    new_results = '\n{} total results new\nAverage new price = ${}'.format(len(new_prices), avg_new)
    new_link = 'URL new: {}\n'.format(new_url + '1')

    total_results = '{} Total results new and used'.format(nau_prices)

    print(used_results)
    print(nau_link)
    print(new_results)
    print(new_link)
    print(total_results)


def main():
    """Prompt for a search title, scrape sold prices, and print a summary."""
    search_query = input("\nSearch Title: ")
    nau_url, new_url, _ = url_build(search_query)
    nau_prices, new_prices = get_sold(nau_url, new_url)
    # avgs() mutates nau_prices in place, leaving only the used prices.
    avg_used, avg_new, used_prices = avgs(nau_prices, new_prices)
    total_count = len(new_prices) + len(used_prices)
    display_results(total_count, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)


if __name__ == '__main__':
    main()