replaced pms.py with newer version
This commit is contained in:
		
							
								
								
									
										234
									
								
								pms.py
									
									
									
									
									
								
							
							
						
						
									
										234
									
								
								pms.py
									
									
									
									
									
								
							@@ -1,172 +1,128 @@
 | 
			
		||||
import seaborn as sns
 | 
			
		||||
import numpy as np
 | 
			
		||||
import matplotlib.pyplot as plt
 | 
			
		||||
import re
 | 
			
		||||
# import bs4
 | 
			
		||||
from bs4 import BeautifulSoup as b
 | 
			
		||||
import numpy as np
 | 
			
		||||
import requests
 | 
			
		||||
import time
 | 
			
		||||
import bs4
 | 
			
		||||
from bs4 import BeautifulSoup as b
 | 
			
		||||
 | 
			
		||||
# Initial Search Query URL to Find ItemCondition Code
 | 
			
		||||
while True:
 | 
			
		||||
    print("Title Search:")
 | 
			
		||||
 | 
			
		||||
    start = time.time()
 | 
			
		||||
def url_build(search_query):
    """Build the two Poshmark sold-listing search URLs for a query.

    If one of the supported colour names appears as a whole word in the
    query (case-sensitive, first match in ``posh_colors`` order), it is
    removed from the search terms and sent as a ``color[]`` filter instead.

    Args:
        search_query: Free-text search title as typed by the user.

    Returns:
        A tuple ``(url, url_1, search_query)``:
        ``url`` is the search URL for all sold listings, ``url_1`` is the
        same search with ``condition=nwt_and_ret`` added (used by the rest
        of the script as the "new" listing search), and ``search_query`` is
        the unmodified input. Both URLs end in ``max_id=`` so the caller
        can append a page number.
    """
    # Local import: keeps the block self-contained without touching the
    # file-level import list.
    from urllib.parse import quote_plus

    # Colour names accepted by the &color[]= filter.
    posh_colors = [
        'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
        'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan',
    ]

    words = search_query.split()

    # Match whole words only: a substring test would treat "Tank" as the
    # colour "Tan" and mangle the query into "k".
    color = next((name for name in posh_colors if name in words), None)
    if color is None:
        color_param = ''
    else:
        color_param = '&color[]=' + color
        words = [word for word in words if word != color]

    # Percent-encode each term so characters such as '&' cannot terminate
    # the query parameter early.
    query = '+'.join(quote_plus(word) for word in words)

    base_url = "https://poshmark.com/search?query="
    url_separator = "&availability=sold_out" + color_param + "&department=All&max_id="
    url_separator1 = ("&availability=sold_out" + color_param
                      + "&condition=nwt_and_ret&department=All&max_id=")

    url = base_url + query + url_separator
    url_1 = base_url + query + url_separator1

    return url, url_1, search_query
 | 
			
		||||
 | 
			
		||||
def _parse_price(label):
    """Convert a scraped price label (e.g. ' $1,250\\n') to a float."""
    return float(label.strip(' $\n').replace(',', ''))


def _scrape_prices(url_prefix, limit=None):
    """Collect prices from successive result pages of one search.

    Pages are requested as ``url_prefix + "1"``, ``url_prefix + "2"``, ...
    Paging stops when the page HTML contains ``display:;`` (the marker the
    script treats as "last page" -- presumably an unhidden end-of-results
    element; confirm against the live markup) or when a page yields no
    prices at all, which guards against looping forever if that marker
    never appears.

    If ``limit`` is given and more than ``limit`` prices have been
    collected on a page that is not the last one, an empty list is
    returned.
    """
    collected = []
    page = 1
    while True:
        html = requests.get(url_prefix + str(page), timeout=30).text
        soup = b(html, 'lxml')
        tags = soup.find_all("span", {"class": "p--t--1 fw--bold"})
        page_prices = [_parse_price(tag.text) for tag in tags]
        collected.extend(page_prices)
        page += 1

        if re.search(r'display\:\;', html):
            break
        if limit is not None and len(collected) > limit:
            # More filtered results than unfiltered ones: the filtered
            # search is not a subset, so its prices are discarded.
            return []
        if not page_prices:
            break
    return collected


def get_sold(url, url_1):
    """Scrape sold prices for the unfiltered and the condition-filtered search.

    Args:
        url: Search URL prefix ending in ``max_id=`` for all sold listings.
        url_1: Search URL prefix ending in ``max_id=`` for the
            condition-filtered ("new") sold listings.

    Returns:
        ``(nau_prices, new_prices)``: two lists of floats. ``nau_prices``
        holds the prices from ``url`` (new and used together);
        ``new_prices`` holds the prices from ``url_1``, or is empty if that
        search returned more prices than the unfiltered one.

    Raises:
        requests.exceptions.RequestException: if a page request fails or
            takes longer than 30 seconds.
        ValueError: if a price label cannot be parsed as a number.
    """
    # TODO: Add an option to avoid irrelevant results using regex and the
    # search-query keywords. First search the title of every listing to see
    # whether it contains all or some of the keywords in the search query;
    # if none do, stop and report no results.
    nau_prices = _scrape_prices(url)  # NEW AND USED PRICES
    new_prices = _scrape_prices(url_1, limit=len(nau_prices))
    return nau_prices, new_prices
 | 
			
		||||
 | 
			
		||||
# Begin Element-wise subtraction of new condition items price list from new&used items price list:
 | 
			
		||||
    print(len(prices), 'NEW & USED')
 | 
			
		||||
    print(len(prices1), 'NEW')
 | 
			
		||||
 | 
			
		||||
    end = time.time()
 | 
			
		||||
 | 
			
		||||
    print(end - start)
 | 
			
		||||
 | 
			
		||||
    for element in prices1:
 | 
			
		||||
        prices.remove(element)
 | 
			
		||||
 | 
			
		||||
    if 'NEW' in SQ:
 | 
			
		||||
        kde_datapoints = sns.kdeplot(prices1, shade=True).get_lines()[
 | 
			
		||||
            0].get_data()
 | 
			
		||||
        sns.rugplot(prices1)
 | 
			
		||||
        print(str(len(prices1)) + " Results" + "\n")
 | 
			
		||||
        print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n")
 | 
			
		||||
        total_price = np.mean(prices1) + 6.79
 | 
			
		||||
        print("Average Total Price New = $" + str(total_price) + "\n")
 | 
			
		||||
        print("Flat Rate Shipping = $6.79" + "\n")
 | 
			
		||||
 | 
			
		||||
        kde_x = kde_datapoints[0]
 | 
			
		||||
        kde_y = kde_datapoints[1]
 | 
			
		||||
        optimal_price = kde_x[np.argmax(kde_y)]
 | 
			
		||||
        print("Optimal Price New = $" + str(optimal_price) + "\n")
 | 
			
		||||
        print("Optimal Price Including Shipping New = $" +
 | 
			
		||||
              str(optimal_price + 6.79) + "\n")
 | 
			
		||||
        print("URL Link (New): " + url_1b + "\n")
 | 
			
		||||
        plt.ylabel('KDE')
 | 
			
		||||
        plt.xlabel('Price ($)')
 | 
			
		||||
        plt.show()
 | 
			
		||||
    else:
 | 
			
		||||
def avgs(nau_prices, new_prices):
    """Split out the used prices and compute the average new and used price.

    The used prices are the multiset difference ``nau_prices - new_prices``:
    for every price in ``new_prices`` the first matching entry of
    ``nau_prices`` is dropped. New prices that have no match are ignored
    rather than stopping the subtraction. The input lists are not modified.

    Args:
        nau_prices: Prices of all sold listings (new and used together).
        new_prices: Prices of the sold listings in new condition.

    Returns:
        ``(avg_used, avg_new, used_prices)``. An average is ``0`` when its
        list is empty.
    """
    # Local import: keeps the block self-contained without touching the
    # file-level import list.
    from collections import Counter

    # Remaining number of entries to drop for each new price.
    to_drop = Counter(new_prices)
    used_prices = []
    for price in nau_prices:
        if to_drop[price] > 0:
            to_drop[price] -= 1
        else:
            used_prices.append(price)

    # np.mean of an empty list is nan (with a RuntimeWarning); report 0.
    avg_new = np.mean(new_prices) if len(new_prices) > 0 else 0
    avg_used = np.mean(used_prices) if used_prices else 0

    return avg_used, avg_new, used_prices
 | 
			
		||||
 | 
			
		||||
def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
    """Print the used/new result counts, average prices and search links.

    Args:
        nau_prices: Total number of new-and-used results. The list of
            prices itself is also accepted, in which case its length is
            reported.
        new_prices: Prices of the new-condition listings.
        used_prices: Prices of the used listings.
        avg_new: Average new price.
        avg_used: Average used price.
        nau_url: New-and-used search URL prefix; ``'1'`` is appended so the
            printed link points at the first page.
        new_url: New-condition search URL prefix, treated the same way.
    """
    # Accept either a ready-made count or the price list.
    total_count = len(nau_prices) if hasattr(nau_prices, '__len__') else nau_prices

    # Averages are dollar amounts, so print them to the cent.
    used_results = '\n{} total results used\nAverage used price = ${:.2f}'.format(
        len(used_prices), avg_used)
    nau_link = 'URL new and used: {}\n'.format(nau_url + '1')

    new_results = '\n{} total results new\nAverage new price = ${:.2f}'.format(
        len(new_prices), avg_new)
    new_link = 'URL new: {}\n'.format(new_url + '1')

    total_results = '{} Total results new and used'.format(total_count)

    for section in (used_results, nau_link, new_results, new_link, total_results):
        print(section)
 | 
			
		||||
 | 
			
		||||
def main():
    """Run one interactive lookup: prompt, scrape, average, and print.

    Reads a search title from standard input, builds the two search URLs,
    scrapes the sold prices for each, derives the used prices and the
    averages, and prints the summary.
    """
    search_query = input("\nSearch Title: ")

    # url_build also returns the query text, which is not needed here.
    nau_url, new_url, _ = url_build(search_query)
    all_prices, new_prices = get_sold(nau_url, new_url)
    avg_used, avg_new, used_prices = avgs(all_prices, new_prices)

    # display_results takes the combined result count as its first argument.
    total_count = len(new_prices) + len(used_prices)
    display_results(total_count, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)


if __name__ == '__main__':
    main()
 | 
			
		||||
 | 
			
		||||
            kde_x = kde_datapoints[0]
 | 
			
		||||
            kde_y = kde_datapoints[1]
 | 
			
		||||
            optimal_price = kde_x[np.argmax(kde_y)]
 | 
			
		||||
            print("Optimal Price Used = $" + str(optimal_price) + "\n")
 | 
			
		||||
            print("Optimal Price Including Shipping Used = $" +
 | 
			
		||||
                  str(optimal_price + 6.79) + "\n")
 | 
			
		||||
            print("URL Link: " + url_a + "\n")
 | 
			
		||||
            plt.ylabel('KDE')
 | 
			
		||||
            plt.xlabel('Price ($)')
 | 
			
		||||
            plt.show()
 | 
			
		||||
        except IndexError:
 | 
			
		||||
            print('\n' + '0 results' + '\n')
 | 
			
		||||
            pass
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user