replaced pms.py with newer version
pms.py (240 changed lines)

@@ -1,172 +1,128 @@
-import seaborn as sns
-import numpy as np
-import matplotlib.pyplot as plt
 import re
-# import bs4
-from bs4 import BeautifulSoup as b
+import numpy as np
 import requests
-import time
+import bs4
+from bs4 import BeautifulSoup as b
 
 # Initial Search Query URL to Find ItemCondition Code
-while True:
-    print("Title Search:")
 
-    start = time.time()
+def url_build(search_query):
 
-    SQ = str(input())
-    SQ_1 = SQ.replace(' ', '+').replace('NEW',
-                                        '').replace('men', '').replace('women', '')
-    gender = ['Men', 'Women']
+    SQ = search_query
+    SQ_1 = SQ.replace(' ', '+')
 
-    # &color[]=color
-    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
-                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
+    #&color[]=color
+    posh_colors = [
+            'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
+            'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan'
+            ]
 
-    for i in range(0, len(posh_colors)):
+    for i in range(0,len(posh_colors)):
         if posh_colors[i] in SQ:
             url_color = '&color[]=' + posh_colors[i]
             url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
-            url_separator1 = "&availability=sold_out" + url_color + \
-                "&condition=nwt_and_ret&department=All&max_id="
+            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
             SQ_1 = SQ_1.replace(posh_colors[i], '')
             break
+
         else:
             url_separator = "&availability=sold_out&department=All&max_id="
             url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="
 
-    prices = []
-    prices1 = []
     base_url = "https://poshmark.com/search?query="
 
+    url = base_url + SQ_1 + url_separator
+    url_1 = base_url + SQ_1 + url_separator1
+
+    return url, url_1, SQ
+
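A quick trace of the new url_build helper (hypothetical input; the URLs follow mechanically from the string concatenation above and are not verified against live Poshmark). get_sold later appends the page number after max_id=:

    # Hypothetical call; output derived by hand from the concatenation above.
    url, url_1, SQ = url_build('Red Nike Hoodie')

    # url   -> https://poshmark.com/search?query=+Nike+Hoodie&availability=sold_out&color[]=Red&department=All&max_id=
    # url_1 -> https://poshmark.com/search?query=+Nike+Hoodie&availability=sold_out&color[]=Red&condition=nwt_and_ret&department=All&max_id=
    # (stripping the color word from SQ_1 leaves a leading '+'; SQ is returned unchanged)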
+def get_sold(url, url_1):
+#TODO Need to add option to avoid irrelevant results using regex and search query
+# keywords. First search the titles for every listing to see if they contain all or
+# some of the keywords present in the search query. If they don't then just break
+# the whole program and say no results
+
+    nau_prices = [] # NEW AND USED PRICES
+    new_prices = []
+
     pg = 1
-    url = base_url + SQ_1 + url_separator + str(pg)
-    url_1 = base_url + SQ_1 + url_separator1 + str(pg)
-    url_a = base_url + SQ_1 + url_separator + str(pg)
-    url_1b = base_url + SQ_1 + url_separator1 + str(pg)
-
-    html = requests.get(url).text
-    html1 = requests.get(url_1).text
-    soup = b(html, "lxml")
-    soup1 = b(html1, 'lxml')
-
-    # Begin new and used condition items price list:
-
-    for listing in soup.findAll("div", {"class": "item__details"}):
-        price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-        indices = price.find('$')
-        price = price[indices + 1:]
-        space = price.find(' ')
-        price = int(price[:space - 1])
-        prices.append(price)
-
     while True:
-        last_page = soup.find_all(string=re.compile('No Listings Found'))
-        if last_page:
-            break
-        pg = pg + 1
-        url = base_url + SQ_1 + url_separator + str(pg)
-        html = requests.get(url).text
-        soup = b(html, "lxml")
-
-        for listing in soup.findAll("div", {"class": "item__details"}):
-            price = listing.findAll(
-                "span", {"class": "p--t--1 fw--bold"})[0].text
-            # indices = [i for i, dollars in enumerate(price) if dollars == '$']
-            # price = int(price[1:indices[1]-1])
-            indices = price.find('$')
-            price = price[indices + 1:]
-            space = price.find(' ')
-            price = int(price[:space - 1])
-            prices.append(price)
-
-    # Begin new condition item prices list:
-
-    for listing in soup1.findAll("div", {"class": "item__details"}):
-        price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-        # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
-        # price1 = int(price1[1:indices[1]-1])
-        indices = price1.find('$')
-        price1 = price1[indices + 1:]
-        space = price1.find(' ')
-        price1 = int(price1[:space - 1])
-        prices1.append(price1)
+        aurl = url + str(pg)
+        html = requests.get(aurl).text
+        soup = b(html, 'lxml') # NEW+USED SOUP OBJECT
+        temp_prices_nau = soup.find_all("span", {"class": "p--t--1 fw--bold"})
+        temp_prices_nau = [float(thing.text.strip(' $\n')) for thing in temp_prices_nau]
+        nau_prices.extend(temp_prices_nau)
+        pg += 1
+        if len(re.findall(r'display\:\;', html))>0:
+            break
 
+    pg = 1
     while True:
-        last_page = soup1.find_all(string=re.compile('No Listings Found')) # problem is likely here... condition not in page
-        if last_page:
-            break
-        pg = pg + 1
-        url_1 = base_url + SQ_1 + url_separator1 + str(pg)
-        html1 = requests.get(url_1).text
-        soup1 = b(html1, "lxml")
-
-        for listing in soup1.findAll("div", {"class": "item__details"}):
-            price1 = listing.findAll(
-                "span", {"class": "p--t--1 fw--bold"})[0].text
-            # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
-            # price1 = int(price1[1:indices[1]-1])
-            indices = price1.find('$')
-            price1 = price1[indices + 1:]
-            space = price1.find(' ')
-            price1 = int(price1[:space - 1])
-            prices1.append(price1)
+        burl_1 = url_1 + str(pg)
+        html1 = requests.get(burl_1).text
+        soup1 = b(html1,'lxml') # NEW SOUP OBJECT
+        temp_prices_new = soup1.find_all("span", {"class": "p--t--1 fw--bold"})
+        temp_prices_new = [float(thing.text.strip(' $\n')) for thing in temp_prices_new]
+        new_prices.extend(temp_prices_new)
+        pg += 1
+        if len(re.findall(r'display\:\;', html1))>0:
+            break
+        if len(new_prices)>len(nau_prices):
+            new_prices = []
+            break
 
+    return nau_prices, new_prices
+
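The TODO at the top of get_sold describes filtering out irrelevant listings by matching their titles against the search-query keywords. A minimal sketch of that idea; the helper name, the match threshold, and how titles would be pulled from the page are all made up for illustration, not taken from the commit:

    import re

    def title_matches(title, search_query, min_hits=1):
        # Hypothetical helper for the TODO: count how many search-query
        # keywords appear in a listing title, case-insensitively.
        keywords = [w for w in re.split(r'\W+', search_query) if w]
        hits = sum(1 for w in keywords if re.search(re.escape(w), title, re.I))
        return hits >= min_hits

get_sold could then skip the price for any listing whose title fails the check, or stop with "no results" when no title matches, as the comment proposes.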
-    # Begin Element-wise substraction of new condition items price list from new&used items price list:
-    print(len(prices), 'NEW & USED')
-    print(len(prices1), 'NEW')
-
-    end = time.time()
-    print(end - start)
-
-    for element in prices1:
-        prices.remove(element)
-
-    if 'NEW' in SQ:
-        kde_datapoints = sns.kdeplot(prices1, shade=True).get_lines()[
-            0].get_data()
-        sns.rugplot(prices1)
-        print(str(len(prices1)) + " Results" + "\n")
-        print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n")
-        total_price = np.mean(prices1) + 6.79
-        print("Average Total Price New = $" + str(total_price) + "\n")
-        print("Flat Rate Shipping = $6.79" + "\n")
-
-        kde_x = kde_datapoints[0]
-        kde_y = kde_datapoints[1]
-        optimal_price = kde_x[np.argmax(kde_y)]
-        print("Optimal Price New = $" + str(optimal_price) + "\n")
-        print("Optimal Price Including Shipping New = $" +
-              str(optimal_price + 6.79) + "\n")
-        print("URL Link (New): " + url_1b + "\n")
-        plt.ylabel('KDE')
-        plt.xlabel('Price ($)')
-        plt.show()
-    else:
-        try:
-            kde_datapoints = sns.kdeplot(prices, shade=True).get_lines()[
-                0].get_data()
-            sns.rugplot(prices)
-            print(str(len(prices)) + " Results" + "\n")
-            print("Average Price Sold Used = $" + str(np.mean(prices)) + "\n")
-            total_price = np.mean(prices) + 6.79
-            print("Average Total Price Used = $" + str(total_price) + "\n")
-            print("Flat Rate Shipping = $6.79" + "\n")
+# Begin Element-wise substraction of new condition items price list from new&used items price list:
+def avgs(nau_prices, new_prices):
+    for price in new_prices:
+        try:
+            nau_prices.remove(price)
+        except ValueError:
+            break
+    used_prices = nau_prices
+
+    if len(new_prices)>0:
+        avg_new = np.mean(new_prices)
+        avg_used = np.mean(used_prices)
+    else:
+        avg_new = 0
+        avg_used = np.mean(used_prices)
+
+    return avg_used, avg_new, used_prices
+
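One behavior worth noting in avgs: the except ValueError: break ends the subtraction at the first new price that is missing from the combined list, so any remaining new prices stay in used_prices. A multiset difference (an alternative sketch, not what this commit does) sidesteps that:

    from collections import Counter

    def used_from(nau_prices, new_prices):
        # Multiset difference: a new price absent from the combined list is
        # ignored instead of cutting the subtraction short.
        return list((Counter(nau_prices) - Counter(new_prices)).elements())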
-            import winsound
-            winsound.Beep(440, 300)
-
-            kde_x = kde_datapoints[0]
-            kde_y = kde_datapoints[1]
-            optimal_price = kde_x[np.argmax(kde_y)]
-            print("Optimal Price Used = $" + str(optimal_price) + "\n")
-            print("Optimal Price Including Shipping Used = $" +
-                  str(optimal_price + 6.79) + "\n")
-            print("URL Link: " + url_a + "\n")
-            plt.ylabel('KDE')
-            plt.xlabel('Price ($)')
-            plt.show()
-        except IndexError:
-            print('\n' + '0 results' + '\n')
-            pass
+
+def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
+
+    used_results = '\n{} total results used\nAverage used price = ${}'.format(len(used_prices),avg_used)
+    nau_link = 'URL new and used: {}\n'.format(nau_url+'1')
+
+    new_results = '\n{} total results new\nAverage new price = ${}'.format(len(new_prices),avg_new)
+    new_link = 'URL new: {}\n'.format(new_url+'1')
+
+    total_results = '{} Total results new and used'.format(nau_prices)
+
+    print(used_results)
+    print(nau_link)
+    print(new_results)
+    print(new_link)
+    print(total_results)
+
+
+def main():
+    search_query = input("\nSearch Title: ")
+    urls = url_build(search_query)
+    nau_url = urls[0]
+    new_url = urls[1]
+    prices = get_sold(nau_url, new_url)
+    nau_prices = prices[0]
+    new_prices = prices[1]
+    averages = avgs(nau_prices, new_prices)
+    avg_used = averages[0]
+    avg_new = averages[1]
+    used_prices = averages[2]
+    nau_prices = len(new_prices)+len(used_prices)
+    display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)
+
+
+if  __name__=='__main__':
+    main()
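main() pulls values out of each returned tuple by index; since url_build, get_sold, and avgs return fixed-size tuples, direct unpacking is an equivalent, slightly tighter spelling (a suggestion only, not part of the commit):

    def main():
        # Same flow as above, with the tuple returns unpacked directly.
        search_query = input("\nSearch Title: ")
        nau_url, new_url, _ = url_build(search_query)
        nau_prices, new_prices = get_sold(nau_url, new_url)
        avg_used, avg_new, used_prices = avgs(nau_prices, new_prices)
        total_count = len(new_prices) + len(used_prices)
        display_results(total_count, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)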
				