import re

import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup as b

# Colors Poshmark exposes as a search filter; matched (case-sensitively, as the
# original code did) against the user's query to build the &color[]= parameter.
POSH_COLORS = [
    'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold',
    'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan',
]


# Initial Search Query URL to Find Item Condition Code
def url_build(search_query):
    """Build the two Poshmark "sold listings" search URLs for *search_query*.

    Both URLs end in ``max_id=`` so the caller can append a page number.

    Returns a 3-tuple:
        url   -- sold listings, any condition (new and used)
        url_1 -- sold listings restricted to new-with-tags (nwt_and_ret)
        SQ    -- the original, unmodified search query
    """
    SQ = search_query
    SQ_1 = SQ.replace(' ', '+')

    # Defaults: no color filter (original for/else fall-through case).
    url_separator = "&availability=sold_out&department=All&max_id="
    url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="

    # If the query names a known color, move it out of the free-text query
    # and into Poshmark's dedicated &color[]= filter instead.
    for color in POSH_COLORS:
        if color in SQ:
            url_color = '&color[]=' + color
            url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
            # Drop the color word from the query text, then tidy the stray
            # '+' separators the removal leaves behind (fixes e.g.
            # "Red+Dress" -> "+Dress" becoming a malformed query).
            SQ_1 = SQ_1.replace(color, '').replace('++', '+').strip('+')
            break

    base_url = "https://poshmark.com/search?query="
    url = base_url + SQ_1 + url_separator
    url_1 = base_url + SQ_1 + url_separator1
    return url, url_1, SQ


def _scrape_prices(url):
    """Page through a Poshmark search URL and collect all listing prices.

    *url* must end in ``max_id=``; the 1-based page number is appended.
    Paging stops once a page's HTML contains the ``display:;`` marker that
    appears on the final (empty) results page.
    """
    prices = []
    pg = 1
    while True:
        # Timeout so a stalled connection can't hang the scrape forever.
        html = requests.get(url + str(pg), timeout=30).text
        soup = b(html, 'lxml')
        spans = soup.find_all("span", {"class": "p--t--1 fw--bold"})
        prices.extend(float(span.text.strip(' $\n')) for span in spans)
        pg += 1
        if len(re.findall(r'display\:\;', html)) > 0:
            break
    return prices


def get_sold(url, url_1):
    """Scrape sold-listing prices for both URLs built by url_build().

    Returns ``(nau_prices, new_prices)``: prices for all sold listings
    (new and used combined), and prices for new-condition listings only.
    """
    # TODO: Avoid irrelevant results by checking each listing title against
    # the search-query keywords (regex); if none match, report no results.
    nau_prices = _scrape_prices(url)    # NEW AND USED PRICES
    new_prices = _scrape_prices(url_1)  # new-condition prices only

    # Sanity check: the condition-filtered result set can never legitimately
    # be larger than the unfiltered one; if it is, the scrape went wrong, so
    # discard the new-item prices rather than report bad averages.
    if len(new_prices) > len(nau_prices):
        new_prices = []
    return nau_prices, new_prices


# Element-wise subtraction of the new-condition price list from the
# new&used price list leaves the used-condition prices.
def avgs(nau_prices, new_prices):
    """Split *nau_prices* into used vs. new and compute average prices.

    Removes one occurrence of each new-item price from the combined list,
    leaving the used-item prices. Returns ``(avg_used, avg_new, used_prices)``;
    an average is 0 when its list is empty. NOTE: mutates *nau_prices* in
    place (the remaining elements ARE *used_prices*).
    """
    for price in new_prices:
        try:
            nau_prices.remove(price)
        except ValueError:
            # Price not present in the combined list (page mismatch between
            # the two scrapes): skip it and keep subtracting the rest.
            # (Was `break`, which silently abandoned the subtraction.)
            continue
    used_prices = nau_prices

    avg_new = np.mean(new_prices) if new_prices else 0
    # Guard the empty case: np.mean([]) returns nan with a RuntimeWarning.
    avg_used = np.mean(used_prices) if used_prices else 0
    return avg_used, avg_new, used_prices


def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used,
                    nau_url, new_url):
    """Print a summary of the scraped results.

    *nau_prices* here is the combined result COUNT (an int), as passed by
    main(); the URLs get page number 1 appended so they are directly usable.
    """
    used_results = '\n{} total results used\nAverage used price = ${}'.format(len(used_prices), avg_used)
    nau_link = 'URL new and used: {}\n'.format(nau_url + '1')
    new_results = '\n{} total results new\nAverage new price = ${}'.format(len(new_prices), avg_new)
    new_link = 'URL new: {}\n'.format(new_url + '1')
    total_results = '{} Total results new and used'.format(nau_prices)
    print(used_results)
    print(nau_link)
    print(new_results)
    print(new_link)
    print(total_results)


def main():
    """Prompt for a search query, scrape Poshmark sold listings, print stats."""
    search_query = input("\nSearch Title: ")
    nau_url, new_url, _ = url_build(search_query)
    nau_prices, new_prices = get_sold(nau_url, new_url)
    avg_used, avg_new, used_prices = avgs(nau_prices, new_prices)
    total_count = len(new_prices) + len(used_prices)
    display_results(total_count, new_prices, used_prices, avg_new, avg_used,
                    nau_url, new_url)


if __name__ == '__main__':
    main()