Added comments and changed the HTML parser from html.parser to lxml.
This commit is contained in:
		
							
								
								
									
										1
									
								
								bulkpp
									
									
									
									
									
										Submodule
									
								
							
							
								
								
								
								
								
							
						
						
									
										1
									
								
								bulkpp
									
									
									
									
									
										Submodule
									
								
							 Submodule bulkpp added at 32d3febba2
									
								
							
							
								
								
									
										28
									
								
								posh.py
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								posh.py
									
									
									
									
									
								
							@@ -4,12 +4,14 @@ import time
 | 
				
			|||||||
import re
 | 
					import re
 | 
				
			||||||
import concurrent.futures
 | 
					import concurrent.futures
 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
import matplotlib.pyplot as plt
 | 
					# import matplotlib.pyplot as plt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def url_base_builder(search_query):
 | 
					def url_base_builder(search_query):
 | 
				
			||||||
    genders = ['Men', 'Women']
 | 
					    genders = ['Men', 'Women']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
 | 
					    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
 | 
				
			||||||
 | 
					                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for i in range(0, len(posh_colors)):
 | 
					    for i in range(0, len(posh_colors)):
 | 
				
			||||||
        if posh_colors[i] in search_query:
 | 
					        if posh_colors[i] in search_query:
 | 
				
			||||||
@@ -30,19 +32,22 @@ def url_base_builder(search_query):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    sq = search_query.replace(color, '').replace(gender, '').replace('NEW', '').replace(' ', '+')
 | 
					    sq = search_query.replace(color, '').replace(gender, '').replace('NEW', '').replace(' ', '+')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    all_sold_url_base = 'https://poshmark.com/search?query=' + sq + "&availability=sold_out" + url_color + url_gender + '&max_id='
 | 
					    all_sold_url_base = 'https://poshmark.com/search?query=' + sq + \
 | 
				
			||||||
 | 
					        "&availability=sold_out" + url_color + url_gender + '&max_id='
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + '&condition=nwt_and_ret' + url_color + url_gender + '&max_id='
 | 
					    new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + \
 | 
				
			||||||
 | 
					        '&condition=nwt_and_ret' + url_color + url_gender + '&max_id='
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return all_sold_url_base, new_sold_url_base
 | 
					    return all_sold_url_base, new_sold_url_base
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def all_sold_list_builder(i):
 | 
					def all_sold_list_builder(i):
 | 
				
			||||||
    bases = url_base_builder(search_query)
 | 
					    bases = url_base_builder(search_query)
 | 
				
			||||||
    all_sold_url_base = bases[0]
 | 
					    all_sold_url_base = bases[0]
 | 
				
			||||||
    all_sold_prices = []
 | 
					    all_sold_prices = []
 | 
				
			||||||
    url = all_sold_url_base + str(i)
 | 
					    url = all_sold_url_base + str(i)
 | 
				
			||||||
    html = requests.get(url).text
 | 
					    html = requests.get(url).text
 | 
				
			||||||
    soup = b(html, "html.parser")
 | 
					    soup = b(html, "lxml")
 | 
				
			||||||
    # last_page = soup.find(string = re.compile('No Listings Found'))
 | 
					    # last_page = soup.find(string = re.compile('No Listings Found'))
 | 
				
			||||||
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
 | 
					    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
 | 
				
			||||||
        price = price.get_text()
 | 
					        price = price.get_text()
 | 
				
			||||||
@@ -53,13 +58,14 @@ def all_sold_list_builder(i):
 | 
				
			|||||||
        all_sold_prices.append(price)
 | 
					        all_sold_prices.append(price)
 | 
				
			||||||
    return all_sold_prices
 | 
					    return all_sold_prices
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def new_sold_list_builder(i):
 | 
					def new_sold_list_builder(i):
 | 
				
			||||||
    bases = url_base_builder(search_query)
 | 
					    bases = url_base_builder(search_query)
 | 
				
			||||||
    new_sold_url_base = bases[1]
 | 
					    new_sold_url_base = bases[1]
 | 
				
			||||||
    new_sold_prices = []
 | 
					    new_sold_prices = []
 | 
				
			||||||
    url = new_sold_url_base + str(i)
 | 
					    url = new_sold_url_base + str(i)
 | 
				
			||||||
    html = requests.get(url).text
 | 
					    html = requests.get(url).text
 | 
				
			||||||
    soup = b(html, "html.parser")
 | 
					    soup = b(html, "lxml")
 | 
				
			||||||
    # last_page = soup.find(string = re.compile('No Listings Found'))#this is present in all pages that don't have a full 48 listings on them. So you end up with an empty price list because of your conditional statement
 | 
					    # last_page = soup.find(string = re.compile('No Listings Found'))#this is present in all pages that don't have a full 48 listings on them. So you end up with an empty price list because of your conditional statement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
 | 
					    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
 | 
				
			||||||
@@ -71,11 +77,12 @@ def new_sold_list_builder(i):
 | 
				
			|||||||
        new_sold_prices.append(price)
 | 
					        new_sold_prices.append(price)
 | 
				
			||||||
    return new_sold_prices
 | 
					    return new_sold_prices
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
search_query = str(input('Title Search: '))
 | 
					search_query = str(input('Title Search: '))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
start = time.time()
 | 
					start = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
page_list = list(range(1, 13))
 | 
					page_list = list(range(1, 2))
 | 
				
			||||||
all_sold_list = []
 | 
					all_sold_list = []
 | 
				
			||||||
new_sold_list = []
 | 
					new_sold_list = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -85,7 +92,8 @@ with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
					with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
				
			||||||
    for future in executor.map(new_sold_list_builder, page_list):
 | 
					    for future in executor.map(new_sold_list_builder, page_list):
 | 
				
			||||||
        new_sold_list.extend(future)
 | 
					        new_sold_list.extend(future)# if you can pull the nwt price simultaneously with used then you won't have to use this
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
for element in new_sold_list:
 | 
					for element in new_sold_list:
 | 
				
			||||||
    all_sold_list.remove(element)
 | 
					    all_sold_list.remove(element)
 | 
				
			||||||
@@ -94,8 +102,8 @@ used_sold_list = all_sold_list
 | 
				
			|||||||
average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
 | 
					average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
 | 
				
			||||||
average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))
 | 
					average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
used_sold_results = str(len(used_sold_list)) + ' Results'
 | 
					used_sold_results = str(len(used_sold_list)) + ' Used Results'
 | 
				
			||||||
new_sold_results = str(len(new_sold_list)) + ' Results'
 | 
					new_sold_results = str(len(new_sold_list)) + ' NWT Results'
 | 
				
			||||||
total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'
 | 
					total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
end = time.time()
 | 
					end = time.time()
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user