Initial Commit

spbeach46 2020-05-25 18:36:44 -07:00
commit 6fd57ec6b9
4 changed files with 367 additions and 0 deletions

166
pms.py Normal file

@@ -0,0 +1,166 @@
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import re
from bs4 import BeautifulSoup as b
import requests
import time

# Initial search query URL to find the item condition code
while True:
    print("Title Search:")
    SQ = str(input())
    start = time.time()
    SQ_1 = SQ.replace(' ', '+').replace('NEW', '').replace('men', '').replace('women', '')
    # Color filters are passed in the URL as &color[]=<color>
    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
    for i in range(len(posh_colors)):
        if posh_colors[i] in SQ:
            url_color = '&color[]=' + posh_colors[i]
            url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
            SQ_1 = SQ_1.replace(posh_colors[i], '')
            break
    else:
        # No color named in the query, so leave the color filter out.
        url_separator = "&availability=sold_out&department=All&max_id="
        url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="
    prices = []
    prices1 = []
    base_url = "https://poshmark.com/search?query="
    pg = 1
    url = base_url + SQ_1 + url_separator + str(pg)
    url_1 = base_url + SQ_1 + url_separator1 + str(pg)
    # Keep copies of the page-1 URLs so they can be printed later.
    url_a = url
    url_1b = url_1
    html = requests.get(url).text
    html1 = requests.get(url_1).text
    soup = b(html, "html.parser")
    soup1 = b(html1, "html.parser")
    # Build the price list for new and used condition items:
    for listing in soup.findAll("div", {"class": "item__details"}):
        price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
        # Slice out the digits between the '$' and the whitespace that follows.
        indices = price.find('$')
        price = price[indices + 1:]
        space = price.find(' ')
        price = int(price[:space - 1])
        prices.append(price)
    # Keep paging until Poshmark reports no more listings.
    while True:
        last_page = soup.find_all(string=re.compile('No Listings Found'))
        if last_page:
            break
        pg = pg + 1
        url = base_url + SQ_1 + url_separator + str(pg)
        html = requests.get(url).text
        soup = b(html, "html.parser")
        for listing in soup.findAll("div", {"class": "item__details"}):
            price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
            indices = price.find('$')
            price = price[indices + 1:]
            space = price.find(' ')
            price = int(price[:space - 1])
            prices.append(price)
    # Build the price list for new condition items only:
    for listing in soup1.findAll("div", {"class": "item__details"}):
        price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
        indices = price1.find('$')
        price1 = price1[indices + 1:]
        space = price1.find(' ')
        price1 = int(price1[:space - 1])
        prices1.append(price1)
    # Reset the page counter; the previous loop left it at the last combined-results page.
    pg = 1
    while True:
        last_page = soup1.find_all(string=re.compile('No Listings Found'))
        if last_page:
            break
        pg = pg + 1
        url_1 = base_url + SQ_1 + url_separator1 + str(pg)
        html1 = requests.get(url_1).text
        soup1 = b(html1, "html.parser")
        for listing in soup1.findAll("div", {"class": "item__details"}):
            price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
            indices = price1.find('$')
            price1 = price1[indices + 1:]
            space = price1.find(' ')
            price1 = int(price1[:space - 1])
            prices1.append(price1)
    # Element-wise subtraction of the new-condition price list from the new & used price list:
    print(len(prices), 'NEW & USED')
    print(len(prices1), 'NEW')
    end = time.time()
    print(end - start)
    for element in prices1:
        prices.remove(element)
    if 'NEW' in SQ:
        # Pull the fitted KDE curve back out of the plot so its peak can be read off.
        kde_datapoints = sns.kdeplot(prices1, shade=True).get_lines()[0].get_data()
        sns.rugplot(prices1)
        print(str(len(prices1)) + " Results" + "\n")
        print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n")
        total_price = np.mean(prices1) + 6.79
        print("Average Total Price New = $" + str(total_price) + "\n")
        print("Flat Rate Shipping = $6.79" + "\n")
        kde_x = kde_datapoints[0]
        kde_y = kde_datapoints[1]
        # The optimal price is the price at the peak of the KDE.
        optimal_price = kde_x[np.argmax(kde_y)]
        print("Optimal Price New = $" + str(optimal_price) + "\n")
        print("Optimal Price Including Shipping New = $" + str(optimal_price + 6.79) + "\n")
        print("URL Link (New): " + url_1b + "\n")
        plt.ylabel('KDE')
        plt.xlabel('Price ($)')
        plt.show()
    else:
        try:
            kde_datapoints = sns.kdeplot(prices, shade=True).get_lines()[0].get_data()
            sns.rugplot(prices)
            print(str(len(prices)) + " Results" + "\n")
            print("Average Price Sold Used = $" + str(np.mean(prices)) + "\n")
            total_price = np.mean(prices) + 6.79
            print("Average Total Price Used = $" + str(total_price) + "\n")
            print("Flat Rate Shipping = $6.79" + "\n")
            import winsound  # Windows-only; beep when the scrape finishes.
            winsound.Beep(440, 300)
            kde_x = kde_datapoints[0]
            kde_y = kde_datapoints[1]
            optimal_price = kde_x[np.argmax(kde_y)]
            print("Optimal Price Used = $" + str(optimal_price) + "\n")
            print("Optimal Price Including Shipping Used = $" + str(optimal_price + 6.79) + "\n")
            print("URL Link: " + url_a + "\n")
            plt.ylabel('KDE')
            plt.xlabel('Price ($)')
            plt.show()
        except IndexError:
            print('\n' + '0 results' + '\n')

113
posh.py Normal file

@@ -0,0 +1,113 @@
import requests
from bs4 import BeautifulSoup as b
import time
import re
import concurrent.futures
import numpy as np


def url_base_builder(search_query):
    genders = ['Men', 'Women']
    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
    for i in range(len(posh_colors)):
        if posh_colors[i] in search_query:
            url_color = '&color[]=' + posh_colors[i]
            color = posh_colors[i]
            break
    else:
        color = ''
        url_color = ''
    for i in range(len(genders)):
        if genders[i] in search_query:
            url_gender = '&department=' + genders[i]
            gender = genders[i]
            break
    else:
        gender = ''
        url_gender = '&department=All'
    sq = search_query.replace(color, '').replace(gender, '').replace('NEW', '').replace(' ', '+')
    all_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + url_color + url_gender + '&max_id='
    new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out&condition=nwt_and_ret' + url_color + url_gender + '&max_id='
    return all_sold_url_base, new_sold_url_base
def all_sold_list_builder(i):
    all_sold_url_base, _ = url_base_builder(search_query)
    all_sold_prices = []
    url = all_sold_url_base + str(i)
    html = requests.get(url).text
    soup = b(html, "html.parser")
    # last_page = soup.find(string=re.compile('No Listings Found'))
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
        price = price.get_text()
        dollar_index = price.find('$')
        price = price[dollar_index + 1:]
        space = price.find(' ')
        price = int(price[:space - 1])
        all_sold_prices.append(price)
    return all_sold_prices
def new_sold_list_builder(i):
    _, new_sold_url_base = url_base_builder(search_query)
    new_sold_prices = []
    url = new_sold_url_base + str(i)
    html = requests.get(url).text
    soup = b(html, "html.parser")
    # last_page = soup.find(string=re.compile('No Listings Found'))
    # 'No Listings Found' is present on every page that doesn't have a full 48
    # listings, so using it as a stop condition here would return an empty
    # price list for the last page.
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
        price = price.get_text()
        dollar_index = price.find('$')
        price = price[dollar_index + 1:]
        space = price.find(' ')
        price = int(price[:space - 1])
        new_sold_prices.append(price)
    return new_sold_prices
search_query = str(input('Title Search: '))
start = time.time()
page_list = list(range(1, 13))
all_sold_list = []
new_sold_list = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    for result in executor.map(all_sold_list_builder, page_list):
        all_sold_list.extend(result)

with concurrent.futures.ThreadPoolExecutor() as executor:
    for result in executor.map(new_sold_list_builder, page_list):
        new_sold_list.extend(result)

# Whatever remains after removing the new-condition prices is the used-condition list.
for element in new_sold_list:
    all_sold_list.remove(element)
used_sold_list = all_sold_list

average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))
used_sold_results = str(len(used_sold_list)) + ' Results'
new_sold_results = str(len(new_sold_list)) + ' Results'
total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'

end = time.time()
print(end - start, 'seconds')
print('Average Used Sold Price', average_used_sold_price, used_sold_results)
print('Average New Sold Price', average_new_sold_price, new_sold_results)
print(total_results)
'''There has to be a way to determine the number of pages present before making far too many requests. Look at the network tab in the element inspector to see whether some kind of id gives away the page type; some responses might differ in more than their HTML. Otherwise, a payload-size threshold might work: if a response is smaller than some number of kB, skip it.
This will be significant for speeding up the program. If it keeps having to make requests to 20 pages and wait on the results, it may not be much faster than just using pms.py, but limiting requests to only what is absolutely required would be best. Also, check whether multiprocessing would be better for crunching all the prices in the lists.
Another workaround is a loop that works in chunks of more than one page: make the first list pages 1-4, send it to the thread pool, then take the next 4 pages and do the same until 'No Listings Found' appears, at which point the while loop stops. The thread pool could even use separate page lists for the new and all conditions.'''
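A minimal sketch of that chunked approach, reusing the builder functions above; scrape_in_chunks and CHUNK are hypothetical names, and the stop test assumes a page past the last listing yields an empty price list (consistent with how the builders behave when no price spans are found):

import concurrent.futures

CHUNK = 4  # pages per batch; a hypothetical tuning knob

def scrape_in_chunks(builder):
    # Fetch CHUNK pages at a time until a batch contains an empty page.
    prices = []
    first = 1
    while True:
        pages = range(first, first + CHUNK)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            batch = list(executor.map(builder, pages))
        for page_prices in batch:
            prices.extend(page_prices)
        # A page past the last listing yields no prices, so an empty result
        # anywhere in the batch means the listings have run out.
        if any(not page_prices for page_prices in batch):
            break
        first += CHUNK
    return prices

# e.g. all_sold_list = scrape_in_chunks(all_sold_list_builder)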

88
poshare.py Normal file

@@ -0,0 +1,88 @@
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementClickInterceptedException
from bs4 import BeautifulSoup as b
import random
import time
browser = webdriver.Firefox(executable_path="C:/Users/unknown/Desktop/projects/geckodriver")
wait = WebDriverWait(browser, 30)

# Log in, then let the user clear any challenge before continuing.
browser.get('https://poshmark.com/login')
username = browser.find_element_by_id('login_form_username_email')
username.send_keys('SpeanutButter')
psw = browser.find_element_by_id('login_form_password')
psw.send_keys('***REMOVED***')
psw.submit()
input('press "enter" to continue')
wait.until(EC.presence_of_element_located((By.XPATH, "//title[text()='Feed - Poshmark']")))

browser.get('https://poshmark.com/closet/speanutbutter?department=Women&sort_by=price_desc')
input('press "enter" to continue')

# Scroll to the bottom of the infinite-scroll closet until the
# "Not for Sale" items (which come last) are loaded.
html = browser.page_source
soup = b(html, "html.parser")
elm = browser.find_element_by_tag_name('html')
while not soup.find('i', text="Not for Sale"):
    elm.send_keys(Keys.END)
    html = browser.page_source
    soup = b(html, "html.parser")

list_titles = soup.find_all('a', {'class': 'title'})
active_listings = soup.find_all('i', {'class': "icon share-gray"})
print(len(active_listings))
container = browser.find_elements_by_xpath("//div[@id='tiles-con']/div")
i = -1
share_to = input('share to followers (F) or share to party (P)?: ')
for divs in container:
    i += 1
    wait.until(EC.presence_of_element_located((By.XPATH, ".//i[@class = 'icon share-gray']")))
    # find_element_by_xpath raises NoSuchElementException when the tag is
    # absent, so these nested try blocks skip "Not for Sale" and "Sold" tiles.
    try:
        divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag not-for-sale-tag']")
    except NoSuchElementException:
        try:
            divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag sold-tag']")
        except NoSuchElementException:
            share = divs.find_element_by_xpath(".//i[@class = 'icon share-gray']")
            time.sleep(random.uniform(.6, 1.2))
            try:
                share.click()
                # Re-parse the live page; the soup from before the click is stale.
                soup = b(browser.page_source, "html.parser")
                if soup.find('input', id="recaptcha-token"):
                    input('Finish recaptcha and press "enter" to continue')
                if soup.find('span', text="I'm not a robot"):
                    input('Finish recaptcha and press "enter" to continue')
                if share_to == 'F':
                    wait.until(EC.presence_of_element_located((By.XPATH, "//span[text()='To My Followers']")))
                    share = browser.find_element_by_xpath("//span[text()='To My Followers']")
                    time.sleep(random.uniform(.6, 1.2))
                    share.click()
                    soup = b(browser.page_source, "html.parser")
                    if soup.find('input', id="recaptcha-token"):
                        input('Finish recaptcha and press "enter" to continue')
                    if soup.find('span', text="I'm not a robot"):
                        input('Finish recaptcha and press "enter" to continue')
                    print(i)
                    title = list_titles[i].get_text()
                    print(title)
                if share_to == 'P':
                    wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Happening Now')]")))
                    share = browser.find_element_by_xpath("//*[contains(text(), 'Happening Now')]")
                    time.sleep(random.uniform(.6, 1.2))
                    share.click()
                    soup = b(browser.page_source, "html.parser")
                    if soup.find('input', id="recaptcha-token"):
                        input('Finish recaptcha and press "enter" to continue')
                    if soup.find('span', text="I'm not a robot"):
                        input('Finish recaptcha and press "enter" to continue')
                    print(i)
                    title = list_titles[i].get_text()
                    print(title)
            except ElementClickInterceptedException:
                pass
'''If Poshmark lets you browser.get any closet page directly, then you could skip the scrolling that loads the whole closet at once and instead visit each page and share from there. It wouldn't be such a time-consuming process.'''
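A minimal sketch of that page-by-page idea, reusing the browser, wait, and imports above. The max_id query parameter on closet URLs is an unverified assumption borrowed from the search URLs in posh.py, and the sketch only covers the share-to-followers path:

# Sketch only: visit each closet page directly instead of scrolling.
# The max_id parameter is an assumption; confirm closet URLs accept it.
closet_base = 'https://poshmark.com/closet/speanutbutter?department=Women&max_id='
page = 1
while True:
    browser.get(closet_base + str(page))
    shares = browser.find_elements_by_xpath("//i[@class = 'icon share-gray']")
    if not shares:
        break  # an empty page means the closet has run out of listings
    for share in shares:
        time.sleep(random.uniform(.6, 1.2))
        try:
            share.click()
            wait.until(EC.presence_of_element_located((By.XPATH, "//span[text()='To My Followers']")))
            browser.find_element_by_xpath("//span[text()='To My Followers']").click()
        except ElementClickInterceptedException:
            pass
    page += 1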

BIN
requirements.txt Normal file

Binary file not shown.