commit 6fd57ec6b9f438e8d6ebee285deefe1832b91730
Author: spbeach46
Date:   Mon May 25 18:36:44 2020 -0700

    Initial Commit

diff --git a/pms.py b/pms.py
new file mode 100644
index 0000000..fb8007c
--- /dev/null
+++ b/pms.py
@@ -0,0 +1,166 @@
+import seaborn as sns
+import numpy as np
+import matplotlib.pyplot as plt
+import re
+import bs4
+from bs4 import BeautifulSoup as b
+import requests
+import time
+
+# Initial Search Query URL to Find Item Condition Code
+while True:
+    print("Title Search:")
+
+    start = time.time()
+
+    SQ = str(input())
+    SQ_1 = SQ.replace(' ', '+').replace('NEW','').replace('men', '').replace('women', '')
+    gender = ['Men', 'Women']
+
+    #&color[]=color
+    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
+
+    for i in range(0,len(posh_colors)):
+        if posh_colors[i] in SQ:
+            url_color = '&color[]=' + posh_colors[i]
+            url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
+            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
+            SQ_1 = SQ_1.replace(posh_colors[i], '')
+            break
+        else:
+            url_separator = "&availability=sold_out&department=All&max_id="
+            url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="
+
+    prices = []
+    prices1 = []
+    base_url = "https://poshmark.com/search?query="
+
+    pg = 1
+    url = base_url + SQ_1 + url_separator + str(pg)
+    url_1 = base_url + SQ_1 + url_separator1 + str(pg)
+    url_a = base_url + SQ_1 + url_separator + str(pg)
+    url_1b = base_url + SQ_1 + url_separator1 + str(pg)
+
+    html = requests.get(url).text
+    html1 = requests.get(url_1).text
+    soup = b(html, "html.parser")
+    soup1 = b(html1,'html.parser')
+
+    # Begin new and used condition items price list:
+
+    for listing in soup.findAll("div", {"class": "item__details"}):
+        price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
+        indices = price.find('$')
+        price = price[indices+1:]
+        space = price.find(' ')
+        price = int(price[:space-1])
+        prices.append(price)
+
+    while True:
+        last_page = soup.find_all(string = re.compile('No Listings Found'))
+        if last_page:
+            break
+        pg = pg + 1
+        url = base_url + SQ_1 + url_separator + str(pg)
+        html = requests.get(url).text
+        soup = b(html, "html.parser")
+
+        for listing in soup.findAll("div", {"class": "item__details"}):
+            price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
+            #indices = [i for i, dollars in enumerate(price) if dollars == '$']
+            #price = int(price[1:indices[1]-1])
+            indices = price.find('$')
+            price = price[indices+1:]
+            space = price.find(' ')
+            price = int(price[:space-1])
+            prices.append(price)
+
+    # Begin new condition item prices list:
+
+    for listing in soup1.findAll("div", {"class": "item__details"}):
+        price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
+        #indices = [i for i, dollars in enumerate(price1) if dollars == '$']
+        #price1 = int(price1[1:indices[1]-1])
+        indices = price1.find('$')
+        price1 = price1[indices+1:]
+        space = price1.find(' ')
+        price1 = int(price1[:space-1])
+        prices1.append(price1)
+
+    while True:
+
+        last_page = soup1.find_all(string = re.compile('No Listings Found'))
+        if last_page:
+            break
+        pg = pg + 1
+        url_1 = base_url + SQ_1 + url_separator1 + str(pg)
+        html1 = requests.get(url_1).text
+        soup1 = b(html1, "html.parser")
+
+        for listing in soup1.findAll("div", {"class": "item__details"}):
+            price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
+            #indices = [i for i, dollars in enumerate(price1) if dollars == '$']
+            #price1 = int(price1[1:indices[1]-1])
+            indices = price1.find('$')
+            price1 = price1[indices+1:]
+            space = price1.find(' ')
+            price1 = int(price1[:space-1])
+            prices1.append(price1)
fw--bold"})[0].text + #indices = [i for i, dollars in enumerate(price1) if dollars == '$'] + #price1 = int(price1[1:indices[1]-1]) + indices = price1.find('$') + price1 = price1[indices+1:] + space = price1.find(' ') + price1 = int(price1[:space-1]) + prices1.append(price1) + + + # Begin Element-wise substraction of new condition items price list from new&used items price list: + print(len(prices), 'NEW & USED') + print(len(prices1), 'NEW') + + end = time.time() + + print(end - start) + + for element in prices1: + prices.remove(element) + + if 'NEW' in SQ: + kde_datapoints = sns.kdeplot(prices1, shade = True).get_lines()[0].get_data() + sns.rugplot(prices1) + print(str(len(prices1)) + " Results" + "\n") + print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n") + total_price = np.mean(prices1) + 6.79 + print("Average Total Price New = $" + str(total_price) + "\n") + print("Flat Rate Shipping = $6.79" + "\n") + + kde_x = kde_datapoints[0] + kde_y = kde_datapoints[1] + optimal_price = kde_x[np.argmax(kde_y)] + print("Optimal Price New = $" + str(optimal_price) + "\n") + print("Optimal Price Including Shipping New = $" + str(optimal_price + 6.79) + "\n") + print("URL Link (New): " + url_1b + "\n") + plt.ylabel('KDE') + plt.xlabel('Price ($)') + plt.show() + else: + try: + + kde_datapoints = sns.kdeplot(prices, shade = True).get_lines()[0].get_data() + sns.rugplot(prices) + print(str(len(prices)) + " Results" + "\n") + print("Average Price Sold Used = $" + str(np.mean(prices)) + "\n") + total_price = np.mean(prices) + 6.79 + print("Average Total Price Used = $" + str(total_price) + "\n") + print("Flat Rate Shipping = $6.79" + "\n") + import winsound + winsound.Beep(440, 300) + + kde_x = kde_datapoints[0] + kde_y = kde_datapoints[1] + optimal_price = kde_x[np.argmax(kde_y)] + print("Optimal Price Used = $" + str(optimal_price) + "\n") + print("Optimal Price Including Shipping Used = $" + str(optimal_price + 6.79) + "\n") + print("URL Link: " + url_a + "\n") + plt.ylabel('KDE') + plt.xlabel('Price ($)') + plt.show() + except IndexError: + print('\n' + '0 results' + '\n') + pass diff --git a/posh.py b/posh.py new file mode 100644 index 0000000..643ce9c --- /dev/null +++ b/posh.py @@ -0,0 +1,113 @@ +import requests +from bs4 import BeautifulSoup as b +import time +import re +import concurrent.futures +import numpy as np +import matplotlib.pyplot as plt + +def url_base_builder(search_query): + genders = ['Men', 'Women'] + + posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan'] + + for i in range(0,len(posh_colors)): + if posh_colors[i] in search_query: + url_color = '&color[]=' + posh_colors[i] + color = posh_colors[i] + break + else: + color = '' + url_color = '' + for i in range(0,len(genders)): + if genders[i] in search_query: + url_gender = '&department=' + genders[i] + gender = genders[i] + break + else: + gender = '' + url_gender = '&department=All' + + sq = search_query.replace(color,'').replace(gender,'').replace('NEW','').replace(' ', '+') + + all_sold_url_base = 'https://poshmark.com/search?query=' + sq + "&availability=sold_out" + url_color + url_gender + '&max_id=' + + new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + '&condition=nwt_and_ret' + url_color + url_gender + '&max_id=' + + return all_sold_url_base, new_sold_url_base + +def all_sold_list_builder(i): + bases = url_base_builder(search_query) + all_sold_url_base = bases[0] + 
diff --git a/posh.py b/posh.py
new file mode 100644
index 0000000..643ce9c
--- /dev/null
+++ b/posh.py
@@ -0,0 +1,113 @@
+import requests
+from bs4 import BeautifulSoup as b
+import time
+import re
+import concurrent.futures
+import numpy as np
+import matplotlib.pyplot as plt
+
+def url_base_builder(search_query):
+    genders = ['Men', 'Women']
+
+    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
+
+    for i in range(0,len(posh_colors)):
+        if posh_colors[i] in search_query:
+            url_color = '&color[]=' + posh_colors[i]
+            color = posh_colors[i]
+            break
+        else:
+            color = ''
+            url_color = ''
+    for i in range(0,len(genders)):
+        if genders[i] in search_query:
+            url_gender = '&department=' + genders[i]
+            gender = genders[i]
+            break
+        else:
+            gender = ''
+            url_gender = '&department=All'
+
+    sq = search_query.replace(color,'').replace(gender,'').replace('NEW','').replace(' ', '+')
+
+    all_sold_url_base = 'https://poshmark.com/search?query=' + sq + "&availability=sold_out" + url_color + url_gender + '&max_id='
+
+    new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + '&condition=nwt_and_ret' + url_color + url_gender + '&max_id='
+
+    return all_sold_url_base, new_sold_url_base
+
+def all_sold_list_builder(i):
+    bases = url_base_builder(search_query)
+    all_sold_url_base = bases[0]
+    all_sold_prices = []
+    url = all_sold_url_base + str(i)
+    html = requests.get(url).text
+    soup = b(html, "html.parser")
+    #last_page = soup.find(string = re.compile('No Listings Found'))
+    for price in soup.find_all('span', {'class':'p--t--1 fw--bold'}):
+        price = price.get_text()
+        dollar_index = price.find('$')
+        price = price[dollar_index+1:]
+        space = price.find(' ')
+        price = int(price[:space-1])
+        all_sold_prices.append(price)
+    return all_sold_prices
+
+def new_sold_list_builder(i):
+    bases = url_base_builder(search_query)
+    new_sold_url_base = bases[1]
+    new_sold_prices = []
+    url = new_sold_url_base + str(i)
+    html = requests.get(url).text
+    soup = b(html, "html.parser")
+    #last_page = soup.find(string = re.compile('No Listings Found'))  # this string is present on every page that doesn't have a full 48 listings, so gating on it leaves an empty price list
+
+    for price in soup.find_all('span', {'class':'p--t--1 fw--bold'}):
+        price = price.get_text()
+        dollar_index = price.find('$')
+        price = price[dollar_index+1:]
+        space = price.find(' ')
+        price = int(price[:space-1])
+        new_sold_prices.append(price)
+    return new_sold_prices
+
+search_query = str(input('Title Search: '))
+
+start = time.time()
+
+page_list = list(range(1, 13))
+all_sold_list = []
+new_sold_list = []
+
+with concurrent.futures.ThreadPoolExecutor() as executor:
+    for future in executor.map(all_sold_list_builder, page_list):
+        all_sold_list.extend(future)
+
+with concurrent.futures.ThreadPoolExecutor() as executor:
+    for future in executor.map(new_sold_list_builder, page_list):
+        new_sold_list.extend(future)
+
+for element in new_sold_list:
+    all_sold_list.remove(element)
+used_sold_list = all_sold_list
+
+average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
+average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))
+
+used_sold_results = str(len(used_sold_list)) + ' Results'
+new_sold_results = str(len(new_sold_list)) + ' Results'
+total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'
+
+end = time.time()
+
+print(end - start, 'seconds')
+
+print('Average Used Sold Price', average_used_sold_price, used_sold_results)
+print('Average New Sold Price', average_new_sold_price, new_sold_results)
+print(total_results)
+
+'''There has to be a way to determine the number of pages present before making far too many requests. Look at the network tab in the element inspector to see whether some kind of id gives away the page type; the responses might differ in more than their HTML. Otherwise, a threshold payload size could work: if a response is smaller than so many kB, skip it.
+
+This matters for speeding up the program. If you keep having to request 20 pages and wait on the results, it may not be much faster than just using pms.py, but limiting the requests to only what is absolutely required would be best. Also, check whether multiprocessing would be better for crunching all the prices in the lists.
+
+Another workaround is to loop in chunks of more than one page: make the first page list 1-4, send it to the thread pool, then take the next 4 pages and do the same thing until "No Listings Found" appears, at which point the while loop stops. The multithreader could even use separate page lists for the new and all-sold searches.
+'''
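
A rough sketch of the chunked approach described in the closing note, assuming it sits alongside posh.py's existing page builders (all_sold_list_builder / new_sold_list_builder). The chunk size of 4 and the use of an empty page as a stand-in for the "No Listings Found" check are assumptions, not part of the commit:

    import concurrent.futures

    def fetch_until_empty(list_builder, chunk_size=4):
        """Fetch pages in chunks until a chunk contains an empty page.

        list_builder is one of posh.py's page scrapers; a page that yields no
        prices is treated as the "No Listings Found" page.
        """
        prices = []
        page = 1
        while True:
            pages = list(range(page, page + chunk_size))
            with concurrent.futures.ThreadPoolExecutor() as executor:
                results = list(executor.map(list_builder, pages))
            for page_prices in results:
                prices.extend(page_prices)
            # Stop once any page in the chunk came back empty.
            if any(len(page_prices) == 0 for page_prices in results):
                break
            page += chunk_size
        return prices

Used in place of the fixed page_list of 1-12, e.g. all_sold_list = fetch_until_empty(all_sold_list_builder) and new_sold_list = fetch_until_empty(new_sold_list_builder), each search would stop on its own last page.
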
diff --git a/poshare.py b/poshare.py
new file mode 100644
index 0000000..86f61ec
--- /dev/null
+++ b/poshare.py
@@ -0,0 +1,88 @@
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.select import Select
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.action_chains import ActionChains
+import random
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.keys import Keys
+from bs4 import BeautifulSoup as b
+import time
+from selenium.common.exceptions import ElementClickInterceptedException
+
+browser = webdriver.Firefox(executable_path="C:/Users/unknown/Desktop/projects/geckodriver")
+wait = WebDriverWait(browser, 30)
+browser.get('https://poshmark.com/login')
+username = browser.find_element_by_id('login_form_username_email')
+username.send_keys('SpeanutButter')
+psw = browser.find_element_by_id('login_form_password')
+psw.send_keys('***REMOVED***')
+psw.submit()
+input('press "enter" to continue')
+wait.until(EC.presence_of_element_located((By.XPATH, "//title[text()='Feed - Poshmark']")))
+browser.get('https://poshmark.com/closet/speanutbutter?department=Women&sort_by=price_desc')
+input('press "enter" to continue')
+
+html = browser.page_source
+soup = b(html)
+elm = browser.find_element_by_tag_name('html')
+while not soup.find('i', text = "Not for Sale"):
+    elm.send_keys(Keys.END)
+    html = browser.page_source
+    soup = b(html)
+list_titles = soup.find_all('a',{'class':'title'})
+active_listings = soup.find_all('i', {'class':"icon share-gray"})
+print(len(active_listings))
+container = browser.find_elements_by_xpath("//div[@id='tiles-con']/div")
+i = -1
+share_to = input('share to followers (F) or share to party (P)?: ')
+for divs in container:
+    i += 1
+    wait.until(EC.presence_of_element_located((By.XPATH, ".//i[@class = 'icon share-gray']")))
+    try:
+        if divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag not-for-sale-tag']"):
+            pass
+    except NoSuchElementException:
+        try:
+            if divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag sold-tag']"):
+                pass
+        except NoSuchElementException:
+            share = divs.find_element_by_xpath(".//i[@class = 'icon share-gray']")
+            time.sleep(random.uniform(.6,1.2))
+            try:
+                share.click()
+                if soup.find('input', id = "recaptcha-token"):
+                    input('Finish recaptcha and press "enter" to continue')
+                if soup.find('span', text = "I'm not a robot"):
+                    input('Finish recaptcha and press "enter" to continue')
+                if share_to == 'F':
+                    wait.until(EC.presence_of_element_located((By.XPATH, "//span[text()='To My Followers']")))
+                    share = browser.find_element_by_xpath("//span[text()='To My Followers']")
+                    time.sleep(random.uniform(.6,1.2))
+                    share.click()
+
+                    if soup.find('input', id = "recaptcha-token"):
+                        input('Finish recaptcha and press "enter" to continue')
+                    print(i)
+                    if soup.find('span', text = "I'm not a robot"):
+                        input('Finish recaptcha and press "enter" to continue')
+                    title = list_titles[i].get_text()
+                    print(title)
+                if share_to == 'P':
+                    wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Happening Now')]")))
+                    share = browser.find_element_by_xpath("//*[contains(text(), 'Happening Now')]")
+                    time.sleep(random.uniform(.6,1.2))
+                    share.click()
+                    if soup.find('input', id = "recaptcha-token"):
+                        input('Finish recaptcha and press "enter" to continue')
+                    if soup.find('span', text = "I'm not a robot"):
+                        input('Finish recaptcha and press "enter" to continue')
+                    print(i)
+                    title = list_titles[i].get_text()
+                    print(title)
+            except ElementClickInterceptedException:
+                pass
+
+'''If Poshmark lets you browser.get any page, then you could skip the pagination loading (scrolling to load every tile) and just go through each page and share that way. It wouldn't be such a time-consuming process.'''
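
A sketch of the closing idea in poshare.py: navigate straight to each closet page with browser.get instead of sending END keys until everything has loaded. It assumes the closet URL accepts a max_id page parameter the way the search URLs in pms.py and posh.py do, which this commit does not verify, and it reuses poshare.py's browser, wait, time, random, and exception imports rather than standing alone:

    # Hypothetical paged closet URL; max_id paging is an assumption borrowed from the search URLs.
    closet_base = 'https://poshmark.com/closet/speanutbutter?department=Women&sort_by=price_desc&max_id='

    page = 1
    while True:
        browser.get(closet_base + str(page))
        tiles = browser.find_elements_by_xpath("//div[@id='tiles-con']/div")
        if not tiles:
            break  # no tiles returned, so treat this as the last page
        for tile in tiles:
            try:
                share = tile.find_element_by_xpath(".//i[@class = 'icon share-gray']")
            except NoSuchElementException:
                continue  # sold or not-for-sale tiles have no share icon
            time.sleep(random.uniform(.6, 1.2))
            try:
                share.click()
                # ...same follower/party and recaptcha handling as in the loop above...
            except ElementClickInterceptedException:
                pass
        page += 1
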
"I'm not a robot"): + input('Finish recapcha and press "enter" to continue') + print(i) + title = list_titles[i].get_text() + print(title) + except ElementClickInterceptedException: + pass + +'''If poshmark lets you browser.get any page then you should skip the pagination loading to load all the pages and then just go through each page and share that way. It wouldn't be such a time consuming process''' diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2e1ecd3 Binary files /dev/null and b/requirements.txt differ