.gitignore updated

spbeach46 2022-02-21 19:27:02 -07:00
parent 869a4613fb
commit 42f6cd6f4e
15 changed files with 839 additions and 377 deletions

.gitignore (vendored, new file, 161 lines added)
@@ -0,0 +1,161 @@
# User-defined
*.txt
*.csv
*.yaml
config.py
passwords.txt
training_images/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

.posh.py.swp (new binary file, not shown)

.posh.py.un~ (new binary file, not shown)

_.swp (new binary file, not shown)

bulkpp (submodule, 2 lines changed)

@@ -1 +1 @@
-Subproject commit 32d3febba22b73b37ac188d84330ae789288c535
+Subproject commit a25d1b578e93fa63bdc65c02a6c007cbec5b2aa0

chromedriver.exe (new binary file, not shown)

geckodriver.exe (new binary file, not shown)

pms.py (76 lines changed)

@@ -2,7 +2,7 @@ import seaborn as sns
 import numpy as np
 import matplotlib.pyplot as plt
 import re
-import bs4
+# import bs4
 from bs4 import BeautifulSoup as b
 import requests
 import time
@@ -14,17 +14,20 @@ while True:
     start = time.time()
     SQ = str(input())
-    SQ_1 = SQ.replace(' ', '+').replace('NEW','').replace('men', '').replace('women', '')
+    SQ_1 = SQ.replace(' ', '+').replace('NEW',
+                      '').replace('men', '').replace('women', '')
     gender = ['Men', 'Women']
-    #&color[]=color
+    # &color[]=color
-    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple', 'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
+    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
+                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']
-    for i in range(0,len(posh_colors)):
+    for i in range(0, len(posh_colors)):
         if posh_colors[i] in SQ:
             url_color = '&color[]=' + posh_colors[i]
             url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
-            url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
+            url_separator1 = "&availability=sold_out" + url_color + \
+                "&condition=nwt_and_ret&department=All&max_id="
             SQ_1 = SQ_1.replace(posh_colors[i], '')
             break
         else:
@@ -43,73 +46,72 @@ while True:
     html = requests.get(url).text
     html1 = requests.get(url_1).text
-    soup = b(html, "html.parser")
+    soup = b(html, "lxml")
-    soup1 = b(html1,'html.parser')
+    soup1 = b(html1, 'lxml')
     # Begin new and used condition items price list:
     for listing in soup.findAll("div", {"class": "item__details"}):
         price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
         indices = price.find('$')
-        price = price[indices+1:]
+        price = price[indices + 1:]
         space = price.find(' ')
-        price = int(price[:space-1])
+        price = int(price[:space - 1])
         prices.append(price)
     while True:
-        last_page = soup.find_all(string = re.compile('No Listings Found'))
+        last_page = soup.find_all(string=re.compile('No Listings Found'))
         if last_page:
             break
         pg = pg + 1
         url = base_url + SQ_1 + url_separator + str(pg)
         html = requests.get(url).text
-        soup = b(html, "html.parser")
+        soup = b(html, "lxml")
         for listing in soup.findAll("div", {"class": "item__details"}):
-            price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-            #indices = [i for i, dollars in enumerate(price) if dollars == '$']
-            #price = int(price[1:indices[1]-1])
+            price = listing.findAll(
+                "span", {"class": "p--t--1 fw--bold"})[0].text
+            # indices = [i for i, dollars in enumerate(price) if dollars == '$']
+            # price = int(price[1:indices[1]-1])
             indices = price.find('$')
-            price = price[indices+1:]
+            price = price[indices + 1:]
             space = price.find(' ')
-            price = int(price[:space-1])
+            price = int(price[:space - 1])
             prices.append(price)
     # Begin new condition item prices list:
     for listing in soup1.findAll("div", {"class": "item__details"}):
         price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-        #indices = [i for i, dollars in enumerate(price1) if dollars == '$']
-        #price1 = int(price1[1:indices[1]-1])
+        # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
+        # price1 = int(price1[1:indices[1]-1])
         indices = price1.find('$')
-        price1 = price1[indices+1:]
+        price1 = price1[indices + 1:]
         space = price1.find(' ')
-        price1 = int(price1[:space-1])
+        price1 = int(price1[:space - 1])
         prices1.append(price1)
     while True:
-        last_page = soup1.find_all(string = re.compile('No Listings Found'))
+        last_page = soup1.find_all(string=re.compile('No Listings Found'))
         if last_page:
             break
         pg = pg + 1
         url_1 = base_url + SQ_1 + url_separator1 + str(pg)
         html1 = requests.get(url_1).text
-        soup1 = b(html1, "html.parser")
+        soup1 = b(html1, "lxml")
         for listing in soup1.findAll("div", {"class": "item__details"}):
-            price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
-            #indices = [i for i, dollars in enumerate(price1) if dollars == '$']
-            #price1 = int(price1[1:indices[1]-1])
+            price1 = listing.findAll(
+                "span", {"class": "p--t--1 fw--bold"})[0].text
+            # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
+            # price1 = int(price1[1:indices[1]-1])
             indices = price1.find('$')
-            price1 = price1[indices+1:]
+            price1 = price1[indices + 1:]
             space = price1.find(' ')
-            price1 = int(price1[:space-1])
+            price1 = int(price1[:space - 1])
             prices1.append(price1)
     # Begin Element-wise substraction of new condition items price list from new&used items price list:
     print(len(prices), 'NEW & USED')
     print(len(prices1), 'NEW')
@@ -122,7 +124,8 @@ while True:
         prices.remove(element)
     if 'NEW' in SQ:
-        kde_datapoints = sns.kdeplot(prices1, shade = True).get_lines()[0].get_data()
+        kde_datapoints = sns.kdeplot(prices1, shade=True).get_lines()[
+            0].get_data()
         sns.rugplot(prices1)
         print(str(len(prices1)) + " Results" + "\n")
         print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n")
@@ -134,7 +137,8 @@ while True:
         kde_y = kde_datapoints[1]
         optimal_price = kde_x[np.argmax(kde_y)]
         print("Optimal Price New = $" + str(optimal_price) + "\n")
-        print("Optimal Price Including Shipping New = $" + str(optimal_price + 6.79) + "\n")
+        print("Optimal Price Including Shipping New = $" +
+              str(optimal_price + 6.79) + "\n")
         print("URL Link (New): " + url_1b + "\n")
         plt.ylabel('KDE')
         plt.xlabel('Price ($)')
@@ -142,7 +146,8 @@ while True:
     else:
         try:
-            kde_datapoints = sns.kdeplot(prices, shade = True).get_lines()[0].get_data()
+            kde_datapoints = sns.kdeplot(prices, shade=True).get_lines()[
+                0].get_data()
             sns.rugplot(prices)
             print(str(len(prices)) + " Results" + "\n")
             print("Average Price Sold Used = $" + str(np.mean(prices)) + "\n")
@@ -156,7 +161,8 @@ while True:
             kde_y = kde_datapoints[1]
             optimal_price = kde_x[np.argmax(kde_y)]
             print("Optimal Price Used = $" + str(optimal_price) + "\n")
-            print("Optimal Price Including Shipping Used = $" + str(optimal_price + 6.79) + "\n")
+            print("Optimal Price Including Shipping Used = $" +
+                  str(optimal_price + 6.79) + "\n")
             print("URL Link: " + url_a + "\n")
             plt.ylabel('KDE')
             plt.xlabel('Price ($)')
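As an aside (not part of this commit), the "optimal price" pms.py reports is simply the x-value at which the seaborn KDE of the scraped sold prices peaks. A minimal sketch of that estimate, using made-up prices in place of the scraped ones:

import numpy as np
import seaborn as sns

prices = [18, 20, 22, 25, 25, 27, 30, 45]    # hypothetical sold prices
ax = sns.kdeplot(prices)                     # fit and draw the density curve
kde_x, kde_y = ax.get_lines()[0].get_data()  # sampled points of the plotted curve
optimal_price = kde_x[np.argmax(kde_y)]      # price where the density peaks
print("Optimal Price = $" + str(round(optimal_price, 2)))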

posh.py (42 lines changed)

@@ -6,10 +6,10 @@ import concurrent.futures
 import numpy as np
 # import matplotlib.pyplot as plt
-def main():
+search_query = str(input('Title Search: '))
 def url_base_builder(search_query):
     genders = ['Men', 'Women']
     posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
@@ -43,7 +43,7 @@ def main():
     return all_sold_url_base, new_sold_url_base
 def all_sold_list_builder(i):
     bases = url_base_builder(search_query)
     all_sold_url_base = bases[0]
     all_sold_prices = []
@@ -53,15 +53,18 @@ def main():
     # last_page = soup.find(string = re.compile('No Listings Found'))
     for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
         price = price.get_text()
-        dollar_index = price.find('$')
-        price = price[dollar_index+1:]
-        space = price.find(' ')
-        price = int(price[:space-1])
+        price = re.findall(r'[^\$]\d+', price)[0]
+        price = float(price)
+        # dollar_index = price.find('$')
+        # price = price[dollar_index+1:]
+        # space = price.find(' ')
+        # price = int(price[:space-1])
         all_sold_prices.append(price)
+    # all_sold_prices = list(map(float, re.findall(r'[^\$]\d+[.]\d+', html)))
     return all_sold_prices
 def new_sold_list_builder(i):
     bases = url_base_builder(search_query)
     new_sold_url_base = bases[1]
     new_sold_prices = []
@@ -69,24 +72,24 @@ def main():
     html = requests.get(url).text
     soup = b(html, "lxml")
     # last_page = soup.find(string = re.compile('No Listings Found'))#this is present in all pages that don't have a full 48 listings on them. So you end up with an empty price list becuase of your conditional statement
+    # new_sold_prices = list(map(float, re.findall(r'[^\$]\d+[.]\d+', html)))
     for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
         price = price.get_text()
-        dollar_index = price.find('$')
-        price = price[dollar_index+1:]
-        space = price.find(' ')
-        price = int(price[:space-1])
+        price = re.findall(r'[^\$]\d+', price)[0]
+        price = float(price)
+        # dollar_index = price.find('$')
+        # price = price[dollar_index+1:]
+        # space = price.find(' ')
+        # price = int(price[:space-1])
         new_sold_prices.append(price)
     return new_sold_prices
-search_query = str(input('Title Search: '))
+def main():
     start = time.time()
-    page_list = list(range(1, 5))
-    all_sold_list = []
-    new_sold_list = []
     with concurrent.futures.ThreadPoolExecutor() as executor:
         for future in executor.map(all_sold_list_builder, page_list):
             all_sold_list.extend(future)
@@ -115,7 +118,12 @@ def main():
     print('Average New Sold Price', average_new_sold_price, new_sold_results)
     print(total_results)
 if __name__ == '__main__':
+    page_list = list(range(1, 5))
+    all_sold_list = []
+    new_sold_list = []
     main()
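For illustration only (not part of this commit), the refactored posh.py fans the page scrape out with concurrent.futures: executor.map runs a builder function once per page number and the per-page price lists are merged into one flat list. A minimal sketch of that pattern, with fetch_prices as a hypothetical stand-in for all_sold_list_builder:

import concurrent.futures

def fetch_prices(page):
    # hypothetical stand-in for all_sold_list_builder(page); returns that page's prices
    return [10.0 * page, 12.5 * page]

page_list = list(range(1, 5))
all_prices = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map preserves page order; each result is one page's list of prices
    for page_prices in executor.map(fetch_prices, page_list):
        all_prices.extend(page_prices)
print(all_prices)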

posh.py~ (new file, 130 lines)

@@ -0,0 +1,130 @@
import requests
from bs4 import BeautifulSoup as b
import time
import re
import concurrent.futures
import numpy as np
# import matplotlib.pyplot as plt

search_query = str(input('Title Search: '))


def url_base_builder(search_query):
    genders = ['Men', 'Women']

    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']

    for i in range(0, len(posh_colors)):
        if posh_colors[i] in search_query:
            url_color = '&color[]=' + posh_colors[i]
            color = posh_colors[i]
            break
        else:
            color = ''
            url_color = ''
    for i in range(0, len(genders)):
        if genders[i] in search_query:
            url_gender = '&department=' + genders[i]
            gender = genders[i]
            break
        else:
            gender = ''
            url_gender = '&department=All'

    sq = search_query.replace(color, '').replace(gender, '').replace('NEW', '').replace(' ', '+')

    all_sold_url_base = 'https://poshmark.com/search?query=' + sq + \
        "&availability=sold_out" + url_color + url_gender + '&max_id='

    new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + \
        '&condition=nwt_and_ret' + url_color + url_gender + '&max_id='

    return all_sold_url_base, new_sold_url_base


def all_sold_list_builder(i):
    bases = url_base_builder(search_query)
    all_sold_url_base = bases[0]
    all_sold_prices = []
    url = all_sold_url_base + str(i)
    html = requests.get(url).text
    soup = b(html, "lxml")
    # last_page = soup.find(string = re.compile('No Listings Found'))
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
        price = price.get_text()
        price = re.findall(r'[^\$]\d+', price)[0]
        price = float(price)
        # dollar_index = price.find('$')
        # price = price[dollar_index+1:]
        # space = price.find(' ')
        # price = int(price[:space-1])
        all_sold_prices.append(price)
    # all_sold_prices = list(map(float, re.findall(r'[^\$]\d+[.]\d+', html)))
    return all_sold_prices


def new_sold_list_builder(i):
    bases = url_base_builder(search_query)
    new_sold_url_base = bases[1]
    new_sold_prices = []
    url = new_sold_url_base + str(i)
    html = requests.get(url).text
    soup = b(html, "lxml")
    # last_page = soup.find(string = re.compile('No Listings Found'))#this is present in all pages that don't have a full 48 listings on them. So you end up with an empty price list becuase of your conditional statement
    # new_sold_prices = list(map(float, re.findall(r'[^\$]\d+[.]\d+', html)))
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
        price = price.get_text()
        price = re.findall(r'[^\$]\d+', price)[0]
        price = float(price)
        # dollar_index = price.find('$')
        # price = price[dollar_index+1:]
        # space = price.find(' ')
        # price = int(price[:space-1])
        new_sold_prices.append(price)
    return new_sold_prices


def main():
    start = time.time()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for future in executor.map(all_sold_list_builder, page_list):
            all_sold_list.extend(future)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for future in executor.map(new_sold_list_builder, page_list):
            new_sold_list.extend(future)  # if you can pull the nwt price simultaneously with used then you won't have to use this

    for element in new_sold_list:
        all_sold_list.remove(element)

    used_sold_list = all_sold_list

    average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
    average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))

    used_sold_results = str(len(used_sold_list)) + ' Used Results'
    new_sold_results = str(len(new_sold_list)) + ' NWT Results'
    total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'

    end = time.time()

    print(end - start, 'seconds')
    print('Average Used Sold Price', average_used_sold_price, used_sold_results)
    print('Average New Sold Price', average_new_sold_price, new_sold_results)
    print(total_results)


if __name__ == '__main__':
    page_list = list(range(1, 5))
    all_sold_list = []
    new_sold_list = []
    main()
'''to speed up the program you can include a few things: 1) only parse the total results and sift for the NWT listings to create a separate NWT list 2) Implement processpoolexecutor to use more than one worker to parse the pages 3) find a better way to find the last page so you don't have to make more requests than necessary. This could be either taking the "smallest" "no listings found" page of the pages while excluding the others after the smallest one is found. Or, determining from the request headers whether a page is worth downloading or not 4) using a while loop in chunks of 2-4 pages to find the last page in conjunction with number 3'''
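A hedged sketch of idea (3) from the note above, not code from this commit: stop paging as soon as Poshmark's "No Listings Found" marker appears, instead of always requesting a fixed range of pages. Here url_base stands for either base URL produced by url_base_builder, and the price parsing is simplified:

import requests
from bs4 import BeautifulSoup as b

def scrape_until_last_page(url_base, max_pages=20):
    prices = []
    for page in range(1, max_pages + 1):
        soup = b(requests.get(url_base + str(page)).text, "lxml")
        for span in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
            text = span.get_text().strip()
            prices.append(float(text.lstrip('$').split()[0]))
        # per the comment in new_sold_list_builder, this marker appears on any
        # page without a full set of listings, so it flags the last page
        if soup.find(string=lambda s: s and "No Listings Found" in s):
            break
    return prices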

@@ -11,29 +11,53 @@ from selenium.webdriver.common.keys import Keys
 from bs4 import BeautifulSoup as b
 import time
 from selenium.common.exceptions import ElementClickInterceptedException
+import winsound
-browser = webdriver.Firefox(executable_path="C:/Users/unknown/Desktop/projects/geckodriver")
+browser = webdriver.Firefox(executable_path="C:/Users/unknown/Desktop/projects/poshmark/geckodriver")
 wait = WebDriverWait(browser, 30)
 browser.get('https://poshmark.com/login')
 usename = browser.find_element_by_id('login_form_username_email')
 usename.send_keys('SpeanutButter')
 psw = browser.find_element_by_id('login_form_password')
 psw.send_keys('***REMOVED***')
+iframe = 'default value'
+def reCaptcha_finder(): # Still not working. Works on the first recaptcha popup but not second one for some reason
+    global iframe
+    time.sleep(4)
+    iframes = browser.find_elements_by_tag_name('iframe')
+    for frame in iframes:
+        if not iframe in iframes:
+            browser.switch_to.frame(frame)
+            try:
+                browser.find_element_by_xpath('//label[text()="I\'m not a robot"]')
+                winsound.Beep(540, 1000)
+                iframe = frame
+                input('Complete reCaptcha then press "enter" to continue')
+                browser.switch_to.default_content()
+                break
+            except NoSuchElementException:
+                browser.switch_to.default_content()
 psw.submit()
-input('press "enter" to continue')
+reCaptcha_finder()
+# have to find different element for log in after recaptcha is completed. Do manually for now
 wait.until(EC.presence_of_element_located((By.XPATH, "//title[text()='Feed - Poshmark']")))
 browser.get('https://poshmark.com/closet/speanutbutter?department=Women&sort_by=price_desc')
 input('press "enter" to continue')
 html = browser.page_source
-soup = b(html)
+soup = b(html, "lxml")
 elm = browser.find_element_by_tag_name('html')
-while not soup.find('i', text = "Not for Sale"):
+while not soup.find('span', text="Not for Sale"):
     elm.send_keys(Keys.END)
     html = browser.page_source
     soup = b(html)
-list_titles = soup.find_all('a',{'class':'title'})
+list_titles = soup.find_all('a', {'class': 'title'})
-active_listings = soup.find_all('i', {'class':"icon share-gray"})
+active_listings = soup.find_all('i', {'class': "icon share-gray"})
 print(len(active_listings))
 container = browser.find_elements_by_xpath("//div[@id='tiles-con']/div")
 i = -1
@@ -50,39 +74,29 @@ for divs in container:
             pass
     except NoSuchElementException:
         share = divs.find_element_by_xpath(".//i[@class = 'icon share-gray']")
-        time.sleep(random.uniform(.6,1.2))
+        time.sleep(random.uniform(.6, 1.2))
         try:
             share.click()
-            if soup.find('input', id = "recaptcha-token"):
-                input('Finish recapcha and press "enter" to continue')
-            if soup.find('span', text = "I'm not a robot"):
-                input('Finish recapcha and press "enter" to continue')
+            # reCaptcha_finder() # Might only need reCaptcha_finder() here and not afterwards or the other way around. Pay attention to where the recaptcha occurs
             if share_to == 'F':
                 wait.until(EC.presence_of_element_located((By.XPATH, "//span[text()='To My Followers']")))
                 share = browser.find_element_by_xpath("//span[text()='To My Followers']")
-                time.sleep(random.uniform(.6,1.2))
+                time.sleep(random.uniform(.6, 1.2))
                 share.click()
-                if soup.find('input', id = "recaptcha-token"):
-                    input('Finish recapcha and press "enter" to continue')
+                reCaptcha_finder()
                 print(i)
-                if soup.find('span', text = "I'm not a robot"):
-                    input('Finish recapcha and press "enter" to continue')
                 title = list_titles[i].get_text()
                 print(title)
             if share_to == 'P':
                 wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Happening Now')]")))
                 share = browser.find_element_by_xpath("//*[contains(text(), 'Happening Now')]")
-                time.sleep(random.uniform(.6,1.2))
+                time.sleep(random.uniform(.6, 1.2))
                 share.click()
-                if soup.find('input', id = "recaptcha-token"):
-                    input('Finish recapcha and press "enter" to continue')
-                if soup.find('span', text = "I'm not a robot"):
-                    input('Finish recapcha and press "enter" to continue')
+                reCaptcha_finder()
                 print(i)
                 title = list_titles[i].get_text()
                 print(title)
         except ElementClickInterceptedException:
             pass
-'''If poshmark lets you browser.get any page then you should skip the pagination loading to load all the pages and then just go through each page and share that way. It wouldn't be such a time consuming process'''
+# If poshmark lets you browser.get any page then you should skip the pagination loading to load all the pages and then just go through each page and share that way. It wouldn't be such a time consuming process

poshlogin_requests.py (new file, 2 lines)

@@ -0,0 +1,2 @@
import requests
import

psm.py (new file, content not shown)

sharevision.ipynb (new file, 141 lines)

@@ -0,0 +1,141 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.webdriver.support.select import Select\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.common.action_chains import ActionChains\n",
"import random\n",
"from selenium.common.exceptions import NoSuchElementException\n",
"from selenium.webdriver.common.keys import Keys\n",
"from bs4 import BeautifulSoup as b\n",
"import time\n",
"from selenium.common.exceptions import ElementClickInterceptedException\n",
"import winsound"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"browser = webdriver.Firefox(executable_path=\"C:/Users/unknown/Desktop/projects/poshmark/geckodriver\")\n",
"wait = WebDriverWait(browser, 30)\n",
"browser.get('https://poshmark.com/login')\n",
"usename = browser.find_element_by_id('login_form_username_email')\n",
"usename.send_keys('SpeanutButter')\n",
"psw = browser.find_element_by_id('login_form_password')\n",
"psw.send_keys('***REMOVED***')\n",
"iframe = 'default value'\n",
"page_number = list(range(1,18))\n",
"base_url = 'https://poshmark.com/closet/speanutbutter?availability=available&sort_by=price_desc&all_size=true&max_id='\n",
"\n",
"def reCaptcha_finder(): # Still not working. Works on the first recaptcha popup but not second one for some reason\n",
" global iframe\n",
" time.sleep(4)\n",
" iframes = browser.find_elements_by_tag_name('iframe')\n",
" for frame in iframes:\n",
" if not iframe in iframes:\n",
" browser.switch_to.frame(frame)\n",
" try:\n",
" browser.find_element_by_xpath('//label[text()=\"I\\'m not a robot\"]')\n",
" winsound.Beep(540, 1000)\n",
" iframe = frame\n",
" input('Complete reCaptcha then press \"enter\" to continue')\n",
" browser.switch_to.default_content()\n",
" break\n",
" except NoSuchElementException:\n",
" browser.switch_to.default_content()\n",
"\n",
"psw.submit()\n",
"\n",
"reCaptcha_finder()\n",
"\n",
"wait.until(EC.presence_of_element_located((By.XPATH, \"//title[text()='Feed - Poshmark']\")))\n",
"\n",
"\n",
"for page in page_number:\n",
" \n",
" browser.get(base_url+str(page))\n",
" input('press \"enter\" to continue')\n",
"\n",
" html = browser.page_source\n",
" soup = b(html, \"lxml\")\n",
"\n",
" list_titles = soup.find_all('a', {'class': 'title'})\n",
" active_listings = soup.find_all('i', {'class': \"icon share-gray\"})\n",
" print(len(active_listings))\n",
" container = browser.find_elements_by_xpath(\"//div[@id='tiles-con']/div\")\n",
" i = -1\n",
" share_to = input('share to followers (F) or share to party (P)?: ')\n",
"\n",
" for divs in container:\n",
" i += 1\n",
" wait.until(EC.presence_of_element_located((By.XPATH, \".//i[@class = 'icon share-gray']\")))\n",
" try:\n",
" if divs.find_element_by_xpath(\".//i[@class = 'icon inventory-tag not-for-sale-tag']\"):\n",
" pass\n",
" except NoSuchElementException:\n",
" try:\n",
" if divs.find_element_by_xpath(\".//i[@class = 'icon inventory-tag sold-tag']\"):\n",
" pass\n",
" except NoSuchElementException:\n",
" share = divs.find_element_by_xpath(\".//i[@class = 'icon share-gray']\")\n",
" time.sleep(random.uniform(.6, 1.2))\n",
" try:\n",
" share.click()\n",
" # reCaptcha_finder() # Might only need reCaptcha_finder() here and not afterwards or the other way around. Pay attention to where the recaptcha occurs\n",
" if share_to == 'F':\n",
" wait.until(EC.presence_of_element_located((By.XPATH, \"//span[text()='To My Followers']\")))\n",
" share = browser.find_element_by_xpath(\"//span[text()='To My Followers']\")\n",
" time.sleep(random.uniform(.6, 1.2))\n",
" share.click()\n",
" reCaptcha_finder()\n",
" print(i)\n",
" title = list_titles[i].get_text()\n",
" print(title)\n",
" if share_to == 'P':\n",
" wait.until(EC.presence_of_element_located((By.XPATH, \"//*[contains(text(), 'Happening Now')]\")))\n",
" share = browser.find_element_by_xpath(\"//*[contains(text(), 'Happening Now')]\")\n",
" time.sleep(random.uniform(.6, 1.2))\n",
" share.click()\n",
" reCaptcha_finder()\n",
" print(i)\n",
" title = list_titles[i].get_text()\n",
" print(title)\n",
" except ElementClickInterceptedException:\n",
" pass\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "scrapers",
"language": "python",
"name": "scrapers"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}