.gitignore updated
This commit is contained in:
parent
869a4613fb
commit
42f6cd6f4e
161 .gitignore vendored Normal file
@@ -0,0 +1,161 @@
# User-defined
*.txt
*.csv
*.yaml
config.py
passwords.txt
training_images/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
BIN .posh.py.swp Normal file
Binary file not shown.
BIN .posh.py.un~ Normal file
Binary file not shown.
2 bulkpp
@@ -1 +1 @@
-Subproject commit 32d3febba22b73b37ac188d84330ae789288c535
+Subproject commit a25d1b578e93fa63bdc65c02a6c007cbec5b2aa0
BIN chromedriver.exe Normal file
Binary file not shown.
BIN geckodriver.exe Normal file
Binary file not shown.
338 pms.py
@@ -1,166 +1,172 @@
Relative to the previous version, "import bs4" is now commented out, BeautifulSoup uses the "lxml" parser instead of "html.parser", and the file was reformatted (PEP 8 spacing and line wraps). The updated pms.py reads:

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import re
# import bs4
from bs4 import BeautifulSoup as b
import requests
import time

# Initial Search Query URL to Find ItemCondition Code
while True:
    print("Title Search:")

    start = time.time()

    SQ = str(input())
    SQ_1 = SQ.replace(' ', '+').replace('NEW',
                                        '').replace('men', '').replace('women', '')
    gender = ['Men', 'Women']

    # &color[]=color
    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']

    for i in range(0, len(posh_colors)):
        if posh_colors[i] in SQ:
            url_color = '&color[]=' + posh_colors[i]
            url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
            url_separator1 = "&availability=sold_out" + url_color + \
                "&condition=nwt_and_ret&department=All&max_id="
            SQ_1 = SQ_1.replace(posh_colors[i], '')
            break
        else:
            url_separator = "&availability=sold_out&department=All&max_id="
            url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="

    prices = []
    prices1 = []
    base_url = "https://poshmark.com/search?query="

    pg = 1
    url = base_url + SQ_1 + url_separator + str(pg)
    url_1 = base_url + SQ_1 + url_separator1 + str(pg)
    url_a = base_url + SQ_1 + url_separator + str(pg)
    url_1b = base_url + SQ_1 + url_separator1 + str(pg)

    html = requests.get(url).text
    html1 = requests.get(url_1).text
    soup = b(html, "lxml")
    soup1 = b(html1, 'lxml')

    # Begin new and used condition items price list:

    for listing in soup.findAll("div", {"class": "item__details"}):
        price = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
        indices = price.find('$')
        price = price[indices + 1:]
        space = price.find(' ')
        price = int(price[:space - 1])
        prices.append(price)

    while True:
        last_page = soup.find_all(string=re.compile('No Listings Found'))
        if last_page:
            break
        pg = pg + 1
        url = base_url + SQ_1 + url_separator + str(pg)
        html = requests.get(url).text
        soup = b(html, "lxml")

        for listing in soup.findAll("div", {"class": "item__details"}):
            price = listing.findAll(
                "span", {"class": "p--t--1 fw--bold"})[0].text
            # indices = [i for i, dollars in enumerate(price) if dollars == '$']
            # price = int(price[1:indices[1]-1])
            indices = price.find('$')
            price = price[indices + 1:]
            space = price.find(' ')
            price = int(price[:space - 1])
            prices.append(price)

    # Begin new condition item prices list:

    for listing in soup1.findAll("div", {"class": "item__details"}):
        price1 = listing.findAll("span", {"class": "p--t--1 fw--bold"})[0].text
        # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
        # price1 = int(price1[1:indices[1]-1])
        indices = price1.find('$')
        price1 = price1[indices + 1:]
        space = price1.find(' ')
        price1 = int(price1[:space - 1])
        prices1.append(price1)

    while True:
        last_page = soup1.find_all(string=re.compile('No Listings Found'))
        if last_page:
            break
        pg = pg + 1
        url_1 = base_url + SQ_1 + url_separator1 + str(pg)
        html1 = requests.get(url_1).text
        soup1 = b(html1, "lxml")

        for listing in soup1.findAll("div", {"class": "item__details"}):
            price1 = listing.findAll(
                "span", {"class": "p--t--1 fw--bold"})[0].text
            # indices = [i for i, dollars in enumerate(price1) if dollars == '$']
            # price1 = int(price1[1:indices[1]-1])
            indices = price1.find('$')
            price1 = price1[indices + 1:]
            space = price1.find(' ')
            price1 = int(price1[:space - 1])
            prices1.append(price1)

    # Begin Element-wise substraction of new condition items price list from new&used items price list:
    print(len(prices), 'NEW & USED')
    print(len(prices1), 'NEW')

    end = time.time()

    print(end - start)

    for element in prices1:
        prices.remove(element)

    if 'NEW' in SQ:
        kde_datapoints = sns.kdeplot(prices1, shade=True).get_lines()[
            0].get_data()
        sns.rugplot(prices1)
        print(str(len(prices1)) + " Results" + "\n")
        print("Average Price Sold New = $" + str(np.mean(prices1)) + "\n")
        total_price = np.mean(prices1) + 6.79
        print("Average Total Price New = $" + str(total_price) + "\n")
        print("Flat Rate Shipping = $6.79" + "\n")

        kde_x = kde_datapoints[0]
        kde_y = kde_datapoints[1]
        optimal_price = kde_x[np.argmax(kde_y)]
        print("Optimal Price New = $" + str(optimal_price) + "\n")
        print("Optimal Price Including Shipping New = $" +
              str(optimal_price + 6.79) + "\n")
        print("URL Link (New): " + url_1b + "\n")
        plt.ylabel('KDE')
        plt.xlabel('Price ($)')
        plt.show()
    else:
        try:
            kde_datapoints = sns.kdeplot(prices, shade=True).get_lines()[
                0].get_data()
            sns.rugplot(prices)
            print(str(len(prices)) + " Results" + "\n")
            print("Average Price Sold Used = $" + str(np.mean(prices)) + "\n")
            total_price = np.mean(prices) + 6.79
            print("Average Total Price Used = $" + str(total_price) + "\n")
            print("Flat Rate Shipping = $6.79" + "\n")
            import winsound
            winsound.Beep(440, 300)

            kde_x = kde_datapoints[0]
            kde_y = kde_datapoints[1]
            optimal_price = kde_x[np.argmax(kde_y)]
            print("Optimal Price Used = $" + str(optimal_price) + "\n")
            print("Optimal Price Including Shipping Used = $" +
                  str(optimal_price + 6.79) + "\n")
            print("URL Link: " + url_a + "\n")
            plt.ylabel('KDE')
            plt.xlabel('Price ($)')
            plt.show()
        except IndexError:
            print('\n' + '0 results' + '\n')
            pass
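The "optimal price" printed by pms.py is the price at the peak of a kernel density estimate of the sold prices, recovered by reading the data points back off the seaborn plot. A minimal sketch of the same calculation done directly with scipy.stats.gaussian_kde, using made-up example prices rather than scraped data:

import numpy as np
from scipy.stats import gaussian_kde

sold_prices = [12, 15, 15, 18, 20, 22, 25, 25, 30]  # hypothetical example data, not scraped results

# Evaluate the KDE on a grid and take the price where the density peaks,
# which is what kde_x[np.argmax(kde_y)] does above with seaborn's line data.
kde = gaussian_kde(sold_prices)
grid = np.linspace(min(sold_prices), max(sold_prices), 200)
density = kde(grid)
optimal_price = grid[np.argmax(density)]
print(round(float(optimal_price), 2))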
252 posh.py
@@ -1,122 +1,130 @@
Relative to the previous version, the title prompt moved to module level, price parsing switched from slicing on the '$' index to a regex (re.findall(r'[^\$]\d+', ...)) that yields floats, and the timing, page_list, and result-list setup now live in main() and the if __name__ == '__main__': block. The updated posh.py reads:

import requests
from bs4 import BeautifulSoup as b
import time
import re
import concurrent.futures
import numpy as np
# import matplotlib.pyplot as plt

search_query = str(input('Title Search: '))


def url_base_builder(search_query):
    genders = ['Men', 'Women']

    posh_colors = ['Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
                   'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan']

    for i in range(0, len(posh_colors)):
        if posh_colors[i] in search_query:
            url_color = '&color[]=' + posh_colors[i]
            color = posh_colors[i]
            break
        else:
            color = ''
            url_color = ''
    for i in range(0, len(genders)):
        if genders[i] in search_query:
            url_gender = '&department=' + genders[i]
            gender = genders[i]
            break
        else:
            gender = ''
            url_gender = '&department=All'

    sq = search_query.replace(color, '').replace(gender, '').replace('NEW', '').replace(' ', '+')

    all_sold_url_base = 'https://poshmark.com/search?query=' + sq + \
        "&availability=sold_out" + url_color + url_gender + '&max_id='

    new_sold_url_base = 'https://poshmark.com/search?query=' + sq + '&availability=sold_out' + \
        '&condition=nwt_and_ret' + url_color + url_gender + '&max_id='

    return all_sold_url_base, new_sold_url_base


def all_sold_list_builder(i):
    bases = url_base_builder(search_query)
    all_sold_url_base = bases[0]
    all_sold_prices = []
    url = all_sold_url_base + str(i)
    html = requests.get(url).text
    soup = b(html, "lxml")
    # last_page = soup.find(string = re.compile('No Listings Found'))
    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
        price = price.get_text()
        price = re.findall(r'[^\$]\d+', price)[0]
        price = float(price)
        # dollar_index = price.find('$')
        # price = price[dollar_index+1:]
        # space = price.find(' ')
        # price = int(price[:space-1])
        all_sold_prices.append(price)
    # all_sold_prices = list(map(float, re.findall(r'[^\$]\d+[.]\d+', html)))
    return all_sold_prices


def new_sold_list_builder(i):
    bases = url_base_builder(search_query)
    new_sold_url_base = bases[1]
    new_sold_prices = []
    url = new_sold_url_base + str(i)
    html = requests.get(url).text
    soup = b(html, "lxml")
    # last_page = soup.find(string = re.compile('No Listings Found'))#this is present in all pages that don't have a full 48 listings on them. So you end up with an empty price list becuase of your conditional statement
    # new_sold_prices = list(map(float, re.findall(r'[^\$]\d+[.]\d+', html)))

    for price in soup.find_all('span', {'class': 'p--t--1 fw--bold'}):
        price = price.get_text()
        price = re.findall(r'[^\$]\d+', price)[0]
        price = float(price)
        # dollar_index = price.find('$')
        # price = price[dollar_index+1:]
        # space = price.find(' ')
        # price = int(price[:space-1])
        new_sold_prices.append(price)
    return new_sold_prices


def main():

    start = time.time()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for future in executor.map(all_sold_list_builder, page_list):
            all_sold_list.extend(future)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for future in executor.map(new_sold_list_builder, page_list):
            new_sold_list.extend(future)# if you can pull the nwt price simultaneously with used then you won't have to use this

    for element in new_sold_list:
        all_sold_list.remove(element)
    used_sold_list = all_sold_list

    average_used_sold_price = '$' + str(round(np.mean(used_sold_list), 2))
    average_new_sold_price = '$' + str(round(np.mean(new_sold_list), 2))

    used_sold_results = str(len(used_sold_list)) + ' Used Results'
    new_sold_results = str(len(new_sold_list)) + ' NWT Results'
    total_results = str(len(used_sold_list) + len(new_sold_list)) + ' Total Results'

    end = time.time()

    print(end - start, 'seconds')

    print('Average Used Sold Price', average_used_sold_price, used_sold_results)
    print('Average New Sold Price', average_new_sold_price, new_sold_results)
    print(total_results)


if __name__ == '__main__':

    page_list = list(range(1, 5))
    all_sold_list = []
    new_sold_list = []
    main()


'''to speed up the program you can include a few things: 1) only parse the total results and sift for the NWT listings to create a separate NWT list 2) Implement processpoolexecutor to use more than one worker to parse the pages 3) find a better way to find the last page so you don't have to make more requests than necessary. This could be either taking the "smallest" "no listings found" page of the pages while excluding the others after the smallest one is found. Or, determining from the request headers whether a page is worth downloading or not 4) using a while loop in chunks of 2-4 pages to find the last page in conjunction with number 3'''
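The closing note above suggests (points 3 and 4) finding the last page more cheaply and requesting pages in chunks of 2-4. A rough sketch of that idea, with build_url and CHUNK as illustrative stand-ins rather than names from this repository: fetch a small batch of pages concurrently, stop as soon as a page contains "No Listings Found", and keep only the pages before it.

import concurrent.futures
import requests

CHUNK = 4  # pages requested per round, per suggestion 4) in the note above

def build_url(page):
    # Stand-in helper: same sold-listings URL scheme that url_base_builder() produces
    return ('https://poshmark.com/search?query=levis'
            '&availability=sold_out&department=All&max_id=' + str(page))

def fetch(page):
    return page, requests.get(build_url(page)).text

def fetch_until_last_page(max_pages=40):
    pages = {}
    start = 1
    while start <= max_pages:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            batch = list(executor.map(fetch, range(start, start + CHUNK)))
        for page, html in batch:
            if 'No Listings Found' in html:  # same marker the scripts above test for
                return pages
            pages[page] = html
        start += CHUNK
    return pages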
130 posh.py~ Normal file
@@ -0,0 +1,130 @@
(Identical in content to the updated posh.py shown above.)
190 poshare.py
@@ -1,88 +1,102 @@
Relative to the previous version, the login flow gained a reCaptcha_finder() helper in place of the inline 'recaptcha-token' checks, the geckodriver path now points into the poshmark/ project folder, the scroll loop waits for a 'Not for Sale' span instead of an <i> tag, and the closing note became a comment. The updated poshare.py reads:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import random
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as b
import time
from selenium.common.exceptions import ElementClickInterceptedException
import winsound


browser = webdriver.Firefox(executable_path="C:/Users/unknown/Desktop/projects/poshmark/geckodriver")
wait = WebDriverWait(browser, 30)
browser.get('https://poshmark.com/login')
usename = browser.find_element_by_id('login_form_username_email')
usename.send_keys('SpeanutButter')
psw = browser.find_element_by_id('login_form_password')
psw.send_keys('***REMOVED***')
iframe = 'default value'


def reCaptcha_finder():  # Still not working. Works on the first recaptcha popup but not second one for some reason
    global iframe
    time.sleep(4)
    iframes = browser.find_elements_by_tag_name('iframe')
    for frame in iframes:
        if not iframe in iframes:
            browser.switch_to.frame(frame)
            try:
                browser.find_element_by_xpath('//label[text()="I\'m not a robot"]')
                winsound.Beep(540, 1000)
                iframe = frame
                input('Complete reCaptcha then press "enter" to continue')
                browser.switch_to.default_content()
                break
            except NoSuchElementException:
                browser.switch_to.default_content()


psw.submit()

reCaptcha_finder()
# have to find different element for log in after recaptcha is completed. Do manually for now

wait.until(EC.presence_of_element_located((By.XPATH, "//title[text()='Feed - Poshmark']")))
browser.get('https://poshmark.com/closet/speanutbutter?department=Women&sort_by=price_desc')
input('press "enter" to continue')

html = browser.page_source
soup = b(html, "lxml")
elm = browser.find_element_by_tag_name('html')
while not soup.find('span', text="Not for Sale"):
    elm.send_keys(Keys.END)
    html = browser.page_source
    soup = b(html)
list_titles = soup.find_all('a', {'class': 'title'})
active_listings = soup.find_all('i', {'class': "icon share-gray"})
print(len(active_listings))
container = browser.find_elements_by_xpath("//div[@id='tiles-con']/div")
i = -1
share_to = input('share to followers (F) or share to party (P)?: ')
for divs in container:
    i += 1
    wait.until(EC.presence_of_element_located((By.XPATH, ".//i[@class = 'icon share-gray']")))
    try:
        if divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag not-for-sale-tag']"):
            pass
    except NoSuchElementException:
        try:
            if divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag sold-tag']"):
                pass
        except NoSuchElementException:
            share = divs.find_element_by_xpath(".//i[@class = 'icon share-gray']")
            time.sleep(random.uniform(.6, 1.2))
            try:
                share.click()
                # reCaptcha_finder() # Might only need reCaptcha_finder() here and not afterwards or the other way around. Pay attention to where the recaptcha occurs
                if share_to == 'F':
                    wait.until(EC.presence_of_element_located((By.XPATH, "//span[text()='To My Followers']")))
                    share = browser.find_element_by_xpath("//span[text()='To My Followers']")
                    time.sleep(random.uniform(.6, 1.2))
                    share.click()
                    reCaptcha_finder()
                    print(i)
                    title = list_titles[i].get_text()
                    print(title)
                if share_to == 'P':
                    wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Happening Now')]")))
                    share = browser.find_element_by_xpath("//*[contains(text(), 'Happening Now')]")
                    time.sleep(random.uniform(.6, 1.2))
                    share.click()
                    reCaptcha_finder()
                    print(i)
                    title = list_titles[i].get_text()
                    print(title)
            except ElementClickInterceptedException:
                pass

# If poshmark lets you browser.get any page then you should skip the pagination loading to load all the pages and then just go through each page and share that way. It wouldn't be such a time consuming process
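The comment on reCaptcha_finder() notes that it works on the first reCAPTCHA popup but not the second. One plausible reason, offered here as an assumption rather than a confirmed fix: iframe remembers only the most recently handled frame, so once that frame is still present on the page the check "if not iframe in iframes" is False for every frame and nothing gets inspected. A sketch that instead tracks every handled frame in a set (seen_frames and the browser parameter are illustrative names; in the script above browser is a module-level global):

import time
import winsound
from selenium.common.exceptions import NoSuchElementException

seen_frames = set()  # Selenium WebElements are hashable, so handled frames can be remembered

def reCaptcha_finder(browser):
    # Inspect every iframe that has not been handled yet and pause if a reCAPTCHA prompt is found.
    time.sleep(4)
    for frame in browser.find_elements_by_tag_name('iframe'):
        if frame in seen_frames:
            continue
        browser.switch_to.frame(frame)
        try:
            browser.find_element_by_xpath('//label[text()="I\'m not a robot"]')
            winsound.Beep(540, 1000)
            seen_frames.add(frame)
            input('Complete reCaptcha then press "enter" to continue')
            browser.switch_to.default_content()
            break
        except NoSuchElementException:
            browser.switch_to.default_content()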
2 poshlogin_requests.py Normal file
@@ -0,0 +1,2 @@
import requests
import
BIN requirements.txt
Binary file not shown.
141 sharevision.ipynb Normal file
@@ -0,0 +1,141 @@
Jupyter notebook (kernel "scrapers", Python 3.8.3, nbformat 4) with two code cells; the cell sources are shown below as plain Python rather than raw notebook JSON.

# --- Cell 1 ---
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import random
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as b
import time
from selenium.common.exceptions import ElementClickInterceptedException
import winsound

# --- Cell 2 ---
browser = webdriver.Firefox(executable_path="C:/Users/unknown/Desktop/projects/poshmark/geckodriver")
wait = WebDriverWait(browser, 30)
browser.get('https://poshmark.com/login')
usename = browser.find_element_by_id('login_form_username_email')
usename.send_keys('SpeanutButter')
psw = browser.find_element_by_id('login_form_password')
psw.send_keys('***REMOVED***')
iframe = 'default value'
page_number = list(range(1,18))
base_url = 'https://poshmark.com/closet/speanutbutter?availability=available&sort_by=price_desc&all_size=true&max_id='

def reCaptcha_finder(): # Still not working. Works on the first recaptcha popup but not second one for some reason
    global iframe
    time.sleep(4)
    iframes = browser.find_elements_by_tag_name('iframe')
    for frame in iframes:
        if not iframe in iframes:
            browser.switch_to.frame(frame)
            try:
                browser.find_element_by_xpath('//label[text()="I\'m not a robot"]')
                winsound.Beep(540, 1000)
                iframe = frame
                input('Complete reCaptcha then press "enter" to continue')
                browser.switch_to.default_content()
                break
            except NoSuchElementException:
                browser.switch_to.default_content()

psw.submit()

reCaptcha_finder()

wait.until(EC.presence_of_element_located((By.XPATH, "//title[text()='Feed - Poshmark']")))


for page in page_number:

    browser.get(base_url+str(page))
    input('press "enter" to continue')

    html = browser.page_source
    soup = b(html, "lxml")

    list_titles = soup.find_all('a', {'class': 'title'})
    active_listings = soup.find_all('i', {'class': "icon share-gray"})
    print(len(active_listings))
    container = browser.find_elements_by_xpath("//div[@id='tiles-con']/div")
    i = -1
    share_to = input('share to followers (F) or share to party (P)?: ')

    for divs in container:
        i += 1
        wait.until(EC.presence_of_element_located((By.XPATH, ".//i[@class = 'icon share-gray']")))
        try:
            if divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag not-for-sale-tag']"):
                pass
        except NoSuchElementException:
            try:
                if divs.find_element_by_xpath(".//i[@class = 'icon inventory-tag sold-tag']"):
                    pass
            except NoSuchElementException:
                share = divs.find_element_by_xpath(".//i[@class = 'icon share-gray']")
                time.sleep(random.uniform(.6, 1.2))
                try:
                    share.click()
                    # reCaptcha_finder() # Might only need reCaptcha_finder() here and not afterwards or the other way around. Pay attention to where the recaptcha occurs
                    if share_to == 'F':
                        wait.until(EC.presence_of_element_located((By.XPATH, "//span[text()='To My Followers']")))
                        share = browser.find_element_by_xpath("//span[text()='To My Followers']")
                        time.sleep(random.uniform(.6, 1.2))
                        share.click()
                        reCaptcha_finder()
                        print(i)
                        title = list_titles[i].get_text()
                        print(title)
                    if share_to == 'P':
                        wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Happening Now')]")))
                        share = browser.find_element_by_xpath("//*[contains(text(), 'Happening Now')]")
                        time.sleep(random.uniform(.6, 1.2))
                        share.click()
                        reCaptcha_finder()
                        print(i)
                        title = list_titles[i].get_text()
                        print(title)
                except ElementClickInterceptedException:
                    pass