import re

import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup as b


# Initial search-query URL construction, used to find the item-condition code
def url_build(search_query):
|
|
|
|
SQ = search_query
|
|
SQ_1 = SQ.replace(' ', '+')
|
|
|
|
#&color[]=color
|
|
posh_colors = [
|
|
'Red', 'Pink', 'Orange', 'Yellow', 'Green', 'Blue', 'Purple',
|
|
'Gold', 'Silver', 'Black', 'Gray', 'White', 'Cream', 'Brown', 'Tan'
|
|
]
|
|
|
|
for i in range(0,len(posh_colors)):
|
|
if posh_colors[i] in SQ:
|
|
url_color = '&color[]=' + posh_colors[i]
|
|
url_separator = "&availability=sold_out" + url_color + "&department=All&max_id="
|
|
url_separator1 = "&availability=sold_out" + url_color + "&condition=nwt_and_ret&department=All&max_id="
|
|
SQ_1 = SQ_1.replace(posh_colors[i], '')
|
|
break
|
|
|
|
else:
|
|
url_separator = "&availability=sold_out&department=All&max_id="
|
|
url_separator1 = "&availability=sold_out&condition=nwt_and_ret&department=All&max_id="
|
|
|
|
base_url = "https://poshmark.com/search?query="
|
|
|
|
url = base_url + SQ_1 + url_separator
|
|
url_1 = base_url + SQ_1 + url_separator1
|
|
|
|
return url, url_1, SQ
|
|
|
|
def get_sold(url, url_1):
|
|
#TODO Need to add option to avoid irrelevant results using regex and search query
|
|
# keywords. First search the titles for every listing to see if they contain all or
|
|
# some of the keywords present in the search query. If they don't then just break
|
|
# the whole program and say no results
|
|
|
|
nau_prices = [] # NEW AND USED PRICES
|
|
new_prices = []
|
|
|
|
pg = 1
|
|
while True:
|
|
aurl = url + str(pg)
|
|
html = requests.get(aurl).text
|
|
soup = b(html, 'lxml') # NEW+USED SOUP OBJECT
|
|
temp_prices_nau = soup.find_all("span", {"class": "p--t--1 fw--bold"})
|
|
temp_prices_nau = [float(thing.text.strip(' $\n')) for thing in temp_prices_nau]
|
|
nau_prices.extend(temp_prices_nau)
|
|
pg += 1
|
|
if len(re.findall(r'display\:\;', html))>0:
|
|
break
|
|
|
|
pg = 1
|
|
while True:
|
|
burl_1 = url_1 + str(pg)
|
|
html1 = requests.get(burl_1).text
|
|
soup1 = b(html1,'lxml') # NEW SOUP OBJECT
|
|
temp_prices_new = soup1.find_all("span", {"class": "p--t--1 fw--bold"})
|
|
temp_prices_new = [float(thing.text.strip(' $\n')) for thing in temp_prices_new]
|
|
new_prices.extend(temp_prices_new)
|
|
pg += 1
|
|
if len(re.findall(r'display\:\;', html1))>0:
|
|
break
|
|
if len(new_prices)>len(nau_prices):
|
|
new_prices = []
|
|
break
|
|
|
|
return nau_prices, new_prices
|
|
|
|
# Element-wise subtraction of the new-condition price list from the
# new-and-used price list:
def avgs(nau_prices, new_prices):
|
|
for price in new_prices:
|
|
try:
|
|
nau_prices.remove(price)
|
|
except ValueError:
|
|
break
|
|
used_prices = nau_prices
|
|
|
|
if len(new_prices)>0:
|
|
avg_new = np.mean(new_prices)
|
|
avg_used = np.mean(used_prices)
|
|
else:
|
|
avg_new = 0
|
|
avg_used = np.mean(used_prices)
|
|
|
|
return avg_used, avg_new, used_prices
|
|
|
|
def display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url):
|
|
|
|
used_results = '\n{} total results used\nAverage used price = ${}'.format(len(used_prices),avg_used)
|
|
nau_link = 'URL new and used: {}\n'.format(nau_url+'1')
|
|
|
|
new_results = '\n{} total results new\nAverage new price = ${}'.format(len(new_prices),avg_new)
|
|
new_link = 'URL new: {}\n'.format(new_url+'1')
|
|
|
|
total_results = '{} Total results new and used'.format(nau_prices)
|
|
|
|
print(used_results)
|
|
print(nau_link)
|
|
print(new_results)
|
|
print(new_link)
|
|
print(total_results)
|
|
|
|
def main():
|
|
search_query = input("\nSearch Title: ")
|
|
urls = url_build(search_query)
|
|
nau_url = urls[0]
|
|
new_url = urls[1]
|
|
prices = get_sold(nau_url, new_url)
|
|
nau_prices = prices[0]
|
|
new_prices = prices[1]
|
|
averages = avgs(nau_prices, new_prices)
|
|
avg_used = averages[0]
|
|
avg_new = averages[1]
|
|
used_prices = averages[2]
|
|
nau_prices = len(new_prices)+len(used_prices)
|
|
display_results(nau_prices, new_prices, used_prices, avg_new, avg_used, nau_url, new_url)
|
|
|
|
if __name__=='__main__':
|
|
main()
|
|
|