unique ids fix
		@@ -36,15 +36,15 @@ def get_isurl(category_id): # "get itemSearchURL"
        return url
    try:
        data = response.json()
        print(data)
        # NOTE approx 220 pages of listings per cat @ 35 items per page
        item_cond = "&rt=nc&LH_ItemCondition=3000&mag=1" # preowned
        item_cond_new = '&LH_ItemCondition=3'
        urls = []
        url = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
        url = url+item_cond
        j = list(range(1,221))
        for i in j:
            pg = "&_pgn={}".format(str(i))
            url = url.replace('&_pgn=1', pg)
        base_url = data['findItemsByCategoryResponse'][0]['itemSearchURL'][0]
        for pg in list(range(1,34)): # No results after around page 32
            url = base_url+"&_pgn="+str(pg)+item_cond
            print(url)
            urls.append(url)

    except (AttributeError, KeyError):
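For reference, the page-URL construction in the hunk above just appends a "&_pgn=" page number and the pre-owned condition filter to the itemSearchURL returned by the Finding API. A minimal standalone sketch of that construction, where base_url is a hypothetical stand-in for the API value rather than real output:

# Sketch only: base_url is a hypothetical stand-in for
# data['findItemsByCategoryResponse'][0]['itemSearchURL'][0].
base_url = "https://www.ebay.com/sch/i.html?_sacat=11450"
item_cond = "&rt=nc&LH_ItemCondition=3000&mag=1"  # pre-owned filter

urls = []
for pg in range(1, 34):  # pages 1-33; results reportedly thin out after ~page 32
    urls.append(base_url + "&_pgn=" + str(pg) + item_cond)

print(urls[0])    # ...&_pgn=1&rt=nc&LH_ItemCondition=3000&mag=1
print(len(urls))  # 33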
@@ -70,17 +70,22 @@ def get_ids(url):
    '''
    html = requests.get(url).text
    soup = b(html, "html.parser")
    print(soup)
    ids = list(soup.find_all(href=re.compile(r"[\d]+(?=\?hash)")))
    ids = [id['href'] for id in ids]
    ids = [re.findall(r"[\d]+(?=\?)", id)[0] for id in ids]
    ids = list(set(ids)) # necessary; two links are returned with pattern match
    print(ids)

    return ids
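The two regex passes in get_ids() above first keep only anchors whose href has an item number followed by '?hash', then pull the digits back out of each href string; the final set() call drops the duplicate hrefs the comment notes (two matching links per listing). A short sketch of the extraction on a made-up listing URL:

import re

# Hypothetical listing href of the shape the lookahead patterns above target.
href = "https://www.ebay.com/itm/123456789012?hash=item1cbe93e4f4:g:abc"

item_id = re.findall(r"[\d]+(?=\?)", href)[0]  # digits immediately before '?'
print(item_id)  # 123456789012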
def threaded_get_ids(urls):
    '''
    Runs get_ids() w/in ThreadPoolExecutor() for multi threaded requests.
    Constructs and saves unique ids and 20_itemIDs for use with ebay_api
    methods
    '''
    try:
        with open('item_id_results.txt') as f:
        with open('ids.txt') as f:
            ids = json.load(f)
    except FileNotFoundError:
        ids = []
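Per the docstring, get_ids() is fanned out over the page URLs with ThreadPoolExecutor; the "with ... as executor:" line itself falls outside the hunk context shown below, so here is a minimal standalone sketch of that pattern, with fake_get_ids() as a hypothetical stand-in for get_ids(). Note that executor.map() yields each call's return value in input order, so the loop variable named future in the real code actually holds a list of IDs, not a Future object.

from concurrent.futures import ThreadPoolExecutor

def fake_get_ids(url):
    # Hypothetical stand-in for get_ids(): pretend each page yields two IDs.
    return [url + "-a", url + "-b"]

urls = ["page1", "page2", "page3"]
ids = []
with ThreadPoolExecutor() as executor:
    for result in executor.map(fake_get_ids, urls):  # results come back in input order
        ids.extend(result)

print(len(ids))  # 6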
@@ -89,14 +94,32 @@ def threaded_get_ids(urls):
        for future in executor.map(get_ids, urls):
            ids.extend(future)

    ids = list(set(ids)) # necessary; two links are returned with pattern match
    item_id_results = [','.join(ids[n:n+20]) for n in list(range(0,
    len(ids), 20))] # 20-ItemID list created to maximize dataset/decrease calls given call constraints

    with open('ids.txt', 'w') as f:
        json.dump(ids,f)

    with open('item_id_results.txt', 'w') as f:
        json.dump(item_id_results, f)

    return item_id_results
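The comprehension above batches the deduplicated IDs into comma-joined strings of 20, presumably so each downstream ebay_api call can fetch as many items as its per-call limit allows (the comment cites call constraints). A small worked sketch of the same chunking with made-up IDs:

# Worked sketch of the 20-ID batching, using made-up numeric IDs.
ids = [str(n) for n in range(100, 145)]  # 45 unique IDs

item_id_results = [','.join(ids[n:n+20]) for n in range(0, len(ids), 20)]

print(len(item_id_results))  # 3 batches: 20 + 20 + 5
print(item_id_results[-1])   # 140,141,142,143,144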
def id_count():
    '''
    Counts Unique IDs of item_id_results for testing
    '''
    with open('item_id_results.txt') as f:
        item_id_results = json.load(f)

    ids = ','.join(item_id_results)
    ids = ids.split(',')
    uniq = len(list(set(ids)))
    print('{} Unique IDs'.format(uniq))

    return ids
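Since item_id_results stores comma-joined batches rather than single IDs, id_count() re-joins and re-splits them before taking a set. A quick sketch of that round trip with two hypothetical, overlapping batches:

# Hypothetical batches in which the ID 103 appears twice.
item_id_results = ['101,102,103', '103,104']

ids = ','.join(item_id_results).split(',')
print(len(ids))       # 5 IDs in total
print(len(set(ids)))  # 4 unique IDs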
def main():
    urls = threaded_urls()
    item_id_results = threaded_get_ids(urls)