import concurrent.futures
import json
import requests
import pandas as pd

class FindingApi:
    '''Methods for accessing eBay's FindingApi services'''

    def __init__(self, service, pageNumber):
        '''
        service: index into the list of supported Finding API operations
            (0 = findItemsAdvanced ... 4 = findItemsByCategory,
            5 = findItemsByProduct).
        pageNumber: exclusive upper bound of the result pages to request;
            pages 1 .. pageNumber - 1 are fetched for every category.
        '''
        self.service = [
            'findItemsAdvanced', 'findCompletedItems',
            'findItemsByKeywords', 'findItemsIneBayStores', 'findItemsByCategory',
            'findItemsByProduct'
            ][service]
        self.pageNumber = list(range(1, pageNumber)) # 64 pages is recommended
        # this will give equal weights to cats given call constraints

    # departments = ["3034","93427"] (womens and mens)

    def get_data(self, i, category_id=None):
        '''
        Gets raw JSON data from a FindingApi service call.
        Currently being used to get itemIDs from categories.

        i: 1-based result page to request.
        category_id: eBay category to search; left out of the request
            when None.

        Returns the decoded JSON body of the response.
        '''
        # NOTE(review): the app id is hard-coded; consider loading it from
        # configuration instead of keeping it in source control.
        params = {
            "OPERATION-NAME": self.service,
            "SECURITY-APPNAME": "scottbea-xlister-PRD-6796e0ff6-14862949",
            "SERVICE-VERSION": "1.13.0",
            "RESPONSE-DATA-FORMAT": "JSON",
            "paginationInput.entriesPerPage": "100",
            # documented spelling is lower-camel-case "pageNumber"
            "paginationInput.pageNumber": i,
            }
        if category_id is not None:
            params["categoryId"] = category_id

        response = requests.get("https://svcs.ebay.com/services/search/FindingService/v1",
                params=params)
        return response.json()

    def _item_ids_from_response(self, data):
        '''
        Pulls the itemId of every listing out of one page of results.

        The top-level key follows the selected operation
        ("<service>Response"). A page without an 'item' entry yields an
        empty list instead of raising.
        '''
        search_result = data[self.service + 'Response'][0]['searchResult'][0]
        return [item['itemId'][0] for item in search_result.get('item', [])]

    @staticmethod
    def _load_training_ids(path='training.csv'):
        '''
        Returns the set of item ids already stored in the training csv, or
        an empty set when the file is missing or empty.
        '''
        try:
            # dtype=str keeps ids comparable with the id strings of the API
            training = pd.read_csv(path, dtype=str)
        except (pd.errors.EmptyDataError, FileNotFoundError):
            return set()

        if 'ItemID' in training.columns:
            return set(training['ItemID'].dropna())
        # no ItemID column: fall back to matching against any cell
        return {cell for cell in training.values.ravel() if isinstance(cell, str)}

    def get_ids_from_cats(self):
        '''
        Creates a list of comma-joined batches of up to 20 itemIds to use
        for the ShoppingApi call.

        Category ids are read from the JSON list in cat_list.txt. Ids that
        are already in training.csv, or that were already collected during
        this run, are skipped.
        '''
        with open('cat_list.txt') as jf:
            cat_list = json.load(jf)

        # read once up front; the file is not modified while ids are collected
        seen = self._load_training_ids()
        itemid_results_list = []

        with concurrent.futures.ThreadPoolExecutor() as executor:
            for category_id in cat_list:
                # pass the callable itself plus one category id per page
                pages = executor.map(self.get_data, self.pageNumber,
                        [category_id] * len(self.pageNumber))
                for data in pages:
                    for item_id in self._item_ids_from_response(data):
                        if item_id not in seen:
                            seen.add(item_id)
                            itemid_results_list.append(item_id)

        item_id_results = [','.join(itemid_results_list[n:n+20])
                for n in range(0, len(itemid_results_list), 20)]

        return item_id_results

class ShoppingApi:
    '''
    Creates objects from ShoppingApi service calls that can interact with
    pandas dataframes
    '''
    def get_item_from_findItemsByCategory(self, twenty_id):
        '''
        Gets raw JSON data from multiple live listings given multiple itemIds

        twenty_id: comma-joined string of itemIds for one GetMultipleItems
            call.
        '''
        # NOTE(review): the app id is hard-coded; consider loading it from
        # configuration instead of keeping it in source control.
        params = {
            "callname":"GetMultipleItems",
            "appid":"scottbea-xlister-PRD-6796e0ff6-14862949",
            "version":"671",
            "responseencoding":"JSON",
            "ItemID":twenty_id,
            "IncludeSelector":"ItemSpecifics",
            }

        response = requests.get("https://open.api.ebay.com/shopping?", params=params)
        return response.json()

    @staticmethod
    def _merge_response(data, response):
        '''
        Folds one response dict into data. List values are concatenated so
        listings from earlier calls are kept; any other value is replaced
        by the newest one.
        '''
        for key, value in response.items():
            if isinstance(value, list):
                if isinstance(data.get(key), list):
                    data[key].extend(value)
                else:
                    data[key] = list(value)
            else:
                data[key] = value

    def conky(self, item_id_results=None):
        '''
        Fetches every batch of itemIds concurrently and returns a single
        dict combining all responses (see _merge_response for how entries
        are combined).

        item_id_results: iterable of comma-joined itemId strings, as built
            by FindingApi.get_ids_from_cats. None or empty returns {}.
        '''
        data = {}
        if not item_id_results:
            return data

        with concurrent.futures.ThreadPoolExecutor() as executor:
            # hand map the bound method itself, not the result of calling it
            for response in executor.map(self.get_item_from_findItemsByCategory,
                    item_id_results):
                self._merge_response(data, response)
        return data


class CurateData:
    '''
    Contains functions for curating data for machine learning training sets;
    Takes item in data from ShoppingApi request as argument and extracts/ creates key
    value pairs that gets updated to custom dataframe used in Ml training sets.
    '''
    def extract_itemId(self, item):
        '''Returns {'ItemID': ...} for one listing.'''
        item_id = {'ItemID':item['ItemID']}
        return item_id

    def extract_catId(self, item):
        '''Returns {'PrimaryCategoryID': ...} for one listing.'''
        catId = {'PrimaryCategoryID':item['PrimaryCategoryID']}
        return catId

    def extract_prime_cat_name(self, item):
        '''Returns {'PrimaryCategoryName': ...} for one listing.'''
        prime_cat_name= {'PrimaryCategoryName':item['PrimaryCategoryName']}
        return prime_cat_name

    def extract_picture_url(self, item):
        '''
        Only pulls PictureURL list and does not
        create dictionary
        '''
        picture_url_list = item['PictureURL']
        return picture_url_list

    def extract_nvl(self, item):
        '''
        Flattens the listing's item specifics into a {name: value} dict.

        Reads the capitalised keys used by the other extractors
        (ItemSpecifics / NameValueList / Name / Value) and still accepts
        the all-lowercase spellings. A listing without item specifics
        yields an empty dict.
        '''
        if 'ItemSpecifics' in item:
            specifics = item['ItemSpecifics']
        else:
            specifics = item.get('itemspecifics', {})

        if 'NameValueList' in specifics:
            pairs = specifics['NameValueList']
        else:
            pairs = specifics.get('namevaluelist', [])

        nvl_dict = {}
        for pair in pairs:
            name = pair['Name'] if 'Name' in pair else pair['name']
            value = pair['Value'] if 'Value' in pair else pair['value']
            nvl_dict[name] = value
        return nvl_dict

    def _training_rows(self, data):
        '''
        Builds one flat dict per picture of every listing in data. All
        pictures of a listing share that listing's id, category and item
        specifics.
        '''
        if 'Item' in data:
            items = data['Item']
        else:
            items = data.get('item', [])

        rows = []
        for item in items:
            shared = {}
            shared.update(self.extract_itemId(item))
            shared.update(self.extract_catId(item))
            shared.update(self.extract_prime_cat_name(item))
            shared.update(self.extract_nvl(item))

            for url in self.extract_picture_url(item):
                # a fresh dict per picture so every url becomes its own row
                row = {'PictureURL':url}
                row.update(shared)
                rows.append(row)
        return rows

    def update_df(self, data):
        '''
        Creates training instances for dataset. picture_url_list expanded to
        max available pictures with each picture url corresponding to features
        in common with same listing (i.e., because there are multiple pictures
        per listing, each picture will be its own training instance).

        Appends the resulting rows to training.csv; nothing is written
        when data holds no listings.
        '''
        rows = self._training_rows(data)
        if not rows:
            return

        df = pd.json_normalize(rows)
        # NOTE(review): mode='a' writes a header row on every append and the
        # columns depend on the item specifics seen in this run.
        df.to_csv('training.csv', mode='a')

def main():
    '''
    Main program creates/updates a csv file to use for ML training from live
    ebay listings
    '''
    service, pageNumber = input('service and pageNumber:').split()
    finding = FindingApi(int(service), int(pageNumber))

    item_id_results = finding.get_ids_from_cats()
    shopping = ShoppingApi()
    # conky needs the id batches handed to it explicitly
    data = shopping.conky(item_id_results)
    curate = CurateData()
    curate.update_df(data)

if __name__ == "__main__":
    main()

# Limited to 5000 calls to the Shopping API per day, and the GetMultipleItems service maxes out at 20 items
# per call, leaving you 100,000 items per day for your pandas dataframe initially. So you'll have
# to divide these up among the categories. This will leave you with about 6.25K results per cat.
# More than enough data for your dataset.

# Need to make sure dataframe gets important stuff outside of nvl in order to
# access values for cross referencing itemIds from calls
# Need to decide if list gets accessed from df or if you're just going to have
# list contents extracted and possibly placed into separate cells/labels

# TODO NEED TO ADD TRY EXCEPT CONDITIONS FOR EVERY CALL MADE TO API SERVICES
# TO AVOID HICCUPS WHEN CREATING DATASET