#  -*- coding: utf-8 -*-

# Copyright 2011 Fanficdownloader team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import time
import logging
logger = logging.getLogger(__name__)
import re
import urllib2
import urlparse
import time

from .. import BeautifulSoup as bs
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions

from base_adapter import BaseSiteAdapter, makeDate

def getClass():
    """Return the adapter class implemented by this module."""
    adapter_class = LiteroticaComAdapter
    return adapter_class


class LiteroticaComAdapter(BaseSiteAdapter):

    def __init__(self, config, url):
        """Set up the adapter for one Literotica story URL.

        Records the site abbreviation, derives the story id from the URL
        path, remembers the URL to fetch (``self.origurl``) and registers
        the normalized story URL and the date format used by the site.

        config -- passed straight through to BaseSiteAdapter.__init__.
        url    -- story URL as given by the user; expected to look like
                  http(s)://www[.i].literotica.com/s/<story-id>.
        """
        BaseSiteAdapter.__init__(self, config, url)
        logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)

        self.decode = ["utf8", "Windows-1252"]

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','lit')

        # normalize to first chapter.  Not sure if they ever have more than 2 digits.
        # NOTE: self.parsedUrl is presumably set by BaseSiteAdapter.__init__.
        storyid = self.parsedUrl.path.split('/',)[2]
        # re.search, not re.match: the chapter suffix sits at the *end* of
        # the slug ("story-title-ch-02"), and re.match only looks at the
        # start.  The hyphen before the digits is optional.
        if re.search(r'-ch-?\d\d$', storyid):
            storyid = storyid[:-2]+'01'

        # story id is the (possibly normalized) last path segment of the URL.
        self.story.setMetadata('storyId',storyid)

        # URL that is actually fetched first; kept as given except that the
        # mobile host is swapped for the regular one.
        self.origurl = url
        if "//www.i." in self.origurl:
            ## accept m(mobile)url, but use www.
            self.origurl = self.origurl.replace("//www.i.","//www.")
        # normalized story URL: original scheme + canonical domain + /s/ + id.
        self._setURL(url[:url.index('//')+2]+self.getSiteDomain()\
                         +"/s/"+self.story.getMetadata('storyId'))

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

    @staticmethod # must be @staticmethod, don't remove it.
    def getSiteDomain():
        # The site domain.  Does have www here, if it uses it.
        return 'www.literotica.com'

    @classmethod
    def getAcceptDomains(cls):
        return ['www.literotica.com', 'www.i.literotica.com']

    @classmethod
    def getSiteExampleURLs(self):
#        return "http://"+self.getSiteDomain()+"/s/on-the-auction-block https://"+self.getSiteDomain()+"/s/on-the-auction-block-ch-02"
        return "http://www.literotica.com/s/story-title https://www.literotica.com/s/story-title"

    def getSiteURLPattern(self):
        """Return the regular expression that recognises story URLs.

        The pattern accepts http or https, the regular or mobile ("www.i")
        host, and captures the story slug that follows "/s/".
        """
        # URLs the pattern is meant to recognise:
        #   http://www.literotica.com/s/on-the-auction-block
        #   http://www.literotica.com/s/on-the-auction-block?page=2
        #   http://www.literotica.com/s/on-the-auction-block-ch-02
        host_part = r"https?://www(\.i)?\.literotica\.com"
        story_part = r"/s/([a-zA-Z0-9_-]+)"
        return host_part + story_part


    def getCategories(self, soup):
        """Add the page's meta keywords to the story's 'eroticatags' list.

        Only active when the "use_meta_keywords" config option is set.
        Keywords that contain the story title, and the keyword equal to the
        author name, are dropped; the rest are title-cased and added.

        soup -- parsed story page to read the <meta name="keywords"> tag from.
        """
        if not self.getConfig("use_meta_keywords"):
            return

        keywords = soup.find("meta", {"name":"keywords"})
        if keywords is None:
            # Page has no keywords tag: nothing to add, and no reason to
            # abort the whole download over optional tags.
            logger.debug("No meta keywords found")
            return

        title = self.story.getMetadata('title')
        author = self.story.getMetadata('author')

        # Substring test on purpose: drops any keyword that *contains* the title.
        categories = [c for c in keywords['content'].split(', ') if title not in c]
        if author in categories:
            categories.remove(author)
        logger.debug("Meta = %s" % categories)
        for category in categories:
            self.story.addToList('eroticatags', category.title())

    def extractChapterUrlsAndMetadata(self):
        """Fetch the story and author pages; fill in metadata and chapter list.

        Sets author, authorId, authorUrl, title, datePublished, dateUpdated,
        storyId, numChapters, category and description on self.story,
        populates self.chapterUrls with (title, url) tuples and re-normalizes
        the story URL to the first chapter.

        Raises:
          exceptions.AdultCheckRequired -- neither self.is_adult nor the
              "is_adult" config option is set.
          exceptions.StoryDoesNotExist -- one of the fetched pages returned 404.
          exceptions.FailedToDownload -- the story is part of a series but
              could not be located on the author's page.
          urllib2.HTTPError -- any other HTTP error is re-raised unchanged.
        """

        if not (self.is_adult or self.getConfig("is_adult")):
            raise exceptions.AdultCheckRequired(self.url)

        url1 = self.origurl
        logger.debug("first page URL: "+url1)

        try:
            data1 = self._fetchUrl(url1)
            soup1 = bs.BeautifulSoup(data1)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(url1)
            # bare raise keeps the original traceback.
            raise

        #strip comments from soup
        for comment in soup1.findAll(text=lambda text:isinstance(text, bs.Comment)):
            comment.extract()

        # author
        a = soup1.find("span", "b-story-user-y")
        self.story.setMetadata('authorId', urlparse.parse_qs(a.a['href'].split('?')[1])['uid'][0])
        authorurl = a.a['href']
        if authorurl.startswith('//'):
            # scheme-relative link: reuse the scheme of the story URL.
            authorurl = self.parsedUrl.scheme+':'+authorurl
        self.story.setMetadata('authorUrl', authorurl)
        self.story.setMetadata('author', a.text)

        # get the author page
        try:
            dataAuth = self._fetchUrl(authorurl)
            soupAuth = bs.BeautifulSoup(dataAuth)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(authorurl)
            raise

        ## site has started using //domain.name/asdf urls remove https?: from front
        storyLink = soupAuth.find('a', href=url1[url1.index(':')+1:])

        if storyLink is not None:
            # pull the published date from the author page
            # default values from single link.  Updated below if multiple chapter.
            logger.debug("Found story on the author page.")
            date = storyLink.parent.parent.findAll('td')[-1].text
            self.story.setMetadata('datePublished', makeDate(date, self.dateformat))
            self.story.setMetadata('dateUpdated',makeDate(date, self.dateformat))

        # find num of pages
        # find a "3 Pages:" string on the page and parse it
        pgs = soup1.find("span", "b-pager-caption-t r-d45").string.split(' ')[0]

        # If there are multiple pages, find and request the last page
        if pgs != "1":
            logger.debug("last page number: "+pgs)
            try:
                data2 = self._fetchUrl(url1, {'page': pgs})
                soup2 = bs.BeautifulSoup(data2)
            except urllib2.HTTPError as e:
                if e.code == 404:
                    # single URL argument, like every other StoryDoesNotExist here.
                    raise exceptions.StoryDoesNotExist("%s?page=%s" % (url1, pgs))
                raise
            for comment in soup2.findAll(text=lambda text:isinstance(text, bs.Comment)):
                comment.extract()
        else:
            #If we're already on the last page, copy the soup
            soup2 = soup1

        # parse out the list of chapters
        chaps = soup2.find('div', id='b-series')
        if chaps:  # may be one post only
            # if there are chapters, lets pull them and title from the
            # author page because *this* chapter is omitted from the
            # list on the last page.
            if storyLink is None:
                # Without the link there is no way to locate the series
                # rows; fail with a clear message instead of AttributeError.
                raise exceptions.FailedToDownload(
                    "Error collecting chapters for %s: story not found on author page %s"
                    % (url1, authorurl))

            # walk backwards from this story's row to the series title row.
            row = storyLink.parent.parent.previousSibling
            while row['class'] != 'ser-ttl':
                row = row.previousSibling

            seriesTitle = stripHTML(row)
            if seriesTitle:
                # this regex is deliberately greedy. We want to get the biggest match before a ':'
                titleMatch = re.match('(.*):[^:]*$', seriesTitle)
                if titleMatch:
                    seriesTitle = titleMatch.group(1)
                # no ':' at all -> keep the whole row text as the title.
            else:
                seriesTitle = soup1.h1.string
            self.story.setMetadata('title', seriesTitle)

            # now chapter list.  Assumed oldest to newest.
            self.chapterUrls = []
            row = row.nextSibling

            self.story.setMetadata('datePublished',makeDate(stripHTML(row.find('td',{'class':'dt'})), self.dateformat))

            # Remember the last chapter row seen.  Stepping back from the
            # loop's final row is wrong when the loop ends because there
            # is no next sibling (it would land on the second-to-last row).
            lastChapterRow = row
            while row['class'] == 'sl':
                chapter_title = row.a.string
                logger.debug('\tChapter Name: "%s"' % chapter_title)
                # drop the leading "<series title> " from the link text.
                chapter_title = chapter_title[len(seriesTitle) + 1:]
                logger.debug('\tChapter: "%s"' % chapter_title)

                words = chapter_title.split(' ')
                lowered = chapter_title.lower()
                if lowered.startswith(('ch.', 'pt.')) and len(words) > 1:
                    label = 'Chapter' if lowered.startswith('ch.') else 'Part'
                    number = words[1]
                    try:
                        # "02" -> 2; non-numeric markers are kept verbatim.
                        number = int(number)
                    except ValueError:
                        pass
                    chapter_title = '%s %s' % (label, number)
                else:
                    chapter_title = 'Chapter %d' % (len(self.chapterUrls) + 1)

                # pages include full URLs.
                chapurl = row.a['href']
                if chapurl.startswith('//'):
                    chapurl = self.parsedUrl.scheme+':'+chapurl
                logger.debug("Chapter URL: "+chapurl)
                self.chapterUrls.append((chapter_title, chapurl))
                lastChapterRow = row
                if not row.nextSibling:
                    break
                row = row.nextSibling

            self.story.setMetadata('dateUpdated',makeDate(stripHTML(lastChapterRow.find('td',{'class':'dt'})), self.dateformat))

        else:  # if one post only
            self.chapterUrls = [(soup1.h1.string, url1)]
            self.story.setMetadata('title', soup1.h1.string)

        # normalize on first chapter URL.
        self._setURL(self.chapterUrls[0][1])

        # reset storyId to first chapter.
        self.story.setMetadata('storyId',self.parsedUrl.path.split('/',)[2])

        self.story.setMetadata('numChapters', len(self.chapterUrls))

        self.story.setMetadata('category', soup1.find('div', 'b-breadcrumbs').findAll('a')[1].string)
        self.getCategories(soup1)
        self.story.setMetadata('description', soup1.find('meta', {'name': 'description'})['content'])


    def getPageText(self, soup, url):
        """Return the story HTML of one page as a string.

        Takes the first <p> inside the 'b-story-body-x' div, runs it through
        self.utf8FromSoup, turns doubled <br /> tags into paragraph breaks
        and strips the wrapping <div>...</div>.

        soup -- parsed page of a chapter.
        url  -- chapter URL; passed to utf8FromSoup and used in error text.

        Raises exceptions.FailedToDownload when the story body is missing.
        """
        logger.debug('Getting page text')
        body = soup.find('div', 'b-story-body-x')
        if body is None or body.p is None:
            # Layout changed or an error page was served; report it clearly
            # instead of failing with AttributeError on None.
            raise exceptions.FailedToDownload("Error downloading Chapter: %s!  Missing required element!" % url)

        div = body.p
        # rename the <p> so the wrapper can be stripped as <div> below.
        div.name = 'div'
        div = self.utf8FromSoup(url, div)

        fullhtml = str(div)
        fullhtml = re.sub(r'<br />\s*<br />', r'</p><p>', fullhtml)
        fullhtml = re.sub(r'^<div>', r'', fullhtml)
        fullhtml = re.sub(r'</div>$', r'', fullhtml)
        return fullhtml

    def getChapterText(self, url):
        """Download a chapter (all of its pages) and return its HTML.

        The result starts with a "Description" paragraph taken from the
        page's meta description (when present), followed by the text of
        every page listed in the chapter's page selector, in order.

        url -- URL of the chapter's first page.
        """

        logger.debug('Getting chapter text from: %s' % url)

        soup = bs.BeautifulSoup(self._fetchUrl(url),
                                     selfClosingTags=('br','hr')) # otherwise soup eats the br/hr tags.
        pages = soup.find('select', {'name' : 'page'})
        if pages:
            page_nums = [page.text for page in pages.findAll('option')]
        else:
            page_nums = []
            logger.debug('No extra pages')

        self.getCategories(soup)

        parts = []
        description = soup.find("meta", {"name" : "description"})
        if description is not None:
            chapter_description = description['content']
            logger.debug("\tChapter description: %s" % chapter_description)
            parts.append('<p><b>Description:</b> %s</p><hr />' % chapter_description)

        if page_nums:
            for page_no in page_nums:
                if page_no.strip() == '1' and '?' not in url:
                    # The page fetched above *is* page 1 (url carries no
                    # query string), so don't download it a second time.
                    page_soup = soup
                else:
                    page_url = url +  "?page=%s" % page_no
                    logger.debug("page_url= %s" % page_url)
                    page_soup = bs.BeautifulSoup(self._fetchUrl(page_url), selfClosingTags=('br','hr'))
                parts.append(self.getPageText(page_soup, url))
        else:
            # no page selector (or an empty one): the chapter is this one page.
            parts.append(self.getPageText(soup, url))

        return "".join(parts)

