"""
open/dulcinea/lib/util.py
"""
from datetime import datetime
from distutils.fancy_getopt import wrap_text
from durus.utils import byte_string
from formatter import AbstractFormatter, DumbWriter, AS_IS, NullFormatter
from os.path import isdir, dirname, abspath
from qp.lib.spec import unicode_string
from qpy import stringify, xml, xml_quote
import re
import time
import sys

if sys.version < "3":
    from __builtin__ import unichr
    from StringIO import StringIO
    from htmllib import HTMLParser
    from urllib import urlretrieve, urlopen
    from htmlentitydefs import name2codepoint
else:
    from io import StringIO
    from html.parser import HTMLParser as PlainHTMLParser
    from urllib.request import urlretrieve, urlopen
    # Patch bug in formatter module.
    # NOTE(review): presumably formatter treats the result of filter() as a
    # list, which it no longer is on Python 3 -- confirm against the stdlib
    # formatter source.
    import formatter
    def list_filter(*args):
        """Like the builtin filter(), but return a list."""
        return list(filter(*args))
    # A module-level name shadows the builtin of the same name, so the
    # formatter module's own calls to filter() now resolve to list_filter.
    formatter.filter = list_filter
    from html.entities import name2codepoint
    unichr = chr

    class HTMLParser (PlainHTMLParser):
        """Python 3 stand-in for the HTMLParser that the Python 2 branch
        imports from htmllib.

        Translates the tag, reference and text events produced by
        html.parser.HTMLParser into calls on a formatter object
        (end_paragraph, push_font, push_margin, add_flowing_data, ...).
        Everything between <head> and </head> is discarded.
        """

        def __init__(self, formatter):
            """(formatter)
            'formatter' receives every formatting event; it must provide
            the methods called by the handlers of this class.
            """
            PlainHTMLParser.__init__(self)
            self.formatter = formatter
            # Nesting depth of <pre>; text is literal while it is non-zero.
            self.nofill = 0
            # True while inside <head>.
            self.in_head = False
            # One [kind, label, counter] entry per open ul/ol/dl/dd.
            self.list_stack = []

        # Overridable -- finish processing of start+end tag: <tag.../>
        def handle_startendtag(self, tag, attrs):
            self.handle_starttag(tag, attrs)
            self.handle_endtag(tag)

        # Overridable -- handle start tag
        def handle_starttag(self, tag, attrs):
            """(tag:str, attrs:[(name, value)])
            Emit the formatter events for an opening tag.  Tags without a
            branch below are ignored.
            """
            tag = tag.lower()
            if self.in_head:
                return
            if tag == 'head':
                self.in_head = True
            elif tag == 'p':
                self.formatter.end_paragraph(1)
            elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
                self.formatter.end_paragraph(1)
                self.formatter.push_font((tag, 0, 1, 0))
            elif tag == 'pre':
                self.formatter.end_paragraph(1)
                self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))
                self.nofill = self.nofill + 1
            elif tag == 'br':
                self.formatter.add_line_break()
            elif tag == 'hr':
                self.formatter.add_hor_rule()
            elif tag == 'blockquote':
                self.formatter.end_paragraph(1)
                self.formatter.push_margin('blockquote')
            elif tag == 'address':
                self.formatter.end_paragraph(0)
                self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
            elif tag == 'ul':
                self.formatter.end_paragraph(not self.list_stack)
                self.formatter.push_margin('ul')
                self.list_stack.append(['ul', '*', 0])
            elif tag == 'li':
                self.formatter.end_paragraph(0)
                if self.list_stack:
                    # Advance the counter of the innermost open list.
                    top = self.list_stack[-1]
                    label = top[1]
                    top[2] = counter = top[2] + 1
                else:
                    # <li> outside of any list.
                    label, counter = '*', 0
                # The label must be emitted for every item, not only for
                # the ones found outside of a list.
                self.formatter.add_label_data(label, counter)
            elif tag == 'ol':
                self.formatter.end_paragraph(not self.list_stack)
                self.formatter.push_margin('ol')
                label = '1.'
                for a, v in attrs:
                    if a == 'type':
                        if len(v) == 1:
                            v = v + '.'
                        label = v
                self.list_stack.append(['ol', label, 0])
            elif tag == 'dl':
                self.formatter.end_paragraph(1)
                self.list_stack.append(['dl', '', 0])
            elif tag == 'dt':
                self.ddpop()
            elif tag == 'dd':
                self.ddpop()
                self.formatter.push_margin('dd')
                self.list_stack.append(['dd', '', 0])
            elif tag in ('i', 'em', 'cite'):
                self.formatter.push_font((AS_IS, 1, AS_IS, AS_IS))
            elif tag in ('b', 'strong'):
                self.formatter.push_font((AS_IS, AS_IS, 1, AS_IS))
            elif tag in ('tt', 'code'):
                self.formatter.push_font((AS_IS, AS_IS, AS_IS, 1))

        def handle_endtag(self, tag):
            """(tag:str)
            Emit the formatter events for a closing tag, undoing what
            handle_starttag() pushed for the same tag.
            """
            tag = tag.lower()
            if self.in_head:
                # Only </head> is of interest while inside the head.
                if tag == 'head':
                    self.in_head = False
                return
            if tag == 'p':
                self.formatter.end_paragraph(1)
            elif tag == 'pre':
                self.formatter.end_paragraph(1)
                self.formatter.pop_font()
                self.nofill = max(0, self.nofill - 1)
            elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
                self.formatter.end_paragraph(1)
                self.formatter.pop_font()
            elif tag == 'blockquote':
                self.formatter.end_paragraph(1)
                # Balance the push_margin() done for <blockquote>.
                self.formatter.pop_margin()
            elif tag == 'address':
                self.formatter.end_paragraph(0)
                self.formatter.pop_font()
            elif tag in ('ul', 'ol'):
                if self.list_stack:
                    del self.list_stack[-1]
                self.formatter.end_paragraph(not self.list_stack)
                self.formatter.pop_margin()
            elif tag == 'dl':
                self.ddpop(1)
                if self.list_stack:
                    del self.list_stack[-1]
            elif tag in ('i', 'em', 'tt', 'strong', 'b', 'cite', 'code'):
                self.formatter.pop_font()

        def ddpop(self, bl=0):
            """(bl:int=0)
            End the current paragraph and, if a <dd> is open, close it.
            """
            self.formatter.end_paragraph(bl)
            if self.list_stack:
                if self.list_stack[-1][0] == 'dd':
                    del self.list_stack[-1]
                    self.formatter.pop_margin()

        def handle_charref(self, name):
            """(name:str)
            Handle a numeric character reference.  'name' is the text
            between '&#' and ';', e.g. '65' or 'x41'.
            """
            try:
                if name[:1] in ('x', 'X'):
                    codepoint = int(name[1:], 16)
                else:
                    codepoint = int(name)
                # This class only exists on Python 3, where chr() covers
                # the whole Unicode range.
                text = chr(codepoint)
            except (ValueError, OverflowError):
                # Not a valid reference: pass it through unchanged.
                text = '&#' + name + ';'
            # Going through handle_data() applies the <head> and <pre>
            # rules to the character like to any other text.
            self.handle_data(text)

        def handle_entityref(self, name):
            """(name:str)
            Handle a named entity reference.  Names found in
            self.entitydefs are replaced, others are passed through.
            """
            # Subclasses are not required to provide an entitydefs table.
            entitydefs = getattr(self, 'entitydefs', {})
            if name in entitydefs:
                self.handle_data(entitydefs[name])
            else:
                self.handle_data('&' + name + ';')

        def handle_data(self, data):
            """(data:str)
            Pass text to the formatter: literally inside <pre>, as
            flowing text elsewhere, and not at all inside <head>.
            """
            if not self.in_head:
                if self.nofill:
                    self.formatter.add_literal_data(data)
                else:
                    self.formatter.add_flowing_data(data)

def as_unicode(s):
    """(s) -> unicode_string
    Return 's' as a unicode string.  A unicode string is returned as is;
    anything else is decoded as UTF-8.
    """
    if not isinstance(s, unicode_string):
        if isinstance(s, byte_string):
            s = s.decode('utf-8')
        else:
            s = unicode_string(s, 'utf-8')
    return s

# Bound join() of an empty unicode string: joins an iterable of unicode
# strings into one unicode string.
join_unicode = as_unicode('').join
# Bare expression that only references the imported names.
urlretrieve, urlopen, StringIO # quiet checker

class UnicodeStringIO (object):
    """Write-only, file-like accumulator whose getvalue() is always a
    unicode string, whatever kind of strings were written.
    """
    def __init__(self):
        # Chunks exactly as passed to write(); converted in getvalue().
        self.buflist = []

    def write(self, s):
        self.buflist.append(s)

    def getvalue(self):
        pieces = []
        for chunk in self.buflist:
            pieces.append(as_unicode(chunk))
        return join_unicode(pieces)


# Maps each HTML entity name to the one-character unicode string for its
# code point.
entitydefs_as_unichr = {}
for (name, codepoint) in name2codepoint.items():
    entitydefs_as_unichr[name] = unichr(codepoint)

class EntityCharHTMLParser (HTMLParser):
    """An HTMLParser that replaces HTML entities with their Unicode characters.
    """
    # Entity name -> replacement text.  The Python 3 HTMLParser defined in
    # this module reads self.entitydefs in handle_entityref().
    entitydefs = entitydefs_as_unichr

def html2txt(text):
    """(any) -> str
    Render 'text', taken as HTML, as plain text: the markup is fed
    through EntityCharHTMLParser into a DumbWriter and the written text
    is returned as a unicode string.
    """
    sio = UnicodeStringIO()
    parser = EntityCharHTMLParser(AbstractFormatter(DumbWriter(sio)))
    parser.feed(stringify(text))
    # feed() may keep the tail of the input buffered (for example an
    # unterminated '&...' at the very end); close() forces it through so
    # that no trailing text is lost.
    parser.close()
    return sio.getvalue()

def _htmlescape_except(text, safe_tags):
    """(text, safe_tags:[str]) -> xml|None

    Quotes 'text' for use in an HTML page, except for the tags
    listed in 'safe_tags' which are considered safe (e.g. <p>, <b>).
    A safe tag is recognized in any letter case and is always written
    back exactly as it is spelled in 'safe_tags'.
    Returns None if 'text' is None.
    """
    if text is None:
        return None
    text = stringify(xml_quote(text))
    for tag in safe_tags:
        quoted_tag = stringify(xml_quote(tag))
        # The quoted tag is literal text, not a pattern: escape it so that
        # characters such as '.' or '?' in a tag only match themselves.
        tag_re = re.compile(re.escape(quoted_tag), re.IGNORECASE)
        # A function replacement inserts the tag verbatim; a string
        # replacement would interpret backslashes in it.
        text = tag_re.sub(lambda match, tag=tag: tag, text)
    return xml(text)

# re to automatically hyperlink email addresses and URLs.
# The single capturing group matches either an email address or a URL that
# starts with "http://", "https://" or "www."; in both cases the last
# component of the domain is limited to 2-4 word characters.
_link_re = re.compile(r"""\b(
    # email address
      [\w.-]+ # local part
      @
      [\w.-]+\.[\w]{2,4} # domain
    |
    # URL
      (?:https?://|www\.) # must start with http or www
      [\w.-]+\.[\w]{2,4} # domain
      (?::\d+)? # optional port
      (?:/[\w#$%&+,-./:;=?@\[\]^_|~]*)? # optional path
   )""", re.VERBOSE)

def activate_links(text, links=None):
    """(text:str|None, links:[(url, htext)]) -> xml|None
    Returns an xml_quoted version of text, with things that look like email
    addresses and URLs turned into hyperlinks. links is a list of two-tuples.
    If the url in links appears in text it is replaced with htext.
    Returns None if text is None.
    """
    def _link_replace(m):
        # Build the <a> element for one match of _link_re.
        text = url = m.group(0)
        extra = ""
        # The email alternative of _link_re cannot match a "/", while a URL
        # can only contain an "@" inside its path, i.e. after a "/".
        # Testing for "@" alone would turn such URLs into mailto: links.
        is_email = "@" in text and "/" not in text
        if is_email:
            # _link_re never includes a scheme in an email match, so the
            # scheme is always added here.
            url = "mailto:" + text
        else:
            if text[-1] in ".,":
                # don't include as part of the URL (easier to handle here
                # than in the regex)
                extra = text[-1]
                url = text = text[:-1]
            if not text.startswith("http"):
                url = "http://" + text
        if links:
            for known_url, known_text in links:
                if url == known_url:
                    text = known_text
        return '<a href="%s">%s</a>%s' % (url, text, extra)

    if text is None:
        return None
    return xml(_link_re.sub(_link_replace, stringify(xml_quote(text))))

# Default value of format_text()'s safe_tags parameter: the tags that are
# left unescaped.
SAFE_TAGS = ['<p>', '</p>', '<b>', '</b>', '<i>', '</i>', '<ul>', '</ul>',
             '<ol>', '</ol>', '<li>', '</li>', '<br>', '<br />', '<pre>',
             '</pre>', '<strong>', '</strong>', '<dl>', '</dl>', '<dt>',
             '</dt>', '<dd>', '</dd>']

def format_text(text, safe_tags=SAFE_TAGS, links=None):
    """(text : string, safe_tags : [str], links : [(url, htext)]) -> xml

    Turn plain text into HTML in two steps:
       * escape <, &, and >, except in the tags listed in 'safe_tags'
         (e.g. <p>, <b>), which are left as they are;
       * turn things that look like email addresses and URLs into
         hyperlinks; 'links' is handed to activate_links() unchanged.
    """
    escaped = _htmlescape_except(text, safe_tags)
    return activate_links(escaped, links)

def sanitize_url(url):
    """(url:string|None) -> string|None
    Try to ensure a URL is well-formed: a bare email address gets a
    "mailto:" scheme, anything else without a ":" gets "http://".
    Returns None if url is None.
    """
    if url is None:
        return None
    # Only the part before the first "@" decides whether this is an email
    # address: a ":" or "/" there means the "@" belongs to a URL (userinfo
    # or path, e.g. "http://user@host/" or "www.example.com/@user") or
    # that a scheme such as "mailto:" is already present.
    before_at, at_sign, _ = url.partition('@')
    if at_sign and ':' not in before_at and '/' not in before_at:
        # assume it's an e-mail address
        url = "mailto:" + url
    elif ':' not in url:
        # assume http:// is missing
        url = "http://" + url
    return url

# A paragraph break: two or more consecutive newlines.
_paragraph_re = re.compile('\n\n+')

def split_paragraphs(text):
    """(text:string) -> [string]
    Split 'text' into paragraphs at each run of two or more newlines.
    """
    paragraphs = _paragraph_re.split(text)
    return paragraphs

def wrap_paragraphs(text):
    """(text:string|None) -> string
    Wrap each paragraph of 'text' for output as plain text and join the
    results with one blank line.  None gives the empty string.
    """
    if text is None:
        return ""
    wrapped = []
    for paragraph in split_paragraphs(text):
        wrapped.append(wrap_paragraph(paragraph))
    return '\n\n'.join(wrapped)

def wrap_paragraph(text):
    """(text:string) -> string
    Wrap one paragraph into lines of at most 70 characters, joined by
    newlines.
    """
    line_length = 70
    if isinstance(text, unicode_string) and not isinstance(text, str):
        # Python 2 unicode: wrap the UTF-8 encoding and decode each line.
        # This branch must not be taken for a native str: on Python 3 that
        # would hand bytes to wrap_text(), which only handles native
        # strings there.
        lines = [text.__class__(x, 'utf-8')
                 for x in wrap_text(text.encode('utf-8'), line_length)]
    else:
        lines = wrap_text(text, line_length)
    return '\n'.join(lines)

def insert_paragraph_tags(text):
    """(text:string|None) -> xml|None

    Prepare a text field for display as HTML: the text is HTML quoted,
    wrapped in <p>...</p>, and each run of blank lines becomes a
    paragraph boundary.  Returns None if 'text' is None.
    """
    if text is None:
        return None
    quoted = stringify(xml_quote(text))
    body = _paragraph_re.sub('</p><p>', quoted)
    return xml('<p>' + body + '</p>')

def datetime_to_int(date_time):
    """(date_time:datetime) -> int
    Returns the number of whole seconds since the epoch, as computed by
    time.mktime() from the time tuple of date_time.
    """
    broken_down = date_time.timetuple()
    seconds = time.mktime(broken_down)
    return int(seconds)

def beginning_of_next_month(date_time):
    """(date_time:datetime) -> datetime
    Return midnight on the first day of the month that follows
    date_time, with the tzinfo of date_time.
    """
    if date_time.month == 12:
        # December rolls over into January of the next year.
        year, month = date_time.year + 1, 1
    else:
        year, month = date_time.year, date_time.month + 1
    return datetime(year, month, 1, tzinfo=date_time.tzinfo)

def is_new(persistent_object):
    """(persistent_object : durus.persistent.Persistent) -> boolean
    True if the _p_connection attribute of the object is None.
    """
    connection = persistent_object._p_connection
    return connection is None

def get_module_directory(module):
    """(module) -> string
    Return the absolute path of the directory of a module: the absolute
    module.__file__ itself if that is a directory, its parent otherwise.
    """
    location = abspath(module.__file__)
    return location if isdir(location) else dirname(location)

def static(module, path):
    """(module, path:string) -> string
    Return 'path' appended, after a '/', to the directory of 'module'.
    """
    directory = get_module_directory(module)
    return directory + '/' + path

def get_id(x):
    """(x) -> any
    Return the result of x.get_id().
    """
    identifier = x.get_id()
    return identifier

class HTMLSafetyParser (HTMLParser):
    """An HTMLParser that formats nothing: it records, as human readable
    messages, each start tag and each attribute of the parsed document
    that is not in the lists of allowed names below.
    """

    allowed_tags = set(['p', 'b', 'i', 'ul', 'ol', 'li', 'br', 'pre', 'strong',
        'dl', 'dd', 'dt', 'div', 'span', 'img', 'em', 'a', 'font', 'blockquote', 'hr',
         'sup', 'sub', 'strike'])

    allowed_attrs = set(['style', 'class', 'src', 'href', 'width', 'height', 'size', 'face', 'title', 'alt'])

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self, *args, **kwargs)
        # Messages collected so far, in document order.
        self.risks = []

    def get_risks(self):
        """() -> [str]
        Return the messages collected so far.
        """
        return self.risks

    def unknown_starttag(self, tag, attrs):
        """(tag:str, attrs:[(name, value)])
        Record a message if the tag is not allowed, and one for each
        attribute that is not allowed.
        """
        if tag.lower() not in self.allowed_tags:
            self.risks.append('The "%s" element is not allowed.' % tag.upper())
        for attr, val in attrs:
            if attr.lower() not in self.allowed_attrs:
                self.risks.append('The "%s" attribute is not allowed.' % attr.upper())

    def handle_starttag(self, tag, doer, attrs=None):
        """(tag:str, doer, attrs) or (tag:str, attrs)
        Check every start tag, whether or not the base parser has a
        handler for it.
        """
        # The Python 2 parser calls this with (tag, method, attrs); the
        # Python 3 html.parser calls it with (tag, attrs), in which case
        # the attribute list arrives in the 'doer' position.
        if attrs is None:
            attrs = doer
        self.unknown_starttag(tag, attrs)


def get_html_risks(html):
    """(html:string) -> [str]
    Run 'html' through an HTMLSafetyParser and return the messages it
    collected.
    """
    safety_parser = HTMLSafetyParser(NullFormatter())
    safety_parser.feed(html)
    risks = safety_parser.get_risks()
    return risks