HEX

File: //lib/python3.6/site-packages/textile/utils.py
from __future__ import unicode_literals
import six

try:
    import regex as re
except ImportError:
    import re

from six.moves import urllib, html_parser
urlparse = urllib.parse.urlparse
HTMLParser = html_parser.HTMLParser

from collections import OrderedDict

from xml.etree import ElementTree

from textile.regex_strings import valign_re_s, halign_re_s


def decode_high(text):
    """Decode encoded HTML entities."""
    h = HTMLParser()
    text = '&#{0};'.format(text)
    return h.unescape(text)

def encode_high(text):
    """Encode the text so that it is an appropriate HTML entity."""
    return ord(text)

def encode_html(text, quotes=True):
    """Return text that's safe for an HTML attribute."""
    a = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'))

    if quotes:
        a = a + (("'", '&#39;'),
                 ('"', '&quot;'))

    for k, v in a:
        text = text.replace(k, v)
    return text

def generate_tag(tag, content, attributes=None):
    """Generate a complete html tag using the ElementTree module.  tag and
    content are strings, the attributes argument is a dictionary.  As
    a convenience, if the content is ' /', a self-closing tag is generated."""
    content = six.text_type(content)
    # In PY2, ElementTree tostringlist only works with bytes, not with
    # unicode().
    enc = 'unicode'
    if six.PY2:
        enc = 'UTF-8'
    if not tag:
        return content
    element = ElementTree.Element(tag, attrib=attributes)
    # FIXME: Kind of an ugly hack.  There *must* be a cleaner way.  I tried
    # adding text by assigning it to element_tag.text.  That results in
    # non-ascii text being html-entity encoded.  Not bad, but not entirely
    # matching php-textile either.
    element_tag = ElementTree.tostringlist(element, encoding=enc,
            method='html')
    if six.PY2:
        element_tag = [v.decode(enc) for v in element_tag]
    element_tag.insert(len(element_tag) - 1, content)
    element_text = ''.join(element_tag)
    return element_text

def has_raw_text(text):
    """checks whether the text has text not already enclosed by a block tag"""
    # The php version has orders the below list of tags differently.  The
    # important thing to note here is that the pre must occur before the p or
    # else the regex module doesn't properly match pre-s. It only matches the
    # p in pre.
    r = re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
                   re.S).sub('', text.strip()).strip()
    r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
    return '' != r

def is_rel_url(url):
    """Identify relative urls."""
    (scheme, netloc) = urlparse(url)[0:2]
    return not scheme and not netloc

def is_valid_url(url):
    parsed = urlparse(url)
    if parsed.scheme == '':
        return True
    return False

def list_type(list_string):
    listtypes = {
        list_string.startswith('*'): 'u',
        list_string.startswith('#'): 'o',
        (not list_string.startswith('*') and not list_string.startswith('#')):
        'd'
    }
    return listtypes.get(True, False)

def normalize_newlines(string):
    out = string.strip()
    out = re.sub(r'\r\n?', '\n', out)
    out = re.compile(r'^[ \t]*\n', flags=re.M).sub('\n', out)
    out = re.sub(r'"$', '" ', out)
    return out

def parse_attributes(block_attributes, element=None, include_id=True, restricted=False):
    vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'}
    hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
    style = []
    aclass = ''
    lang = ''
    colspan = ''
    rowspan = ''
    block_id = ''
    span = ''
    width = ''
    result = OrderedDict()

    if not block_attributes:
        return result

    matched = block_attributes
    if element == 'td':
        m = re.search(r'\\(\d+)', matched)
        if m:
            colspan = m.group(1)

        m = re.search(r'/(\d+)', matched)
        if m:
            rowspan = m.group(1)

    if element == 'td' or element == 'tr':
        m = re.search(r'(^{0})'.format(valign_re_s), matched)
        if m:
            style.append("vertical-align:{0}".format(vAlign[m.group(1)]))

    if not restricted:
        m = re.search(r'\{([^}]*)\}', matched)
        if m:
            style.extend(m.group(1).rstrip(';').split(';'))
            matched = matched.replace(m.group(0), '')

    m = re.search(r'\[([^\]]+)\]', matched, re.U)
    if m:
        lang = m.group(1)
        matched = matched.replace(m.group(0), '')

    m = re.search(r'\(([^()]+)\)', matched, re.U)
    if m:
        aclass = m.group(1)
        matched = matched.replace(m.group(0), '')

    m = re.search(r'([(]+)', matched)
    if m:
        style.append("padding-left:{0}em".format(len(m.group(1))))
        matched = matched.replace(m.group(0), '')

    m = re.search(r'([)]+)', matched)
    if m:
        style.append("padding-right:{0}em".format(len(m.group(1))))
        matched = matched.replace(m.group(0), '')

    m = re.search(r'({0})'.format(halign_re_s), matched)
    if m:
        style.append("text-align:{0}".format(hAlign[m.group(1)]))

    m = re.search(r'^(.*)#(.*)$', aclass)
    if m:
        block_id = m.group(2)
        aclass = m.group(1)

    if element == 'col':
        pattern = r'(?:\\(\d+)\.?)?\s*(\d+)?'
        csp = re.match(pattern, matched)
        span, width = csp.groups()

    if colspan:
        result['colspan'] = colspan

    if style:
        # Previous splits that created style may have introduced extra
        # whitespace into the list elements.  Clean it up.
        style = [x.strip() for x in style]
        result['style'] = '{0};'.format("; ".join(style))
    if aclass:
        result['class'] = aclass
    if block_id and include_id:
        result['id'] = block_id
    if lang:
        result['lang'] = lang
    if rowspan:
        result['rowspan'] = rowspan
    if span:
        result['span'] = span
    if width:
        result['width'] = width
    return result

def pba(block_attributes, element=None, include_id=True, restricted=False):
    """Parse block attributes."""
    attrs = parse_attributes(block_attributes, element, include_id, restricted)
    if not attrs:
        return ''
    result = ' '.join(['{0}="{1}"'.format(k, v) for k, v in attrs.items()])
    return ' {0}'.format(result)