View Single Post
Old 02-25-2012, 10:06 PM   #3
nimblebooks
Enthusiast
nimblebooks began at the beginning.
 
Posts: 28
Karma: 10
Join Date: May 2010
Device: Kindle
So it looks to me as though the function doing all the file rewriting is rewrite_links, which is defined in base.py and removes all absolute links. It then looks for a CSS file, which cssutils parses. Is this broadly correct?


Code:
def rewrite_links(root, link_repl_func, resolve_base_href=False):
    '''
    Rewrite all the links in the document.  For each link
    ``link_repl_func(link)`` will be called, and the return value
    will replace the old link.

    Note that links may not be absolute (unless you first called
    ``make_links_absolute()``), and may be internal (e.g.,
    ``'#anchor'``).  They can also be values like
    ``'mailto:email'`` or ``'javascript:expr'``.

    If the ``link_repl_func`` returns None, the attribute or
    tag text will be removed completely.

    The work is done in two passes over ``root``:

    1. Every link yielded by ``iterlinks(root, find_links_in_css=False)``
       is passed (whitespace-stripped) to ``link_repl_func`` and the
       attribute or element text is updated in place.
    2. Every element is visited again; the text of ``<style>`` elements
       and the value of ``style`` attributes that contain URLs are
       parsed with cssutils and their URLs are passed to
       ``link_repl_func`` as well.  The ``None`` convention described
       above is *not* applied in this pass: whatever ``link_repl_func``
       returns is handed to cssutils as-is.

    :param root: The element whose subtree is rewritten in place.
    :param link_repl_func: Callable taking a link string and returning
        its replacement (or None, see above).
    :param resolve_base_href: If true, the module-level
        ``resolve_base_href`` helper is called on ``root`` before any
        link is rewritten.  A callable may also be passed, in which case
        it is called with ``root`` directly.
    :raises NameError: If ``resolve_base_href`` is true (and not itself
        callable) but the module defines no ``resolve_base_href`` helper.
    '''
    from cssutils import parseString, parseStyle, replaceUrls, log
    log.setLevel(logging.WARN)

    if resolve_base_href:
        # The parameter shadows the module-level helper of the same name,
        # so calling ``resolve_base_href(root)`` directly would try to call
        # the boolean flag.  Fetch the helper from the module namespace
        # instead; a callable argument is still honoured as before.
        if callable(resolve_base_href):
            base_resolver = resolve_base_href
        else:
            base_resolver = globals().get('resolve_base_href')
            if not callable(base_resolver):
                raise NameError(
                    'resolve_base_href was requested but no module-level '
                    'resolve_base_href() helper is available')
        base_resolver(root)

    # Pass 1: links found in attributes and element text.
    for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
        new_link = link_repl_func(link.strip())
        # Compared against the unstripped original, so a link that only
        # carried surrounding whitespace is still written back stripped.
        if new_link == link:
            continue
        if new_link is None:
            # Remove the attribute or element content
            if attrib is None:
                el.text = ''
            else:
                del el.attrib[attrib]
            continue
        if attrib is None:
            # The link lives inside the element text at offset ``pos``.
            new = el.text[:pos] + new_link + el.text[pos+len(link):]
            el.text = new
        else:
            cur = el.attrib[attrib]
            if not pos and len(cur) == len(link):
                # Most common case
                el.attrib[attrib] = new_link
            else:
                # The link is only part of the attribute value; splice it.
                new = cur[:pos] + new_link + cur[pos+len(link):]
                el.attrib[attrib] = new

    def set_property(v):
        # Rewrite a single CSS value in place if it is a URI.
        if v.CSS_PRIMITIVE_VALUE == v.cssValueType and \
           v.CSS_URI == v.primitiveType:
                v.setStringValue(v.CSS_URI,
                        link_repl_func(v.getStringValue()))

    # Pass 2: URLs embedded in CSS (<style> elements and style attributes).
    for el in root.iter():
        try:
            tag = el.tag
        except UnicodeDecodeError:
            continue

        if tag == XHTML('style') and el.text and \
                (_css_url_re.search(el.text) is not None or '@import' in
                        el.text):
            stylesheet = parseString(el.text)
            replaceUrls(stylesheet, link_repl_func)
            repl = stylesheet.cssText
            if isbytestring(repl):
                repl = repl.decode('utf-8')
            el.text = '\n'+ repl + '\n'

        if 'style' in el.attrib:
            text = el.attrib['style']
            if _css_url_re.search(text) is not None:
                try:
                    stext = parseStyle(text)
                except Exception:
                    # Parsing errors are raised by cssutils; leave the
                    # attribute untouched.  ``Exception`` (rather than a
                    # bare except) lets KeyboardInterrupt/SystemExit
                    # propagate.
                    continue
                for p in stext.getProperties(all=True):
                    v = p.cssValue
                    if v.CSS_VALUE_LIST == v.cssValueType:
                        for item in v:
                            set_property(item)
                    elif v.CSS_PRIMITIVE_VALUE == v.cssValueType:
                        set_property(v)
                # An attribute value must stay on a single line.
                repl = stext.cssText.replace('\n', ' ').replace('\r',
                        ' ')
                if isbytestring(repl):
                    repl = repl.decode('utf-8')
                el.attrib['style'] = repl
nimblebooks is offline   Reply With Quote