You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
205 lines
7.3 KiB
205 lines
7.3 KiB
"""Premailer for emencia.django.newsletter
|
|
Converts a page so that its CSS is inlined and its links are made absolute.
|
|
Based on http://www.peterbe.com/plog/premailer.py"""
|
|
import re
|
|
from urllib2 import urlopen
|
|
from lxml.html import parse
|
|
from lxml.html import tostring
|
|
|
|
|
|
# Matches a CSS comment (/* ... */), non-greedily and across line breaks.
_css_comments = re.compile(r'/\*.*?\*/', re.MULTILINE | re.DOTALL)
# Matches one CSS rule. Groups: (whole rule, selector list, declarations).
_regex = re.compile(r'((.*?){(.*?)})', re.DOTALL | re.M)
# Match the whitespace that follows a ';' or a ':' so that it can be
# squeezed out of a declaration block.
_semicolon_regex = re.compile(r';(\s+)')
_colon_regex = re.compile(r':(\s+)')
|
|
|
|
|
|
# Splits a grouped style such as '{...} :hover{...}' into
# (pseudo-class, declarations) pairs; the pseudo-class may be empty.
_grouping_regex = re.compile(r'([:\-\w]*){([^}]+)}')


def _parse_declarations(css):
    """Parse 'name: value; name: value' text into a {name: value} dict.

    Chunks that have no ':' separator or no property name are skipped
    instead of raising, so a malformed style does not abort the merge.
    """
    declarations = {}
    for chunk in css.split(';'):
        name, separator, value = chunk.partition(':')
        name = name.strip()
        if separator and name:
            declarations[name] = value.strip()
    return declarations


def _join_declarations(declarations):
    """Serialize a {name: value} dict back to 'name:value; name:value'."""
    return '; '.join(['%s:%s' % (name, value)
                      for (name, value) in declarations.items()])


def _merge_styles(old, new, class_=''):
    """Merge the declarations of ``new`` into the style string ``old``.

    if ::
      old = 'font-size:1px; color: red'
    and ::
      new = 'font-size:2px; font-weight: bold'
    then the result holds ::
      font-size:2px, color:red and font-weight:bold

    In other words, the new style bits replace the old ones.

    The ``class_`` parameter can be something like ':hover'; the new
    declarations are then merged into that group only and the result
    uses the grouped form '{...} :hover{...}'.
    Note: ``old`` may itself already be in grouped form, e.g.
    '{...} ::first-letter{...}'.

    Returns a plain 'name:value; ...' string when only un-prefixed
    declarations remain, otherwise the grouped form ordered by the
    number of ':' in each group prefix. Groups without any declaration
    are left out, because an empty '...{}' group cannot be parsed back.
    """
    grouped_split = _grouping_regex.findall(old)
    if grouped_split:
        groups = dict((old_class, _parse_declarations(old_content))
                      for old_class, old_content in grouped_split)
    else:
        groups = {'': _parse_declarations(old)}

    # Perform the merge: start from the old declarations of the targeted
    # group and let the new ones override them.
    merged = dict(groups.get(class_, {}))
    merged.update(_parse_declarations(new))
    groups[class_] = merged

    groups = dict((group_class, declarations)
                  for group_class, declarations in groups.items()
                  if declarations)
    if not groups:
        return ''
    if list(groups) == ['']:
        # Only base declarations: no need for the grouped syntax.
        return _join_declarations(groups[''])

    # A key function (rather than cmp) keeps this valid on Python 3;
    # the sort is stable in both cases, so the order is unchanged.
    ordered = sorted(groups.items(), key=lambda item: item[0].count(':'))
    return ' '.join(['%s{%s}' % (group_class,
                                 _join_declarations(declarations))
                     for group_class, declarations in ordered])
|
|
|
|
|
|
class PremailerError(Exception):
    """Error raised by Premailer when the page cannot be parsed."""
|
|
|
|
|
|
class Premailer(object):
    """Premailer for converting a webpage
    to be e-mail ready"""

    def __init__(self, url, include_star_selectors=False):
        """Parse the page located at ``url``.

        ``include_star_selectors`` tells whether rules using the bare
        '*' selector are inlined (they are ignored by default).

        Raises PremailerError when the page cannot be parsed.
        """
        self.url = url
        try:
            page = parse(self.url).getroot()
        except Exception:
            # Not a bare ``except:``, so KeyboardInterrupt and SystemExit
            # still propagate.
            raise PremailerError('Could not parse the html')
        if page is None:
            # NOTE(review): getroot() presumably returns None for an
            # empty document; fail here rather than later with an
            # AttributeError -- confirm against lxml.
            raise PremailerError('Could not parse the html')
        self.page = page

        self.include_star_selectors = include_star_selectors

    def transform(self):
        """Do some transformations to self.page
        for being e-mail compliant"""
        self.page.make_links_absolute(self.url)

        self.inline_rules(self.get_page_rules())
        self.clean_page()
        # Do it a second time for correcting
        # resources added by inlining.
        # Will not work as expected if media
        # are located in another domain.
        self.page.make_links_absolute(self.url)

        return tostring(self.page.body)

    def get_page_rules(self):
        """Retrieve CSS rules in the <style> markups
        and in the external CSS files.

        Returns a list of (selector, declarations) tuples. External
        stylesheets are only fetched when their ``media`` attribute
        (default 'screen') mentions 'all', 'screen' or 'projection'.
        """
        rules = []
        for style in self.page.cssselect('style'):
            # Read the element text directly: cutting the serialized
            # markup on '>' truncated any CSS that itself contains '>'
            # (e.g. the child combinator in 'div > p').
            css_body = style.text or ''
            rules.extend(self._parse_style_rules(css_body)[0])

        for external_css in self.page.cssselect('link'):
            attr = external_css.attrib
            href = attr.get('href')
            if attr.get('rel', '').lower() != 'stylesheet' or not href:
                continue
            media = attr.get('media', 'screen')
            if not any(media_allowed in media
                       for media_allowed in ('all', 'screen', 'projection')):
                continue
            response = urlopen(href)
            try:
                css = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
            rules.extend(self._parse_style_rules(css)[0])

        return rules

    def inline_rules(self, rules):
        """Apply in the page inline the CSS rules.

        ``rules`` is an iterable of (selector, declarations) tuples.
        Each matching element gets the declarations merged into its
        ``style`` attribute, and some of them mirrored as basic HTML
        attributes.
        """
        for selector, style in rules:
            class_ = ''
            if ':' in selector:
                # Split 'a:hover' into the selector 'a' and the
                # pseudo-class ':hover'.
                selector, pseudo = selector.split(':', 1)
                class_ = ':%s' % pseudo

            for item in self.page.cssselect(selector):
                old_style = item.attrib.get('style', '')
                new_style = _merge_styles(old_style, style, class_)
                item.attrib['style'] = new_style
                self._style_to_basic_html_attributes(item, new_style)

    def clean_page(self):
        """Clean the page of useless parts: the ``class`` attributes
        and the <style> and <script> elements."""
        for elem in self.page.xpath('//*[@class]'):
            del elem.attrib['class']
        # drop_tree() keeps the text that follows the removed element
        # (its tail), which getparent().remove() would discard with it.
        for elem in self.page.cssselect('style'):
            elem.drop_tree()
        for elem in self.page.cssselect('script'):
            elem.drop_tree()

    def _parse_style_rules(self, css_body):
        """Split a stylesheet into (selector, declarations) tuples.

        Returns ``(rules, leftover)``: ``leftover`` holds the tuples
        whose selector contains a ':' (pseudo-classes). A bare '*'
        selector is dropped unless ``include_star_selectors`` is set.
        """
        leftover = []
        rules = []
        css_body = _css_comments.sub('', css_body)
        for __, selectors, bulk in _regex.findall(css_body.strip()):
            # Normalize 'a: b;  c: d;' to 'a:b;c:d'.
            bulk = _semicolon_regex.sub(';', bulk.strip())
            bulk = _colon_regex.sub(':', bulk.strip())
            if bulk.endswith(';'):
                bulk = bulk[:-1]
            for selector in selectors.split(','):
                selector = selector.strip()
                if not selector:
                    continue
                if ':' in selector:
                    # A pseudoclass
                    leftover.append((selector, bulk))
                elif selector == '*' and not self.include_star_selectors:
                    continue
                else:
                    rules.append((selector, bulk))

        return rules, leftover

    def _style_to_basic_html_attributes(self, element, style_content):
        """Given an element and styles like
        'background-color:red; font-family:Arial' turn some of that into HTML
        attributes. like 'bgcolor', etc.
        Note, the style_content can contain pseudoclasses like:
        '{color:red; border:1px solid green} :visited{border:1px solid green}'

        Only 'text-align', 'background-color' and 'width' are mirrored
        (as 'align', 'bgcolor' and 'width'); attributes already present
        on the element are never overwritten.
        """
        # The original compared count('{') with itself, which is always
        # true; the balance check needs both kinds of braces.
        if style_content.count('}') and \
                style_content.count('{') == style_content.count('}'):
            if style_content.startswith('{'):
                # Keep only the leading un-prefixed group.
                style_content = style_content[1:].split('}', 1)[0]
            else:
                # Only pseudo-class groups: nothing applies to the
                # element in its normal state.
                style_content = ''

        attributes = {}
        for declaration in style_content.split(';'):
            parts = declaration.split(':')
            if len(parts) != 2:
                continue
            key = parts[0].strip()
            value = parts[1].strip()

            if key == 'text-align':
                attributes['align'] = value
            elif key == 'background-color':
                attributes['bgcolor'] = value
            elif key == 'width':
                # The HTML width attribute takes a bare pixel count.
                if value.endswith('px'):
                    value = value[:-2]
                attributes['width'] = value

        for key, value in attributes.items():
            if key in element.attrib:
                # Already set, don't dare to overwrite
                continue
            element.attrib[key] = value
|
|
|