"""Premailer for newsletter Used for converting a page with CSS inline and links corrected. Based on http://www.peterbe.com/plog/premailer.py""" import re from urllib2 import urlopen from lxml.html import parse from lxml.html import tostring _css_comments = re.compile(r'/\*.*?\*/', re.MULTILINE | re.DOTALL) _regex = re.compile('((.*?){(.*?)})', re.DOTALL | re.M) _semicolon_regex = re.compile(';(\s+)') _colon_regex = re.compile(':(\s+)') def _merge_styles(old, new, class_=''): """ if :: old = 'font-size:1px; color: red' and :: new = 'font-size:2px; font-weight: bold' then :: return 'color: red; font-size:2px; font-weight: bold' In other words, the new style bits replace the old ones. The @class_ parameter can be something like ':hover' and if that is there, you split up the style with '{...} :hover{...}' Note: old could be something like '{...} ::first-letter{...}' """ news = {} for k, v in [x.strip().split(':', 1) for x in new.split(';') if x.strip()]: news[k.strip()] = v.strip() groups = {} grouping_regex = re.compile('([:\-\w]*){([^}]+)}') grouped_split = grouping_regex.findall(old) if grouped_split: for old_class, old_content in grouped_split: olds = {} for k, v in [x.strip().split(':', 1) for x in old_content.split(';') if x.strip()]: olds[k.strip()] = v.strip() groups[old_class] = olds else: olds = {} for k, v in [x.strip().split(':', 1) for x in old.split(';') if x.strip()]: olds[k.strip()] = v.strip() groups[''] = olds # Perform the merge merged = news for k, v in groups.get(class_, {}).items(): if k not in merged: merged[k] = v groups[class_] = merged if len(groups) == 1: return '; '.join(['%s:%s' % (k, v) for (k, v) in groups.values()[0].items()]) else: all = [] for class_, mergeable in sorted(groups.items(), lambda x, y: cmp(x[0].count(':'), y[0].count(':'))): all.append('%s{%s}' % (class_, '; '.join(['%s:%s' % (k, v) for (k, v) in mergeable.items()]))) return ' '.join([x for x in all if x != '{}']) class PremailerError(Exception): pass class Premailer(object): """Premailer for converting a webpage to be e-mail ready""" def __init__(self, url, include_star_selectors=False): self.url = url try: self.page = parse(self.url).getroot() except: raise PremailerError('Could not parse the html') self.include_star_selectors = include_star_selectors def transform(self): """Do some transformations to self.page for being e-mail compliant""" self.page.make_links_absolute(self.url) self.inline_rules(self.get_page_rules()) self.clean_page() # Do it a second time for correcting # ressources added by inlining. # Will not work as expected if medias # are located in other domain. self.page.make_links_absolute(self.url) return tostring(self.page.body) def get_page_rules(self): """Retrieve CSS rules in the