"""Convert HTML markup to BBCode using a configurable tag mapping.

Tag mappings are read from ``bbdata.conf`` located next to this module and,
optionally, from additional configuration files supplied by the caller.
Each configuration section is named after an HTML tag (or, for expanded
attributes, after an attribute) and provides ``start`` and ``end``
templates; a section may also list attribute names in ``expand``.
"""
from __future__ import unicode_literals

from collections import defaultdict
from configparser import RawConfigParser
from html.parser import HTMLParser
from os.path import dirname, join


class Attributes(dict):
    """Mapping of tag attributes that yields ``''`` for missing keys.

    This lets ``%(name)s`` templates be rendered even when the HTML tag
    does not carry every attribute the template refers to.
    """

    def __missing__(self, name):
        # Called by dict item lookup (and therefore by %-formatting with a
        # mapping) when ``name`` is absent.
        return ''


class ConfigParser(RawConfigParser, object):
    """Raw config parser that turns escaped newlines into real ones."""

    def get(self, section, option, **kwargs):
        """Return the option value with backslash-n pairs made newlines.

        Keyword arguments (``raw``, ``vars``, ``fallback``) are forwarded to
        ``RawConfigParser.get`` so that helpers built on top of ``get``
        (``getint``, mapping-style access, ...) keep working.

        Values that are not strings -- ``None`` for a valueless option or a
        non-string ``fallback`` -- are returned unchanged.
        """
        value = super(ConfigParser, self).get(section, option, **kwargs)
        try:
            return value.replace('\\n', '\n')
        except AttributeError:
            # Not a string (e.g. None); nothing to unescape.
            return value


class HTML2BBCode(HTMLParser):
    """HTML parser that emits BBCode for the tags named in its config.

    Tags without a matching configuration section are dropped; their text
    content is kept.

    >>> print(HTML2BBCode().feed('plain text'))
    plain text
    """

    def __init__(self, config=None):
        """Load the bundled tag mapping and any extra config file(s).

        :param config: optional filename, or list of filenames, passed to
            the config parser's ``read`` after ``bbdata.conf`` is loaded.
        """
        HTMLParser.__init__(self)
        self.config = ConfigParser(allow_no_value=True)
        self.config.read(join(dirname(__file__), 'bbdata.conf'))
        if config:
            self.config.read(config)
        # Output fragments and the per-tag stack of open-tag attributes.
        # Both are recreated by every feed() call.
        self.data = []
        self.attrs = defaultdict(list)

    def handle_starttag(self, tag, attrs):
        """Emit the ``start`` template of a configured tag."""
        if not self.config.has_section(tag):
            return
        attributes = Attributes(attrs or {})
        # Remember the attributes so the matching end tag can expand them.
        self.attrs[tag].append(attributes)
        self.data.append(self.config.get(tag, 'start') % attributes)
        if self.config.has_option(tag, 'expand'):
            self.expand_starttags(tag)

    def handle_endtag(self, tag):
        """Emit the ``end`` template of a configured tag.

        A closing tag that has no pending opening tag is ignored.
        """
        if not self.config.has_section(tag):
            return
        if not self.attrs[tag]:
            # Stray closing tag: there is nothing to close or to pop.
            return
        self.data.append(self.config.get(tag, 'end'))
        if self.config.has_option(tag, 'expand'):
            self.expand_endtags(tag)
        self.attrs[tag].pop()

    def handle_data(self, data):
        """Copy text content to the output unchanged."""
        self.data.append(data)

    def feed(self, data):
        """Convert the HTML string ``data`` and return the BBCode string.

        Every call is a complete, independent conversion: the parser is
        reset first and closed afterwards, so text the parser would
        otherwise keep buffered (for example a trailing ``AT&T``) is part
        of the result instead of leaking into the next call.
        """
        self.reset()
        self.data = []
        self.attrs = defaultdict(list)
        HTMLParser.feed(self, data)
        self.close()
        return ''.join(self.data)

    def expand_starttags(self, tag):
        """Emit ``start`` templates for the expandable attributes present
        on the innermost open ``tag``, in configuration order."""
        current = self.attrs[tag][-1]
        for expand in self.get_expands(tag):
            if expand in current:
                self.data.append(self.config.get(expand, 'start') % current)

    def expand_endtags(self, tag):
        """Emit ``end`` templates for the expandable attributes present on
        the innermost open ``tag``, in reverse configuration order."""
        current = self.attrs[tag][-1]
        for expand in reversed(self.get_expands(tag)):
            if expand in current:
                self.data.append(self.config.get(expand, 'end') % current)

    def get_expands(self, tag):
        """Return the list of attribute names in the tag's ``expand`` option.

        A list (not a lazy ``map`` object) is returned because callers
        need to iterate it in reverse.
        """
        expands = self.config.get(tag, 'expand').split(',')
        return [name.strip() for name in expands]


if __name__ == '__main__':
    import doctest
    doctest.testmod()