summaryrefslogtreecommitdiff
path: root/chimere/templatetags/sanitize.py
blob: 75df318746af18a42661de6ef07c1004f66bb42d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from django import template
from bs4 import BeautifulSoup, Comment
import re

# Template library instance on which this module's filter is registered.
register = template.Library()


# Matches the word "javascript" in any letter case, tolerating whitespace or
# a hexadecimal character reference between consecutive letters.
_JS_REGEX = re.compile(
    r'[\s]*(&#x.{1,7})?'.join(list('javascript')), re.IGNORECASE)

# Elements whose text bs4 serialises verbatim (without entity escaping).
_RAW_TEXT_TAGS = frozenset(['script', 'style'])


def _strip_js(text):
    """Remove every occurrence of the ``javascript`` pattern from *text*."""
    # One pass is not enough: deleting the inner match of
    # 'javajavascriptscript:' leaves a brand new 'javascript:' behind, so
    # repeat until the string stops changing (each pass shortens it).
    previous = None
    while previous != text:
        previous = text
        text = _JS_REGEX.sub('', text)
    return text


def _clean_attr_value(value):
    """Strip the ``javascript`` pattern from one bs4 attribute value."""
    # bs4 hands back multi-valued attributes (``class``, ``rel``, ...) as a
    # list of strings; running a regex over the list itself raises TypeError.
    if isinstance(value, (list, tuple)):
        return [_strip_js(item) for item in value]
    return _strip_js(value)


def sanitize(value, allowed_tags):
    """Strip disallowed tags and attributes from an HTML fragment.

    ``allowed_tags`` should be in the form 'tag1:attr1:attr2 tag2:attr1 tag3',
    where tags are the allowed HTML tags and attrs are the attributes allowed
    on that tag.

    HTML comments are removed.  Tags that are not allowed are dropped but
    their children are kept, except for ``script`` and ``style`` elements,
    which are removed together with their content.  On allowed tags, only
    the listed attributes survive, and any ``javascript`` occurrence is
    stripped from their values.

    Returns the sanitized markup as a text string ('' when ``value`` is
    None).
    """
    if value is None:
        # Template filters should degrade quietly rather than crash rendering.
        return ''

    allowed = {}
    for spec in allowed_tags.split():
        parts = spec.split(':')
        allowed[parts[0]] = parts[1:]

    # NOTE(review): no parser is named, so bs4 picks one of the installed
    # parsers and the output may differ between environments; consider
    # pinning one (e.g. 'html.parser') after checking the rendered output.
    soup = BeautifulSoup(value)
    for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    for tag in soup.find_all(True):
        if tag.name in allowed:
            permitted = allowed[tag.name]
            tag.attrs = {
                attr: _clean_attr_value(attr_value)
                for attr, attr_value in tag.attrs.items()
                if attr in permitted}
        elif tag.name in _RAW_TEXT_TAGS:
            # Hiding these would leak their unescaped content into the
            # output as live markup, so drop the whole element instead.
            tag.extract()
        else:
            # Hidden tags are not rendered themselves, but their children are.
            tag.hidden = True
    return soup.decode_contents()

# Expose ``sanitize`` to templates as a filter under its own name.
register.filter('sanitize', sanitize)