blob: 75df318746af18a42661de6ef07c1004f66bb42d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django import template
from bs4 import BeautifulSoup, Comment
import re
# Template library instance that this module's filters are registered on.
register = template.Library()
# Matches the word "javascript" even when its letters are separated by
# whitespace or hex character references (e.g. "java&#x09;script").
# IGNORECASE is required: browsers treat the URL scheme case-insensitively,
# so "JavaScript:" must be stripped just like "javascript:".
_JS_REGEX = re.compile(
    r'[\s]*(&#x.{1,7})?'.join(list('javascript')), re.IGNORECASE)


def _strip_javascript(attr_value):
    """Return attr_value with every (obfuscated) "javascript" removed.

    BeautifulSoup 4 represents multi-valued attributes such as ``class`` as
    a list of strings, so both plain strings and lists are handled.
    """
    if isinstance(attr_value, (list, tuple)):
        return [_JS_REGEX.sub('', item) for item in attr_value]
    return _JS_REGEX.sub('', attr_value)


def sanitize(value, allowed_tags):
    """Strip disallowed tags, attributes and comments from an HTML string.

    ``allowed_tags`` should be in the form 'tag1:attr1:attr2 tag2:attr1 tag3',
    where tags are the allowed HTML tags and attrs are the allowed attributes
    for that tag.

    Tags that are not allowed are dropped but their contents are kept.
    Allowed tags keep only their allowed attributes, and any occurrence of
    "javascript" is removed from the values of those attributes.

    Returns the sanitized markup as a text string.
    """
    # Build {tag_name: [allowed_attr, ...]} from the space separated spec.
    whitelist = {}
    for spec in allowed_tags.split():
        parts = spec.split(':')
        whitelist[parts[0]] = parts[1:]

    # Name the parser explicitly so the result does not depend on which
    # optional parser happens to be installed; html.parser also does not
    # wrap fragments in extra <html>/<body> elements.
    soup = BeautifulSoup(value, 'html.parser')

    # HTML comments are removed entirely.
    for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    for tag in soup.find_all(True):
        if tag.name not in whitelist:
            # A hidden tag renders only its contents, not its own markup.
            tag.hidden = True
            continue
        permitted = whitelist[tag.name]
        tag.attrs = {
            attr: _strip_javascript(attr_value)
            for attr, attr_value in tag.attrs.items()
            if attr in permitted
        }

    return soup.decode_contents()
# Make ``sanitize`` available in templates under its own function name.
register.filter('sanitize', sanitize)
|