Skip to content

Commit 890b0b6

Browse files
committed
[1.2.X] Fixed #14235 - UnicodeDecodeError in CSRF middleware
Thanks to jbg for the report. This changeset essentially backs out [13698] in favour of a method that sanitizes the token rather than escaping it. Backport of [13732] from trunk. git-svn-id: http://code.djangoproject.com/svn/django/branches/releases/1.2.X@13733 bcc190cf-cafb-0310-a4f2-bffc1f526a37
1 parent 43988e9 commit 890b0b6

File tree

3 files changed

+37
-14
lines changed

3 files changed

+37
-14
lines changed

django/middleware/csrf.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from django.core.urlresolvers import get_callable
1414
from django.utils.cache import patch_vary_headers
1515
from django.utils.hashcompat import md5_constructor
16-
from django.utils.html import escape
1716
from django.utils.safestring import mark_safe
1817

1918
_POST_FORM_RE = \
@@ -53,8 +52,8 @@ def _make_legacy_session_token(session_id):
5352

5453
def get_token(request):
5554
"""
56-
Returns the the CSRF token required for a POST form. No assumptions should
57-
be made about what characters might be in the CSRF token.
55+
Returns the the CSRF token required for a POST form. The token is an
56+
alphanumeric value.
5857
5958
A side effect of calling this function is to make the the csrf_protect
6059
decorator and the CsrfViewMiddleware add a CSRF cookie and a 'Vary: Cookie'
@@ -65,6 +64,17 @@ def get_token(request):
6564
return request.META.get("CSRF_COOKIE", None)
6665

6766

67+
def _sanitize_token(token):
68+
# Allow only alphanum, and ensure we return a 'str' for the sake of the post
69+
# processing middleware.
70+
token = re.sub('[^a-zA-Z0-9]', '', str(token.decode('ascii', 'ignore')))
71+
if token == "":
72+
# In case the cookie has been truncated to nothing at some point.
73+
return _get_new_csrf_key()
74+
else:
75+
return token
76+
77+
6878
class CsrfViewMiddleware(object):
6979
"""
7080
Middleware that requires a present and correct csrfmiddlewaretoken
@@ -90,7 +100,10 @@ def accept():
90100
# request, so it's available to the view. We'll store it in a cookie when
91101
# we reach the response.
92102
try:
93-
request.META["CSRF_COOKIE"] = request.COOKIES[settings.CSRF_COOKIE_NAME]
103+
# In case of cookies from untrusted sources, we strip anything
104+
# dangerous at this point, so that the cookie + token will have the
105+
# same, sanitized value.
106+
request.META["CSRF_COOKIE"] = _sanitize_token(request.COOKIES[settings.CSRF_COOKIE_NAME])
94107
cookie_is_new = False
95108
except KeyError:
96109
# No cookie, so create one. This will be sent with the next
@@ -235,7 +248,7 @@ def add_csrf_field(match):
235248
"""Returns the matched <form> tag plus the added <input> element"""
236249
return mark_safe(match.group() + "<div style='display:none;'>" + \
237250
"<input type='hidden' " + idattributes.next() + \
238-
" name='csrfmiddlewaretoken' value='" + escape(csrf_token) + \
251+
" name='csrfmiddlewaretoken' value='" + csrf_token + \
239252
"' /></div>")
240253

241254
# Modify any POST forms

django/template/defaulttags.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from django.template import get_library, Library, InvalidTemplateLibrary
1010
from django.template.smartif import IfParser, Literal
1111
from django.conf import settings
12-
from django.utils.html import escape
1312
from django.utils.encoding import smart_str, smart_unicode
1413
from django.utils.safestring import mark_safe
1514

@@ -43,7 +42,7 @@ def render(self, context):
4342
if csrf_token == 'NOTPROVIDED':
4443
return mark_safe(u"")
4544
else:
46-
return mark_safe(u"<div style='display:none'><input type='hidden' name='csrfmiddlewaretoken' value='%s' /></div>" % escape(csrf_token))
45+
return mark_safe(u"<div style='display:none'><input type='hidden' name='csrfmiddlewaretoken' value='%s' /></div>" % csrf_token)
4746
else:
4847
# It's very probable that the token is missing because of
4948
# misconfiguration, so we raise a warning

tests/regressiontests/csrf_tests/tests.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66
from django.views.decorators.csrf import csrf_exempt, csrf_view_exempt
77
from django.core.context_processors import csrf
88
from django.contrib.sessions.middleware import SessionMiddleware
9-
from django.utils.html import escape
109
from django.utils.importlib import import_module
1110
from django.conf import settings
1211
from django.template import RequestContext, Template
1312

1413
# Response/views used for CsrfResponseMiddleware and CsrfViewMiddleware tests
1514
def post_form_response():
16-
resp = HttpResponse(content="""
17-
<html><body><form method="post"><input type="text" /></form></body></html>
15+
resp = HttpResponse(content=u"""
16+
<html><body><h1>\u00a1Unicode!<form method="post"><input type="text" /></form></body></html>
1817
""", mimetype="text/html")
1918
return resp
2019

@@ -58,8 +57,9 @@ def is_secure(self):
5857

5958
class CsrfMiddlewareTest(TestCase):
6059
# The csrf token is potentially from an untrusted source, so could have
61-
# characters that need escaping
62-
_csrf_id = "<1>"
60+
# characters that need dealing with.
61+
_csrf_id_cookie = "<1>\xc2\xa1"
62+
_csrf_id = "1"
6363

6464
# This is a valid session token for this ID and secret key. This was generated using
6565
# the old code that we're to be backwards-compatible with. Don't use the CSRF code
@@ -74,7 +74,7 @@ def _get_GET_no_csrf_cookie_request(self):
7474

7575
def _get_GET_csrf_cookie_request(self):
7676
req = TestingHttpRequest()
77-
req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id
77+
req.COOKIES[settings.CSRF_COOKIE_NAME] = self._csrf_id_cookie
7878
return req
7979

8080
def _get_POST_csrf_cookie_request(self):
@@ -104,7 +104,7 @@ def _get_POST_session_request_no_token(self):
104104
return req
105105

106106
def _check_token_present(self, response, csrf_id=None):
107-
self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % escape(csrf_id or self._csrf_id))
107+
self.assertContains(response, "name='csrfmiddlewaretoken' value='%s'" % (csrf_id or self._csrf_id))
108108

109109
# Check the post processing and outgoing cookie
110110
def test_process_response_no_csrf_cookie(self):
@@ -290,6 +290,17 @@ def test_token_node_no_csrf_cookie(self):
290290
resp = token_view(req)
291291
self.assertEquals(u"", resp.content)
292292

293+
def test_token_node_empty_csrf_cookie(self):
294+
"""
295+
Check that we get a new token if the csrf_cookie is the empty string
296+
"""
297+
req = self._get_GET_no_csrf_cookie_request()
298+
req.COOKIES[settings.CSRF_COOKIE_NAME] = ""
299+
CsrfViewMiddleware().process_view(req, token_view, (), {})
300+
resp = token_view(req)
301+
302+
self.assertNotEqual(u"", resp.content)
303+
293304
def test_token_node_with_csrf_cookie(self):
294305
"""
295306
Check that CsrfTokenNode works when a CSRF cookie is set

0 commit comments

Comments
 (0)