Eric Wong
2011-May-18 21:13 UTC
[Mongrel-development] [PATCH 0/2] strip leading/trailing linear whitespace in headers
Hello, I've pushed the following two changes to my git repo, based on "master" of git://github.com/fauna/mongrel.git [PATCH 1/2] ragel.rake: rebuild on http11_parser_common.rl changes [PATCH 2/2] strip trailing and leading linear whitespace in headers The first patch is a trivial rake dependency fix. I could definitely use more pairs of eyes to review my second patch, which I've included inline below. I'm also planning this for the unicorn and kcar projects. RFC 2616, section 4.2: > The field-content does not include any leading or trailing LWS: > linear white space occurring before the first non-whitespace > character of the field-value or after the last non-whitespace > character of the field-value. Such leading or trailing LWS MAY be > removed without changing the semantics of the field value. Any LWS > that occurs between field-content MAY be replaced with a single SP > before interpreting the field value or forwarding the message > downstream. --- You can pull from my repo here: git pull git://bogomips.org/mongrel.git http11-lws If you like web browsers (I don't) you can view the changes here: http://bogomips.org/mongrel.git?h=http11-lws ext/http11/http11_parser.c | 435 +++++++++++++++++++----------------- ext/http11/http11_parser_common.rl | 5 +- test/test_http11.rb | 66 ++++++ 3 files changed, 304 insertions(+), 202 deletions(-) (removed ext/http11/http11_parser.c diff for brevity) diff --git a/ext/http11/http11_parser_common.rl b/ext/http11/http11_parser_common.rl index 53c805f..2ef1d31 100644 --- a/ext/http11/http11_parser_common.rl +++ b/ext/http11/http11_parser_common.rl @@ -18,6 +18,7 @@ uchar = (unreserved | escape); pchar = (uchar | ":" | "@" | "&" | "=" | "+"); tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" 
| "=" | "{" | "}" | " " | "\t"); + lws = (" " | "\t"); # elements token = (ascii -- (CTL | tspecials)); @@ -43,9 +44,9 @@ field_name = ( token -- ":" )+ >start_field $snake_upcase_field %write_field; - field_value = any* >start_value %write_value; + field_value = (""|(any*(any -- lws))) >start_value %write_value; - message_header = field_name ":" " "* field_value :> CRLF; + message_header = field_name ":" lws* field_value lws* :> CRLF; Request = Request_Line ( message_header )* ( CRLF @done ); diff --git a/test/test_http11.rb b/test/test_http11.rb index da311af..2b55909 100644 --- a/test/test_http11.rb +++ b/test/test_http11.rb @@ -81,6 +81,72 @@ class HttpParserTest < Test::Unit::TestCase assert_equal 'posts-17408', req['FRAGMENT'] end + def test_leading_tab + parser = HttpParser.new + req = {} + get = "GET / HTTP/1.1\r\nHost:\texample.com\r\n\r\n" + assert_nothing_raised do + parser.execute(req, get, 0) + end + assert parser.finished? + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_trailing_whitespace + parser = HttpParser.new + req = {} + get = "GET / HTTP/1.1\r\nHost: example.com \r\n\r\n" + assert_nothing_raised do + parser.execute(req, get, 0) + end + assert parser.finished? + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_trailing_tab + parser = HttpParser.new + req = {} + get = "GET / HTTP/1.1\r\nHost: example.com\t\r\n\r\n" + assert_nothing_raised do + parser.execute(req, get, 0) + end + assert parser.finished? + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_trailing_multiple_linear_whitespace + parser = HttpParser.new + req = {} + get = "GET / HTTP/1.1\r\nHost: example.com\t \t \t\r\n\r\n" + assert_nothing_raised do + parser.execute(req, get, 0) + end + assert parser.finished? 
+ assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_embedded_linear_whitespace_ok + parser = HttpParser.new + req = {} + get = "GET / HTTP/1.1\r\nX-Space: hello\t world\t \r\n\r\n" + assert_nothing_raised do + parser.execute(req, get, 0) + end + assert parser.finished? + assert_equal "hello\t world", req["HTTP_X_SPACE"] + end + + def test_empty_header + parser = HttpParser.new + req = {} + get = "GET / HTTP/1.1\r\nHost: \r\n\r\n" + assert_nothing_raised do + parser.execute(req, get, 0) + end + assert parser.finished? + assert_equal '', req['HTTP_HOST'] + end + # lame random garbage maker def rand_data(min, max, readable=true) count = min + ((rand(max)+1) *10).to_i -- Eric Wong