Hello, I borrowed dovecot parser for email addresses and I'm using it in my new perl module Email::Address::XS for parsing & formatting list of email addresses. That perl module is available at github [1]. During implementation and testing I found bugs in dovecot code. So I'm sending my patches (together with tests) which I'm using in my perl module. Lot of other (normal & corner) test cases are part of that perl module [2], so if you are interested feel free to reuse them. Because writing new test cases for dovecot is hard for me, I'm not going to do it. In perl for perl modules it is a lot of easier for me. [1] - https://github.com/pali/Email-Address-XS [2] - https://github.com/pali/Email-Address-XS/blob/master/t/Email-Address-XS.t Changes since v1: * Updated description with test example * Rebased on top of master branch Pali Roh?r (7): lib-mail: message_address_write: Fix generating empty group list lib-mail: message_address_write: Fix generating group list with empty name lib-mail: parse_addr_spec: Like in rfc822_skip_comment() check if last_comment is not NULL lib-mail: parse_addr_spec: Email address without local-part is invalid lib-mail: parse_mailbox: Set display name instead mailbox when parsing failed lib-mail: message_address_write: Quote and escape strings if needed lib-mail: Update tests for message address src/lib-mail/message-address.c | 99 +++++++++++++++++++++++++++++++---- src/lib-mail/test-message-address.c | 11 +++- 2 files changed, 98 insertions(+), 12 deletions(-) -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 1/7] lib-mail: message_address_write: Fix generating empty group list
Empty group list ends with ": " not with ", ". Test case: { { name = NULL, mailbox = "group", domain = NULL }, { name = NULL, mailbox = NULL, domain = NULL } } converts to: group:; --- src/lib-mail/message-address.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib-mail/message-address.c b/src/lib-mail/message-address.c index 36cb483..efa91fd 100644 --- a/src/lib-mail/message-address.c +++ b/src/lib-mail/message-address.c @@ -340,6 +340,7 @@ message_address_parse(pool_t pool, const unsigned char *data, size_t size, void message_address_write(string_t *str, const struct message_address *addr) { + const char *tmp; bool first = TRUE, in_group = FALSE; /* a) mailbox at domain @@ -365,7 +366,12 @@ void message_address_write(string_t *str, const struct message_address *addr) i_assert(addr->mailbox == NULL); /* cut out the ", " */ - str_truncate(str, str_len(str)-2); + tmp = str_c(str)+str_len(str)-2; + i_assert((tmp[0] == ',' || tmp[0] == ':') && tmp[1] == ' '); + if (tmp[0] == ',' && tmp[1] == ' ') + str_truncate(str, str_len(str)-2); + else if (tmp[0] == ':' && tmp[1] == ' ') + str_truncate(str, str_len(str)-1); str_append_c(str, ';'); } -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 2/7] lib-mail: message_address_write: Fix generating group list with empty name
Empty name for group list must be quoted. Test case: { { name = NULL, mailbox = "", domain = NULL }, { name = NULL, mailbox = NULL, domain = NULL } } converts to: "":; --- src/lib-mail/message-address.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lib-mail/message-address.c b/src/lib-mail/message-address.c index efa91fd..9ce4a55 100644 --- a/src/lib-mail/message-address.c +++ b/src/lib-mail/message-address.c @@ -357,8 +357,12 @@ void message_address_write(string_t *str, const struct message_address *addr) if (!in_group) { /* beginning of group. mailbox is the group name, others are NULL. */ - if (addr->mailbox != NULL) + if (addr->mailbox != NULL && *addr->mailbox != '\0') { str_append(str, addr->mailbox); + } else { + /* empty group name needs to be quoted */ + str_append(str, "\"\""); + } str_append(str, ": "); first = TRUE; } else { -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 3/7] lib-mail: parse_addr_spec: Like in rfc822_skip_comment() check if last_comment is not NULL
This will fix possible NULL pointer dereference when caller does not set last_comment. --- src/lib-mail/message-address.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lib-mail/message-address.c b/src/lib-mail/message-address.c index 9ce4a55..4e86185 100644 --- a/src/lib-mail/message-address.c +++ b/src/lib-mail/message-address.c @@ -158,7 +158,8 @@ static int parse_addr_spec(struct message_address_parser_context *ctx) /* addr-spec = local-part "@" domain */ int ret, ret2; - str_truncate(ctx->parser.last_comment, 0); + if (ctx->parser.last_comment != NULL) + str_truncate(ctx->parser.last_comment, 0); ret = parse_local_part(ctx); if (ret != 0 && *ctx->parser.data == '@') { @@ -167,9 +168,11 @@ static int parse_addr_spec(struct message_address_parser_context *ctx) ret = ret2; } - if (str_len(ctx->parser.last_comment) > 0) { - ctx->addr.name - p_strdup(ctx->pool, str_c(ctx->parser.last_comment)); + if (ctx->parser.last_comment != NULL) { + if (str_len(ctx->parser.last_comment) > 0) { + ctx->addr.name + p_strdup(ctx->pool, str_c(ctx->parser.last_comment)); + } } return ret; } -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 4/7] lib-mail: parse_addr_spec: Email address without local-part is invalid
Add explicit invalid_syntax flag also when end of input occure because address is without domain invalid and in this case it was not correctly propagated. --- src/lib-mail/message-address.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib-mail/message-address.c b/src/lib-mail/message-address.c index 4e86185..93b7c83 100644 --- a/src/lib-mail/message-address.c +++ b/src/lib-mail/message-address.c @@ -162,6 +162,10 @@ static int parse_addr_spec(struct message_address_parser_context *ctx) str_truncate(ctx->parser.last_comment, 0); ret = parse_local_part(ctx); + if (ret <= 0) { + /* end of input or parsing local-part failed */ + ctx->addr.invalid_syntax = TRUE; + } if (ret != 0 && *ctx->parser.data == '@') { ret2 = parse_domain(ctx); if (ret2 <= 0) -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 5/7] lib-mail: parse_mailbox: Set display name instead mailbox when parsing failed
It does not make sense to set mailbox without domain on incorrect input. Rather set display name which is more likely useable value. Test case: test is parsed as: { name = "test", mailbox = NULL, domain = NULL } --- src/lib-mail/message-address.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib-mail/message-address.c b/src/lib-mail/message-address.c index 93b7c83..54d4ee1 100644 --- a/src/lib-mail/message-address.c +++ b/src/lib-mail/message-address.c @@ -205,6 +205,10 @@ static int parse_mailbox(struct message_address_parser_context *ctx) /* nope, should be addr-spec */ ctx->parser.data = start; ret = parse_addr_spec(ctx); + if (ctx->addr.invalid_syntax && !ctx->addr.name && ctx->addr.mailbox && !ctx->addr.domain) { + ctx->addr.name = ctx->addr.mailbox; + ctx->addr.mailbox = NULL; + } } if (ret < 0) -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 6/7] lib-mail: message_address_write: Quote and escape strings if needed
ATEXT characters must be properly quoted when are in phrase. Test case: { name = "test\"test", mailbox = "user", domain = "host" } converts to: "test\"test" <user at host> --- src/lib-mail/message-address.c | 66 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/src/lib-mail/message-address.c b/src/lib-mail/message-address.c index 54d4ee1..7d6356c 100644 --- a/src/lib-mail/message-address.c +++ b/src/lib-mail/message-address.c @@ -2,6 +2,7 @@ #include "lib.h" #include "str.h" +#include "strescape.h" #include "message-parser.h" #include "message-address.h" #include "rfc822-parser.h" @@ -32,6 +33,49 @@ static void add_address(struct message_address_parser_context *ctx) ctx->last_addr = addr; } +/* quote with "" and escape all '\', '"' and "'" characters if need */ +static void str_append_maybe_escape(string_t *dest, const char *cstr, bool escape_dot) +{ + const char *p; + + /* see if we need to quote it */ + for (p = cstr; *p != '\0'; p++) { + if (!IS_ATEXT(*p) && (escape_dot || *p != '.')) + break; + } + + if (*p == '\0') { + str_append_data(dest, cstr, (size_t) (p - cstr)); + return; + } + + /* see if we need to escape it */ + for (p = cstr; *p != '\0'; p++) { + if (IS_ESCAPED_CHAR(*p)) + break; + } + + if (*p == '\0') { + /* only quote */ + str_append_c(dest, '"'); + str_append_data(dest, cstr, (size_t) (p - cstr)); + str_append_c(dest, '"'); + return; + } + + /* quote and escape */ + str_append_c(dest, '"'); + str_append_data(dest, cstr, (size_t) (p - cstr)); + + for (; *p != '\0'; p++) { + if (IS_ESCAPED_CHAR(*p)) + str_append_c(dest, '\\'); + str_append_c(dest, *p); + } + + str_append_c(dest, '"'); +} + static int parse_local_part(struct message_address_parser_context *ctx) { int ret; @@ -369,7 +413,14 @@ void message_address_write(string_t *str, const struct message_address *addr) /* beginning of group. mailbox is the group name, others are NULL. */ if (addr->mailbox != NULL && *addr->mailbox != '\0') { - str_append(str, addr->mailbox); + /* check for MIME encoded-word */ + if (strstr(addr->mailbox, "=?")) + /* MIME encoded-word MUST NOT appear within a 'quoted-string' + so escaping and quoting of phrase is not possible, instead + use obsolete RFC822 phrase syntax which allow spaces */ + str_append(str, addr->mailbox); + else + str_append_maybe_escape(str, addr->mailbox, TRUE); } else { /* empty group name needs to be quoted */ str_append(str, "\"\""); @@ -396,7 +447,7 @@ void message_address_write(string_t *str, const struct message_address *addr) /* no name and no route. use only mailbox at domain */ i_assert(addr->mailbox != NULL); - str_append(str, addr->mailbox); + str_append_maybe_escape(str, addr->mailbox, FALSE); str_append_c(str, '@'); str_append(str, addr->domain); } else { @@ -404,7 +455,14 @@ void message_address_write(string_t *str, const struct message_address *addr) i_assert(addr->mailbox != NULL); if (addr->name != NULL) { - str_append(str, addr->name); + /* check for MIME encoded-word */ + if (strstr(addr->name, "=?")) + /* MIME encoded-word MUST NOT appear within a 'quoted-string' + so escaping and quoting of phrase is not possible, instead + use obsolete RFC822 phrase syntax which allow spaces */ + str_append(str, addr->name); + else + str_append_maybe_escape(str, addr->name, TRUE); str_append_c(str, ' '); } str_append_c(str, '<'); @@ -412,7 +470,7 @@ void message_address_write(string_t *str, const struct message_address *addr) str_append(str, addr->route); str_append_c(str, ':'); } - str_append(str, addr->mailbox); + str_append_maybe_escape(str, addr->mailbox, FALSE); str_append_c(str, '@'); str_append(str, addr->domain); str_append_c(str, '>'); -- 1.7.9.5
Pali Rohár
2016-Jun-05 13:48 UTC
[PATCH v2 7/7] lib-mail: Update tests for message address
--- src/lib-mail/test-message-address.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lib-mail/test-message-address.c b/src/lib-mail/test-message-address.c index 9bbf29e..bf85806 100644 --- a/src/lib-mail/test-message-address.c +++ b/src/lib-mail/test-message-address.c @@ -20,11 +20,13 @@ static void test_message_address(void) static const char *input[] = { "user at domain", NULL, "<user at domain>", "user at domain", - "foo bar <user at domain>", NULL, - "\"foo bar\" <user at domain>", "foo bar <user at domain>", + "foo bar <user at domain>", "\"foo bar\" <user at domain>", + "\"foo bar\" <user at domain>", NULL, + "\"foo: <a at b>;,\" <user at domain>", NULL, "<@route:user at domain>", NULL, "<@route at route2:user at domain>", "<@route, at route2:user at domain>", "hello <@route , at route2:user at domain>", "hello <@route, at route2:user at domain>", + "hello", NULL, "user (hello)", NULL, "hello <user>", NULL, "@domain", NULL @@ -40,9 +42,11 @@ static void test_message_address(void) { NULL, NULL, NULL, "user", "domain", FALSE }, { NULL, "foo bar", NULL, "user", "domain", FALSE }, { NULL, "foo bar", NULL, "user", "domain", FALSE }, + { NULL, "foo: <a at b>;,", NULL, "user", "domain", FALSE }, { NULL, NULL, "@route", "user", "domain", FALSE }, { NULL, NULL, "@route, at route2", "user", "domain", FALSE }, { NULL, "hello", "@route, at route2", "user", "domain", FALSE }, + { NULL, "hello", NULL, "", "", TRUE }, { NULL, "hello", NULL, "user", "", TRUE }, { NULL, "hello", NULL, "user", "", TRUE }, { NULL, NULL, NULL, "", "domain", TRUE } @@ -104,10 +108,13 @@ static void test_message_address(void) str_append(group, "group:;"); addr = message_address_parse(pool_datastack_create(), str_data(group), str_len(group), UINT_MAX, FALSE); + str_truncate(str, 0); + message_address_write(str, addr); test_assert(addr != NULL && cmp_addr(addr, &group_prefix)); addr = addr->next; test_assert(addr != NULL && addr->next == NULL && cmp_addr(addr, &group_suffix)); + test_assert(strcmp(str_c(str), "group:;") == 0); test_end(); } -- 1.7.9.5