Hi!
I've now seen this assertion failure from lmtp (most frequent), pop3
(next frequent), and imap (rarely). It didn't start happening until the
number of accounts on the server was increased into the 10k-25k range.
It happens when index->file_lock is a read lock and mail_index_lock is
called asking for a write lock. Dovecot 2.x dies at the assertion on
line 62 of mail-index-lock.c If that assertion were removed, it would
likely die at the assertion on line 80.
Looking at the mail-index-lock.c from dovecot 1.2.15, it looks like the
1.2 series would enter the else case of the "kind of kludgy" block,
and
end up calling file_lock_try_update(), which seems like what needs to
happen. Dovecot 2.x only does a file_lock_try_update() when unlocking,
to demote a write lock to a read lock (according to grep).
# 2.0.5 (6ea01abfaea9): /etc/dovecot/dovecot.conf
# OS: Linux 2.6.35.6 x86_64 Debian 5.0.6
Oct 9 23:02:59 stor29 dovecot: lmtp(4793, user at example.com): Panic:
file mail-index-lock.c: line 62 (mail_index_lock): assertion failed:
(index->lock_type == F_UNLCK)
Oct 9 23:02:59 stor29 dovecot: lmtp(4793, user at example.com): Error: Raw
backtrace: /usr/lib/dovecot/libdovecot.so.0 [0x7f03e16ff842]
-> /usr/lib/dovecot/libdovecot.so.0 [0x7f03e16ff8aa]
-> /usr/lib/dovecot/libdovecot.so.0(i_error+0) [0x7f03e16ffc53]
-> /usr/lib/dovecot/libdovecot-storage.so.0 [0x7f03e1bce244]
->
/usr/lib/dovecot/libdovecot-storage.so.0(mail_index_try_lock_exclusive+0x20)
[0x7f03e1bce2d0] ->
/usr/lib/dovecot/libdovecot-storage.so.0(mail_index_write+0x31d)
[0x7f03e1be287d] ->
/usr/lib/dovecot/libdovecot-storage.so.0(mail_index_sync_commit+0x166)
[0x7f03e1bda3d6] -> /usr/lib/dovecot/libdovecot-storage.so.0 [0x7f03e1ba1d16]
->
/usr/lib/dovecot/libdovecot-storage.so.0(maildir_transaction_save_commit_pre+0x4d2)
[0x7f03e1b9d0b2] -> /usr/lib/dovecot/libdovecot-storage.so.0 [0x7f03e1b98213]
->
/usr/lib/dovecot/libdovecot-storage.so.0(mail_index_transaction_commit_full+0x97)
[0x7f03e1bd2017] ->
/usr/lib/dovecot/libdovecot-storage.so.0(index_transaction_commit+0x8b)
[0x7f03e1b97e9b] -> /usr/lib/dovecot/modules/lib10_quota_plugin.so
[0x7f03e0b44bd1] ->
/usr/lib/dovecot/libdovecot-storage.so.0(mailbox_transaction_commit_get_changes+0x3d)
[0x7f03e1b7722d] ->
/usr/lib/dovecot/libdovecot-lda.so.0(mail_deliver_save+0x176) [0x7f03e1942e96]
-> /usr/lib/dovecot/libdovecot-lda.so.0(mail_deliver+0xe3) [0x7f03e1943193]
-> dovecot/lmtp [0x40569e] ->
/usr/lib/dovecot/libdovecot.so.0(io_loop_handler_run+0xcb) [0x7f03e170a93b]
-> /usr/lib/dovecot/libdovecot.so.0(io_loop_run+0x18) [0x7f03e1709a58] ->
/usr/lib/dovecot/libdovecot.so.0(master_service_run+0x13) [0x7f03e16f8f93] ->
dovecot/lmtp(main+0x159) [0x404419] -> /lib/libc.so.6(__libc_start_main+0xe6)
[0x7f03e0f831a6] -> dovecot/lmtp [0x4041f9]
Oct 9 23:02:59 stor29 dovecot: master: Error: service(lmtp): child 4793
killed with signal 6 (core not dumped)
Here's a backtrace from a different lmtp process that I attached gdb to,
since lmtp didn't want to dump core:
#0 0x00007fe8054c7ed5 in raise () from /lib/libc.so.6
No symbol table info available.
#1 0x00007fe8054c93f3 in abort () from /lib/libc.so.6
No symbol table info available.
#2 0x00007fe805c30855 in default_fatal_finish (type=<value optimized
out>,
status=0) at failures.c:178
backtrace = 0x1a50bd0 "/usr/lib/dovecot/libdovecot.so.0
[0x7fe805c30842] -> /usr/lib/dovecot/libdovecot.so.0 [0x7fe805c308aa]
-> /usr/lib/dovecot/libdovecot.so.0(i_error+0) [0x7fe805c30c53]
-> /usr/lib/dovecot/libdovecot-st"...
#3 0x00007fe805c308aa in i_internal_fatal_handler
(type=LOG_TYPE_PANIC,
status=0, fmt=<value optimized out>, args=<value optimized out>)
at failures.c:607
No locals.
#4 0x00007fe805c30c53 in i_panic (format=<value optimized out>)
at failures.c:244
args = {{gp_offset = 40, fp_offset = 48,
overflow_arg_area = 0x7fff71f8cc40, reg_save_area = 0x7fff71f8cb80}}
#5 0x00007fe8060ff244 in mail_index_lock (index=0x1cf0f80,
lock_type=1,
timeout_secs=<value optimized out>, lock_id_r=0x7fff71f8cdac)
at mail-index-lock.c:62
ret = <value optimized out>
__FUNCTION__ = "mail_index_lock"
#6 0x00007fe8060ff2d0 in mail_index_try_lock_exclusive (index=0x4c10,
lock_id_r=<value optimized out>) at mail-index-lock.c:146
ret = <value optimized out>
#7 0x00007fe80611387d in mail_index_write (index=0x1cf0f80,
want_rotate=false)
at mail-index-write.c:226
map = (struct mail_index_map *) 0x1d0baf0
st = {st_dev = 37632, st_ino = 12898927743, st_nlink = 1,
st_mode = 33152, st_uid = 2500, st_gid = 2500, pad0 = 0, st_rdev = 0,
st_size = 260200, st_blksize = 4096, st_blocks = 512, st_atim = {
tv_sec = 1286434371, tv_nsec = 554727597}, st_mtim = {tv_sec 1286611467,
tv_nsec = 18456193}, st_ctim = {tv_sec = 1286611467, tv_nsec 18456193},
__unused = {0, 0, 0}}
lock_id = 0
ret = <value optimized out>
__FUNCTION__ = "mail_index_write"
#8 0x00007fe80610b3d6 in mail_index_sync_commit (_ctx=0x1cfd908)
at mail-index-sync.c:837
ctx = (struct mail_index_sync_ctx *) 0x1cd4590
index = (struct mail_index *) 0x1cf0f80
next_uid = 0
want_rotate = 16
index_undeleted = false
delete_index = false
ret = 0
#9 0x00007fe8060d2d16 in maildir_sync_index_finish (ctx=0x1cfd8f0,
success=<value optimized out>) at maildir-sync-index.c:332
mbox = (struct maildir_mailbox *) 0x1aa2af0
time_diff = <value optimized out>
ret = 0
#10 0x00007fe8060ce0b2 in maildir_transaction_save_commit_pre
(_ctx=0x1d47a40)
at maildir-save.c:984
_t = (struct mailbox_transaction_context *) 0x1d0bd80
sync_flags = <value optimized out>
ret = <value optimized out>
__FUNCTION__ = "maildir_transaction_save_commit_pre"
#11 0x00007fe8060c9213 in index_transaction_index_commit (
index_trans=0x1afbad0, result_r=0x7fff71f8d090) at
index-transaction.c:27
it = (struct index_transaction_context *) 0x1d0bd80
ibox = (struct index_mailbox_context *) 0x1aa2eb8
ret = <value optimized out>
__FUNCTION__ = "index_transaction_index_commit"
#12 0x00007fe806103017 in mail_index_transaction_commit_full (
_t=0x7fff71f8d0b8, result_r=0x7fff71f8d090) at
mail-index-transaction.c:234
t = (struct mail_index_transaction *) 0x1afbad0
index = (struct mail_index *) 0x1cf0f80
index_undeleted = false
#13 0x00007fe8060c8e9b in index_transaction_commit (t=0x1d0bd80,
changes_r=0x7fff71f8d160) at index-transaction.c:123
box = (struct mailbox *) 0x1aa2af0
itrans = (struct mail_index_transaction *) 0x0
result = {log_file_seq = 0, log_file_offset = 0, commit_size 0,
ignored_modseq_changes = 0}
ret = <value optimized out>
#14 0x00007fe805075bd1 in quota_mailbox_transaction_commit
(ctx=0x1d0bd80,
changes_r=0x7fff71f8d160) at quota-storage.c:94
qt = (struct quota_transaction_context *) 0x1cfd610
#15 0x00007fe8060a822d in mailbox_transaction_commit_get_changes (
_t=<value optimized out>, changes_r=0x7fff71f8d160) at
mail-storage.c:1215
_data_stack_cur_id = 3
t = (struct mailbox_transaction_context *) 0x1d0bd80
ret = <value optimized out>
#16 0x00007fe805e73e96 in mail_deliver_save (ctx=0x7fff71f8d250,
mailbox=0x407ba1 "INBOX", flags=<value optimized out>,
keywords=<value optimized out>, storage_r=0x7fff71f8d2c8)
at mail-deliver.c:219
open_ctx = {user = 0x1ce6980, lda_mailbox_autocreate = false,
lda_mailbox_autosubscribe = false}
box = (struct mailbox *) 0x1aa2af0
trans_flags = <value optimized out>
t = (struct mailbox_transaction_context *) 0x0
save_ctx = (struct mail_save_context *) 0x0
kw = (struct mail_keywords *) 0x0
error = MAIL_ERROR_NONE
mailbox_name = 0x407ba1 "INBOX"
errstr = <value optimized out>
changes = {pool = 0x1cd3fb0, uid_validity = 1233133548,
saved_uids = {
arr = {buffer = 0x1cd3fd0, element_size = 8}, v = 0x1cd3fd0,
v_modifiable = 0x1cd3fd0}, ignored_modseq_changes = 0}
range = <value optimized out>
ret = 0
__FUNCTION__ = "mail_deliver_save"
#17 0x00007fe805e74193 in mail_deliver (ctx=0x7fff71f8d250,
storage_r=0x7fff71f8d2c8) at mail-deliver.c:289
ret = -1
#18 0x000000000040569e in client_input_data_handle (client=0x1a87520)
at commands.c:476
data = <value optimized out>
size = 1662
ret = <value optimized out>
#19 0x00007fe805c3b93b in io_loop_handler_run (ioloop=<value optimized
out>)
at ioloop-epoll.c:212
ctx = (struct ioloop_handler_context *) 0x1a5f480
event = (const struct epoll_event *) 0x1a5f4f0
list = (struct io_list *) 0x1a60170
io = (struct io_file *) 0x1b5aab0
tv = {tv_sec = 299, tv_usec = 998518}
t_id = 2
msecs = <value optimized out>
ret = 1
call = <value optimized out>
#20 0x00007fe805c3aa58 in io_loop_run (ioloop=0x1a585b0) at ioloop.c:350
No locals.
#21 0x00007fe805c29f93 in master_service_run (service=0x1a58480,
callback=0x4c10) at master-service.c:496
22 0x0000000000404419 in main (argc=1, argv=0x1a58370) at main.c:120
set_roots = {0x6098a0, 0x408320, 0x0}
service_flags = <value optimized out>
storage_service_flags = 675
c = <value optimized out>
And here's index and index->file_lock from mail_index_lock:
(gdb) p *index
$6 = {dir = 0x1b54380 "/w/c/3/d/808008/Maildir",
prefix = 0x1d04430 "dovecot.index", cache = 0x1d0b8b0, log
0x1c6afb0,
open_count = 2, flags = MAIL_INDEX_OPEN_FLAG_CREATE,
fsync_mode = FSYNC_MODE_OPTIMIZED, fsync_mask = 0, mode = 384,
gid = 4294967295, gid_origin = 0x1b5e270 "/w/c/3/d/808008/Maildir",
extension_pool = 0x1c32130, extensions = {arr = {buffer = 0x1c32150,
element_size = 64}, v = 0x1c32150, v_modifiable = 0x1c32150},
sync_lost_handlers = {arr = {buffer = 0x1c06e60, element_size = 8},
v = 0x1c06e60, v_modifiable = 0x1c06e60},
filepath = 0x1bd71b0 "/w/c/3/d/808008/Maildir/dovecot.index", fd 14,
map = 0x1d0baf0, indexid = 1286433474, inconsistency_id = 0,
last_read_log_file_seq = 3, last_read_log_file_head_offset = 32852,
last_read_log_file_tail_offset = 32852, last_read_stat = {st_dev 37632,
st_ino = 12898927743, st_nlink = 1, st_mode = 33152, st_uid = 2500,
st_gid = 2500, pad0 = 0, st_rdev = 0, st_size = 260200, st_blksize 4096,
st_blocks = 512, st_atim = {tv_sec = 1286434371, tv_nsec 554727597},
st_mtim = {tv_sec = 1286611467, tv_nsec = 18456193}, st_ctim = {
tv_sec = 1286611467, tv_nsec = 18456193}, __unused = {0, 0, 0}},
fsck_log_head_file_seq = 0, fsck_log_head_file_offset = 0,
sync_commit_result = 0x0, lock_type = 0, shared_lock_count = 1,
excl_lock_count = 0, lock_id_counter = 4,
lock_method = FILE_LOCK_METHOD_FCNTL, max_lock_timeout_secs 4294967295,
file_lock = 0x1b5e290, dotlock = 0x0, keywords_pool = 0x1b5dfb0,
keywords = {
arr = {buffer = 0x1b5e1a0, element_size = 8}, v = 0x1b5e1a0,
v_modifiable = 0x1b5e1a0}, keywords_hash = 0x1c6af60,
keywords_ext_id = 0,
modseq_ext_id = 1, module_contexts = {arr = {buffer = 0x1ac22b0,
element_size = 8}, v = 0x1ac22b0, v_modifiable = 0x1ac22b0},
error = 0x0, nodiskspace = 0, index_lock_timeout = 0,
index_delete_requested = 0, index_deleted = 0, log_sync_locked = 1,
readonly = 0, mapping = 0, syncing = 1, need_recreate = 0,
index_min_write = 0, modseqs_enabled = 0, initial_create = 0,
initial_mapped = 1}
(gdb) p *index->file_lock
$7 = {fd = 14, path = 0x1d0bac0
"/w/c/3/d/808008/Maildir/dovecot.index",
lock_type = 0, lock_method = FILE_LOCK_METHOD_FCNTL}
I can easily reproduce this currently if more information is needed.
--
Ryan Murray <rmurray at cyberhqz.com>