Alexander Moisseev
2016-May-16 06:27 UTC
SIS: dovecot creates duplicated attachments (sometimes)
SIS deduplication was broken in 2.2.16 and has been fixed with
https://github.com/dovecot/core/commit/3b39022ea0513363241cf852b7d454c841584ea1
but Dovecot still occasionally (a few times a month or so) creates
duplicated attachments.
As you can see in the directory listings below, all attachments were created at
(about) the same time, but one of them has a different inode number.
This is easy to discover using
https://github.com/moisseev/doveadm-tools/blob/master/bin/dsisck
# dsisck -n
mail_uid=vmail
mail_attachment_dir=/vmail/attachments
==> Checking SIS...
# ln -f
./30/b3/30b367c584a123eee59478adf3e4f4c9e1226545-c56eae04a67c3157287f01003d96bafd
./30/b3/30b367c584a123eee59478adf3e4f4c9e1226545-7ad0a411d17c31572b7a01003d96bafd
# mkdir -p -m 700 ./64/8f/hashes && ln
./64/8f/648f5cfa27af6d20c8570fdcaeab997663e15105-55430d2cd4432c571cb600003d96bafd
./64/8f/hashes/648f5cfa27af6d20c8570fdcaeab997663e15105
# ln -f
./6f/3f/6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-b1935817ed753557546700003d96bafd
./6f/3f/6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-72da1f32e38e3557947b00003d96bafd
# ln -f
./dd/d9/ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-6bf7c122fefb3157ad3500003d96bafd
./dd/d9/ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-90fd1415e7e53157112800003d96bafd
--------------------------------------------------------------
Unexpected objects found: 0
Attachments processed: 43500
Different attachments with similar hashes skipped: 0
Attachments deduplicated: 3
File system blocks freed up: 1800
Attachment deduplication attempts failed: 0
Hash files created: 1
Hash files creation attempts failed: 0
Hash files re-linked: 0
Hash files re-link attempts failed: 0
Orphaned hash files: 0
Invalid hash files (not a regular file): 0
--------------------------------------------------------------
# cd /vmail/attachments
# ls -li ./30/b3/ ./30/b3/hashes/
./30/b3/:
total 1996
7464083 -rw------- 1 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545-7ad0a411d17c31572b7a01003d96bafd
7464082 -rw------- 6 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545-bb6eae04a67c3157287f01003d96bafd
7464082 -rw------- 6 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545-bd6eae04a67c3157287f01003d96bafd
7464082 -rw------- 6 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545-bf6eae04a67c3157287f01003d96bafd
7464082 -rw------- 6 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545-c36eae04a67c3157287f01003d96bafd
7464082 -rw------- 6 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545-c56eae04a67c3157287f01003d96bafd
7464084 drwx------ 2 vmail vmail 512 May 10 09:16 hashes
./30/b3/hashes/:
total 332
7464082 -rw------- 6 vmail vmail 337719 May 10 09:16
30b367c584a123eee59478adf3e4f4c9e1226545
# ls -li ./6f/3f/ ./6f/3f/hashes/
./6f/3f/:
total 644
15088414 -rw------- 1 vmail vmail 326656 May 13 11:23
6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-72da1f32e38e3557947b00003d96bafd
15088422 -rw------- 2 vmail vmail 326656 May 13 11:23
6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-b1935817ed753557546700003d96bafd
15088448 drwx------ 2 vmail vmail 512 May 13 11:23 hashes
./6f/3f/hashes/:
total 320
15088422 -rw------- 2 vmail vmail 326656 May 13 11:23
6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe
# ls -li ./dd/d9/ ./dd/d9/hashes/
./dd/d9/:
total 748
80548 -rw------- 3 vmail vmail 250640 May 10 18:19
ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-68f7c122fefb3157ad3500003d96bafd
80548 -rw------- 3 vmail vmail 250640 May 10 18:19
ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-6bf7c122fefb3157ad3500003d96bafd
80547 -rw------- 1 vmail vmail 250640 May 10 18:19
ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-90fd1415e7e53157112800003d96bafd
80549 drwx------ 2 vmail vmail 512 May 10 18:19 hashes
./dd/d9/hashes/:
total 248
80548 -rw------- 3 vmail vmail 250640 May 10 18:19
ddd97aa6f624d4f54968d2c4956fc3a9d796b31b
# doveconf -n
# 2.2.24 (a82c823): /usr/local/etc/dovecot/dovecot.conf
# Pigeonhole version 0.4.14 (099a97c)
# OS: FreeBSD 10.3-RELEASE i386
auth_default_realm = example.com
auth_mechanisms = digest-md5 cram-md5 plain apop
doveadm_password = # hidden, use -P to show it
first_valid_gid = 1000
first_valid_uid = 1000
lda_mailbox_autosubscribe = yes
listen = *
mail_attachment_dir = /vmail/attachments
mail_gid = vmail
mail_home = /vmail/%d/%n
mail_location = mdbox:~/mdbox
mail_plugins = quota zlib acl
mail_shared_explicit_inbox = yes
mail_uid = vmail
managesieve_notify_capability = mailto
managesieve_sieve_capability = fileinto reject envelope encoded-character
vacation subaddress comparator-i;ascii-numeric relational regex imap4flags copy
include variables body enotify environment mailbox date index ihave duplicate
mime foreverypart extracttext
namespace {
list = children
location = mdbox:%%h/mdbox:INDEXPVT=~/mdbox/shared/%%u
prefix = shared/%%u/
separator = /
subscriptions = no
type = shared
}
namespace inbox {
inbox = yes
location =
mailbox Archives {
special_use = \Archive
}
mailbox Drafts {
special_use = \Drafts
}
mailbox Junk {
auto = subscribe
special_use = \Junk
}
mailbox Sent {
special_use = \Sent
}
mailbox Trash {
special_use = \Trash
}
prefix =
separator = /
}
passdb {
args = scheme=plain username_format=%n
/usr/local/etc/dovecot/dovecot.auth/%d.passwd
driver = passwd-file
}
plugin {
acl = vfile
acl_shared_dict = file:/vmail/shared-mailboxes.db
antispam_backend = mailtrain
antispam_mail_notspam = report_ham
antispam_mail_sendmail = /usr/local/libexec/dovecot/dovecot-lda
antispam_mail_sendmail_args = -d;spam at example.com;-m
antispam_mail_spam = report_spam
antispam_spam = Junk
antispam_trash = Trash;train_ham;train_prob;train_spam
quota = dict:User quota::file:%h/dovecot-quota
quota_rule = *:storage=2G
quota_rule2 = Trash:storage=+10%%
quota_rule3 = Spam:storage=+20%%
quota_status_nouser = DUNNO
quota_status_success = DUNNO
sieve_after = /usr/local/etc/dovecot/sieve/sieve.after
sieve_before = /usr/local/etc/dovecot/sieve/sieve.before
sieve_vacation_min_period = 0
zlib_save = gz
zlib_save_level = 3
}
postmaster_address = postmaster at example.com
protocols = imap lmtp sieve pop3
quota_full_tempfail = yes
service auth {
unix_listener /var/spool/postfix/private/auth {
group = postfix
mode = 0660
user = postfix
}
unix_listener auth-userdb {
user = vmail
}
}
service config {
unix_listener config {
mode = 0600
user = vmail
}
}
service imap-login {
inet_listener imaps {
port = 0
}
process_limit = 200
}
service lmtp {
unix_listener /var/spool/postfix/private/dovecot-lmtp {
group = postfix
mode = 0660
user = postfix
}
user = vmail
}
service pop3-login {
inet_listener pop3s {
port = 0
}
}
service quota-status {
client_limit = 1
executable = quota-status -p postfix
unix_listener /var/spool/postfix/private/quota-status {
user = postfix
}
}
ssl_cert = </etc/ssl/certs/mx.example.com.crt
ssl_key = </etc/ssl/private/mx.example.com.key
userdb {
args = username_format=%n /usr/local/etc/dovecot/dovecot.auth/%d.passwd
driver = passwd-file
}
verbose_proctitle = yes
protocol imap {
mail_plugins = quota zlib acl antispam imap_quota imap_zlib imap_acl
}
protocol lmtp {
mail_plugins = quota zlib acl sieve
}
Alexander Moisseev
2018-Jul-20 07:25 UTC
SIS: dovecot creates duplicated attachments (sometimes)
It's been 2 years since my previous post, but the issue is still present (Dovecot 2.3.2). Long story short, SIS attachment deduplication was broken in Dovecot v2.2.16. When it was fixed in v2.2.22, I wrote this script to deduplicate the attachments already stored on my servers: https://github.com/moisseev/doveadm-tools/blob/master/bin/dsisck Sometimes I run the script and it finds new duplicated attachments (not too many). If you check these attachments, you find that they have the same hash and the same size, match when compared byte by byte, and were created at the same time, but one of them has a different inode number. For instance, 3 days ago it was: Attachments processed: 31535 Duplicated attachments: 370 Missed hash files: 1313 And today: Attachments processed: 32368 Duplicated attachments: 381 Missed hash files: 1317 So, 11 new duplicates and 4 new missed hashes. Can someone suggest a way to debug this problem? As a lot of time has passed since my previous post, I am reposting it below. On 5/16/2016 9:27 AM, Alexander Moisseev wrote: > SIS deduplication was broken in 2.2.16 and has been fixed with https://github.com/dovecot/core/commit/3b39022ea0513363241cf852b7d454c841584ea1 > but still sometimes (just several times in a month or so) dovecot creates duplicated attachments. > > As you can see in directory listings below all attachments was created at (about) the same time, but one of them has different inode number. > > > It easy to discover using https://github.com/moisseev/doveadm-tools/blob/master/bin/dsisck > > # dsisck -n > > mail_uid=vmail > mail_attachment_dir=/vmail/attachments > > ==> Checking SIS... 
> > # ln -f ./30/b3/30b367c584a123eee59478adf3e4f4c9e1226545-c56eae04a67c3157287f01003d96bafd ./30/b3/30b367c584a123eee59478adf3e4f4c9e1226545-7ad0a411d17c31572b7a01003d96bafd > # mkdir -p -m 700 ./64/8f/hashes && ln ./64/8f/648f5cfa27af6d20c8570fdcaeab997663e15105-55430d2cd4432c571cb600003d96bafd ./64/8f/hashes/648f5cfa27af6d20c8570fdcaeab997663e15105 > # ln -f ./6f/3f/6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-b1935817ed753557546700003d96bafd ./6f/3f/6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-72da1f32e38e3557947b00003d96bafd > # ln -f ./dd/d9/ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-6bf7c122fefb3157ad3500003d96bafd ./dd/d9/ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-90fd1415e7e53157112800003d96bafd > > -------------------------------------------------------------- >  Unexpected objects found: 0 > >  Attachments processed: 43500 >  Different attachments with similar hashes skipped: 0 > >  Attachments deduplicated: 3 >  File system blocks freed up: 1800 >  Attachment deduplication attempts failed: 0 > >  Hash files created: 1 >  Hash files creation attempts failed: 0 > >  Hash files re-linked: 0 >  Hash files re-link attempts failed: 0 > >  Orphaned hash files: 0 >  Invalid hash files (not a regular file): 0 > -------------------------------------------------------------- > > # cd /vmail/attachments > > > # ls -li ./30/b3/ ./30/b3/hashes/ > ./30/b3/: > total 1996 > 7464083 -rw-------  1 vmail  vmail  337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545-7ad0a411d17c31572b7a01003d96bafd > 7464082 -rw-------  6 vmail  vmail  337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545-bb6eae04a67c3157287f01003d96bafd > 7464082 -rw-------  6 vmail  vmail  337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545-bd6eae04a67c3157287f01003d96bafd > 7464082 -rw-------  6 vmail  vmail  337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545-bf6eae04a67c3157287f01003d96bafd > 7464082 -rw-------  6 vmail  vmail  
337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545-c36eae04a67c3157287f01003d96bafd > 7464082 -rw-------  6 vmail  vmail  337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545-c56eae04a67c3157287f01003d96bafd > 7464084 drwx------  2 vmail  vmail     512 May 10 09:16 hashes > > ./30/b3/hashes/: > total 332 > 7464082 -rw-------  6 vmail  vmail  337719 May 10 09:16 30b367c584a123eee59478adf3e4f4c9e1226545 > > # ls -li ./6f/3f/ ./6f/3f/hashes/ > ./6f/3f/: > total 644 > 15088414 -rw-------  1 vmail  vmail  326656 May 13 11:23 6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-72da1f32e38e3557947b00003d96bafd > 15088422 -rw-------  2 vmail  vmail  326656 May 13 11:23 6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe-b1935817ed753557546700003d96bafd > 15088448 drwx------  2 vmail  vmail     512 May 13 11:23 hashes > > ./6f/3f/hashes/: > total 320 > 15088422 -rw-------  2 vmail  vmail  326656 May 13 11:23 6f3fa3e4d374a9c80d07af54960ce0e7adb2e0fe > > # ls -li ./dd/d9/ ./dd/d9/hashes/ > ./dd/d9/: > total 748 > 80548 -rw-------  3 vmail  vmail  250640 May 10 18:19 ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-68f7c122fefb3157ad3500003d96bafd > 80548 -rw-------  3 vmail  vmail  250640 May 10 18:19 ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-6bf7c122fefb3157ad3500003d96bafd > 80547 -rw-------  1 vmail  vmail  250640 May 10 18:19 ddd97aa6f624d4f54968d2c4956fc3a9d796b31b-90fd1415e7e53157112800003d96bafd > 80549 drwx------  2 vmail  vmail     512 May 10 18:19 hashes > > ./dd/d9/hashes/: > total 248 > 80548 -rw-------  3 vmail  vmail  250640 May 10 18:19 ddd97aa6f624d4f54968d2c4956fc3a9d796b31b > > > # doveconf -n > # 2.2.24 (a82c823): /usr/local/etc/dovecot/dovecot.conf > # Pigeonhole version 0.4.14 (099a97c) > # OS: FreeBSD 10.3-RELEASE i386 > auth_default_realm = example.com > auth_mechanisms = digest-md5 cram-md5 plain apop > doveadm_password =  
# hidden, use -P to show it > first_valid_gid = 1000 > first_valid_uid = 1000 > lda_mailbox_autosubscribe = yes > listen = * > mail_attachment_dir = /vmail/attachments > mail_gid = vmail > mail_home = /vmail/%d/%n > mail_location = mdbox:~/mdbox > mail_plugins = quota zlib acl > mail_shared_explicit_inbox = yes > mail_uid = vmail > managesieve_notify_capability = mailto > managesieve_sieve_capability = fileinto reject envelope encoded-character vacation subaddress comparator-i;ascii-numeric relational regex imap4flags copy include variables body enotify environment mailbox date index ihave duplicate mime foreverypart extracttext > namespace { >   list = children >   location = mdbox:%%h/mdbox:INDEXPVT=~/mdbox/shared/%%u >   prefix = shared/%%u/ >   separator = / >   subscriptions = no >   type = shared > } > namespace inbox { >   inbox = yes >   location = >   mailbox Archives { >     special_use = \Archive >   } >   mailbox Drafts { >     special_use = \Drafts >   } >   mailbox Junk { >     auto = subscribe >     special_use = \Junk >   } >   mailbox Sent { >     special_use = \Sent >   } >   mailbox Trash { >     special_use = \Trash >   } >   prefix = >   separator = / > } > passdb { >   args = scheme=plain username_format=%n /usr/local/etc/dovecot/dovecot.auth/%d.passwd >   driver = passwd-file > } > plugin { >   acl = vfile >   acl_shared_dict = file:/vmail/shared-mailboxes.db >   antispam_backend = mailtrain >   antispam_mail_notspam = report_ham >   antispam_mail_sendmail = /usr/local/libexec/dovecot/dovecot-lda >   antispam_mail_sendmail_args = -d;spam at example.com;-m >   antispam_mail_spam = report_spam >   antispam_spam = Junk >   antispam_trash = Trash;train_ham;train_prob;train_spam >   quota = dict:User quota::file:%h/dovecot-quota >   quota_rule = *:storage=2G >   quota_rule2 = Trash:storage=+10%% >   quota_rule3 = Spam:storage=+20%% >   quota_status_nouser = DUNNO >   quota_status_success = DUNNO >   
sieve_after = /usr/local/etc/dovecot/sieve/sieve.after >   sieve_before = /usr/local/etc/dovecot/sieve/sieve.before >   sieve_vacation_min_period = 0 >   zlib_save = gz >   zlib_save_level = 3 > } > postmaster_address = postmaster at example.com > protocols = imap lmtp sieve pop3 > quota_full_tempfail = yes > service auth { >   unix_listener /var/spool/postfix/private/auth { >     group = postfix >     mode = 0660 >     user = postfix >   } >   unix_listener auth-userdb { >     user = vmail >   } > } > service config { >   unix_listener config { >     mode = 0600 >     user = vmail >   } > } > service imap-login { >   inet_listener imaps { >     port = 0 >   } >   process_limit = 200 > } > service lmtp { >   unix_listener /var/spool/postfix/private/dovecot-lmtp { >     group = postfix >     mode = 0660 >     user = postfix >   } >   user = vmail > } > service pop3-login { >   inet_listener pop3s { >     port = 0 >   } > } > service quota-status { >   client_limit = 1 >   executable = quota-status -p postfix >   unix_listener /var/spool/postfix/private/quota-status { >     user = postfix >   } > } > ssl_cert = </etc/ssl/certs/mx.example.com.crt > ssl_key = </etc/ssl/private/mx.example.com.key > userdb { >   args = username_format=%n /usr/local/etc/dovecot/dovecot.auth/%d.passwd >   driver = passwd-file > } > verbose_proctitle = yes > protocol imap { >   mail_plugins = quota zlib acl antispam imap_quota imap_zlib imap_acl > } > protocol lmtp { >   mail_plugins = quota zlib acl sieve > }