Author: fw
Date: 2008-11-20 15:15:24 +0000 (Thu, 20 Nov 2008)
New Revision: 10437
Modified:
bin/update-nvd
Log:
bin/update-nvd: Remove duplicates before loading NVD data
Modified: bin/update-nvd
==================================================================
--- bin/update-nvd 2008-11-20 12:07:00 UTC (rev 10436)
+++ bin/update-nvd 2008-11-20 15:15:24 UTC (rev 10437)
@@ -34,6 +34,14 @@
data += nvd.parse(f)
f.close()
+# For some reason, NVD adds duplicates, so we need to get rid of them.
+# Sort afterwards to increase locality in the insert process.
+deduplicate = {}
+for x in data:
+ deduplicate[x[0]] = x
+data = deduplicate.values()
+data.sort()
+
cursor = db.writeTxn()
if incremental:
db.updateNVD(cursor, data)