Start work on a dedup feature

--dedup scans the DB for existing entries with the same
message-id and skips inserting any message that is already
present. This is useful for re-importing after the ID generator
has changed or in case of unicode bugs.
diff --git a/tools/import-mbox.py b/tools/import-mbox.py
index 5c62d20..59dbf95 100644
--- a/tools/import-mbox.py
+++ b/tools/import-mbox.py
@@ -74,6 +74,8 @@
 resendTo = None
 timeout = 600
 fromFilter = None
+dedup = False
+dedupped = 0
 
 # Fetch config
 config = configparser.RawConfigParser()
@@ -238,7 +240,8 @@
 
             count = 0
             LEY = EY
-
+            
+            
             for message in messages:
                 # If --filter is set, discard any messages not matching by continuing to next email
                 if fromFilter and 'from' in message and message['from'].find(fromFilter) == -1:
@@ -261,6 +264,31 @@
                     break
 
                 json, contents = foo.compute_updates(list_override, private, message)
+                
+                # If --dedup is active, try to filter out any messages that already exist
+                if json and dedup and message.get('message-id', None):
+                    res = es.search(
+                        index=iname,
+                        doc_type="mbox",
+                        size = 1,
+                        body = {
+                            'query': {
+                                'bool': {
+                                    'must': [
+                                        {
+                                            'term': {
+                                                'message-id': message.get('message-id', None)
+                                            }
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    )
+                    if res and len(res['hits']['hits']) > 0:
+                        print("Dedupping %s" % json['message-id'])
+                        dedupped += 1
+                        continue
 
                 if json:
                     json_source = {
@@ -354,6 +382,8 @@
                    help='If no text/plain is found, try to parse HTML using html2text')
 parser.add_argument('--requirelid', dest='requirelid', action='store_true',
                    help='Require a List ID to be present, ignore otherwise')
+parser.add_argument('--dedup', dest='dedup', action='store_true',
+                   help='Try to dedup messages based on ID before importing')
 parser.add_argument('--ignorebody', dest='ibody', type=str, nargs=1,
                    help='Optional email bodies to treat as empty (in conjunction with --html2text)')
 parser.add_argument('--resend', dest='resend', type=str, nargs=1,
@@ -387,6 +417,8 @@
     quickmode = args.quick
 if args.private:
     private = args.private
+if args.dedup:
+    dedup = args.dedup
 if args.ext:
     extension = args.ext[0]
 if args.html2text:
@@ -593,3 +625,5 @@
     t.join()
 
 print("All done! %u records inserted/updated after %u seconds. %u records were bad and ignored" % (y, int(time.time() - start), baddies))
+if dedupped > 0:
+    print("%u records were not inserted due to deduplication" % dedupped)