tools/setup.py - incubator-ponymail - Git at Google

 #!/usr/bin/env python3
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import argparse
 import importlib.util
 import logging
 import os
 import os.path
 import shutil
 import sys

 if sys.version_info <= (3, 3):
     print("This script requires Python 3.4 or higher")
     sys.exit(-1)

 # Check for all required python packages
 wanted_pkgs = [
     'elasticsearch',# used by setup.py, archiver.py and elastic.py
     'formatflowed', # used by archiver.py
     'netaddr',      # used by archiver.py
     'certifi',      # used by archiver.py and elastic.py
     ]

 missing_pkgs = list(wanted_pkgs) # copy to avoid corruption
 for pkg in wanted_pkgs:
     if importlib.util.find_spec(pkg):
         missing_pkgs.remove(pkg)

 if missing_pkgs:
     print("It looks like you need to install some python modules first")
     print("The following packages are required: ")
     for pkg in missing_pkgs:
         print(" - %s" % pkg)
     print("You may use your package manager, or run the following command:")
     print("pip3 install %s" % " ".join(missing_pkgs))
     sys.exit(-1)


 # at this point we can assume elasticsearch is present
 from elasticsearch import Elasticsearch
 from elasticsearch import ElasticsearchException
 from elasticsearch import ConnectionError as ES_ConnectionError
 from elasticsearch import VERSION as ES_VERSION
 ES_MAJOR = ES_VERSION[0]

 # CLI arg parsing
 parser = argparse.ArgumentParser(description='Command line options.')

 parser.add_argument('--defaults', dest='defaults', action='store_true',
                    help='Use default settings')
 parser.add_argument('--dbprefix', dest='dbprefix')
 parser.add_argument('--clobber', dest='clobber', action='store_true',
                    help='Allow overwrite of ponymail.cfg & ../site/api/lib/config.lua (default: create *.tmp if either exists)')
 parser.add_argument('--dbhost', dest='dbhost', type=str,
                    help='ES backend hostname')
 parser.add_argument('--dbport', dest='dbport', type=str,
                    help='DB port')
 parser.add_argument('--dbname', dest='dbname', type=str,
                    help='ES DB name')
 parser.add_argument('--dbshards', dest='dbshards', type=int,
                    help='DB Shard Count')
 parser.add_argument('--dbreplicas', dest='dbreplicas', type=int,
                   help='DB Replica Count')
 parser.add_argument('--mailserver', dest='mailserver', type=str,
                    help='Host name of outgoing mail server')
 parser.add_argument('--mldom', dest='mldom', type=str,
                    help='Domains to accept mail for via UI')
 parser.add_argument('--wordcloud', dest='wc', action='store_true',
                    help='Enable word cloud')
 parser.add_argument('--skiponexist', dest='soe', action='store_true',
                    help='Skip setup if ES index exists')
 parser.add_argument('--noindex', dest='noi', action='store_true',
                    help="Don't make an ES index, assume it exists")
 parser.add_argument('--nocloud', dest='nwc', action='store_true',
                    help='Do not enable word cloud')
 parser.add_argument('--generator', dest='generator', type=str,
                    help='Document ID Generator to use (legacy, medium, cluster, full)')
 args = parser.parse_args()

 print("Welcome to the Pony Mail setup script!")
 print("Let's start by determining some settings...")
 print("")


 hostname = ""
 port = 0
 dbname = ""
 mlserver = ""
 mldom = ""
 wc = ""
 genname = ""
 wce = False
 shards = 0
 replicas = -1
 urlPrefix = None

 # If called with --defaults (like from Docker), use default values
 if args.defaults:
     hostname = "localhost"
     port = 9200
     dbname = "ponymail"
     mlserver = "localhost"
     mldom = "example.org"
     wc = "Y"
     wce = True
     shards = 1
     replicas = 0
     genname = "cluster"
     urlPrefix = ''

 # Accept CLI args, copy them
 if args.dbprefix:
     urlPrefix = args.dbprefix
 if args.dbhost:
     hostname = args.dbhost
 if args.dbport:
     port = int(args.dbport)
 if args.dbname:
     dbname = args.dbname
 if args.mailserver:
     mlserver = args.mailserver
 if args.mldom:
     mldom = args.mldom
 if args.wc:
     wc = args.wc
 if args.nwc:
     wc = False
 if args.dbshards:
     shards = args.dbshards
 if args.dbreplicas:
     replicas = args.dbreplicas
 if args.generator:
     genname = args.generator

 while hostname == "":
     hostname = input("What is the hostname of the ElasticSearch server? (e.g. localhost): ")

 while urlPrefix == None:
     urlPrefix = input("Database URL prefix if any (hit enter if none): ")

 while port < 1:
     try:
         port = int(input("What port is ElasticSearch listening on? (normally 9200): "))
     except ValueError:
         pass

 while dbname == "":
     dbname = input("What would you like to call the mail index (e.g. ponymail): ")

 while mlserver == "":
     mlserver = input("What is the hostname of the outgoing mailserver? (e.g. mail.foo.org): ")

 while mldom == "":
     mldom = input("Which domains would you accept mail to from web-replies? (e.g. foo.org or *): ")

 while wc == "":
     wc = input("Would you like to enable the word cloud feature? (Y/N): ")
     if wc.lower() == "y":
         wce = True

 while genname == "":
     gens = ['legacy', 'medium', 'cluster', 'full']
     print ("Please select a document ID generator:")
     print("1  LEGACY: The original document generator for v/0.1-0.8 (no longer recommended)")
     print("2  MEDIUM: The medium comprehensive generator for v/0.9 (no longer recommended)")
     print("3  REDUNDANT: Near-full message digest, discard MTA trail (recommended for clustered setups)")
     print("4  FULL: Full message digest with MTA trail (recommended for single-node setups).")
     try:
         gno = int(input("Please select a generator [1-4]: "))
         if gno <= len(gens) and gens[gno-1]:
             genname = gens[gno-1]
     except ValueError:
         pass

 while shards < 1:
     try:
         shards = int(input("How many shards for the ElasticSearch index? "))
     except ValueError:
         pass

 while replicas < 0:
     try:
         replicas = int(input("How many replicas for each shard? "))
     except ValueError:
         pass

 print("Okay, I got all I need, setting up Pony Mail...")

 def createIndex():
     # Check if index already exists
     if es.indices.exists(dbname):
         if args.soe:
             print("ElasticSearch index '%s' already exists and SOE set, exiting quietly" % dbname)
             sys.exit(0)
         else:
             print("Error: ElasticSearch index '%s' already exists!" % dbname)
             sys.exit(-1)

     print("Creating index " + dbname)

     settings = {
         "number_of_shards" :   shards,
         "number_of_replicas" : replicas
     }

     mappings = {
         "mbox" : {
           "properties" : {
             "@import_timestamp" : {
               "type" : "date",
               "format" : "yyyy/MM/dd HH:mm:ss||yyyy/MM/dd"
             },
             "attachments" : {
               "properties" : {
                 "content_type" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "filename" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "hash" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "size" : {
                   "type" : "long"
                 }
               }
             },
             "body" : {
               "type" : "string"
             },
             "cc": {
               "type": "string"
             },
             "date" : {
               "type" : "date",
               "store" : True,
               "format" : "yyyy/MM/dd HH:mm:ss",
               "index" : "not_analyzed"
             },
             "epoch" : { # number of seconds since the epoch
               "type" : "long",
               "index" : "not_analyzed"
             },
             "from" : {
               "type" : "string"
             },
             "from_raw" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "in-reply-to" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "list" : {
               "type" : "string"
             },
             "list_raw" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "message-id" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "mid" : {
               "type" : "string"
             },
             "private" : {
               "type" : "boolean"
             },
             "references" : {
               "type" : "string"
             },
             "subject" : {
               "type" : "string",
               "fielddata": True # dropped later if DB_MAJOR==2
             },
             "to" : {
               "type" : "string"
             }
           }
         },
         "attachment" : {
           "properties" : {
             "source" : {
               "type" : "binary"
             }
           }
         },
         "mbox_source" : {
           "_all": {
             "enabled": False # this doc type is not searchable
           },
           "properties" : {
             "source" : {
               "type" : "binary"
             },
             "message-id" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "mid" : {
               "type" : "string"
             }
           }
         },
         "mailinglists" : {
           "_all": {
             "enabled": False # this doc type is not searchable
           },
           "properties" : {
             "description" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "list" : {
               "type" : "string",
 #               "index" : "not_analyzed"
             },
             "name" : {
               "type" : "string",
               "index" : "not_analyzed"
             }
           }
         },
         "account" : {
           "_all": {
             "enabled": False # this doc type is not searchable
           },
           "properties" : {
             "cid" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "credentials" : {
               "properties" : {
                 "altemail" : {
                   "type" : "object"
                 },
                 "email" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "fullname" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "uid" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 }
               }
             },
             "internal" : {
               "properties" : {
                 "cookie" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "ip" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 },
                 "oauth_used" : {
                   "type" : "string",
                   "index" : "not_analyzed"
                 }
               }
             },
             "request_id" : {
               "type" : "string",
               "index" : "not_analyzed"
             }
           }
         },
         "notifications" : {
           "_all": {
             "enabled": False # this doc type is not searchable
           },
           "properties" : {
             "date" : {
               "type" : "date",
               "store" : True,
               "format" : "yyyy/MM/dd HH:mm:ss"
             },
             "epoch" : {
               "type" : "long"
             },
             "from" : {
               "type" : "string",
 #               "index" : "not_analyzed"
             },
             "in-reply-to" : {
               "type" : "string",
                "index" : "not_analyzed"
             },
             "list" : {
               "type" : "string",
 #               "index" : "not_analyzed"
             },
             "message-id" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "mid" : {
               "type" : "string",
 #               "index" : "not_analyzed"
             },
             "private" : {
               "type" : "boolean"
             },
             "recipient" : {
               "type" : "string",
               "index" : "not_analyzed"
             },
             "seen" : {
               "type" : "long"
             },
             "subject" : {
               "type" : "string",
               "fielddata": True # dropped later if DB_MAJOR==2
 #               "index" : "not_analyzed"
             },
             "to" : {
               "type" : "string",
 #               "index" : "not_analyzed"
             },
             "type" : {
               "type" : "string",
               "index" : "not_analyzed"
             }
           }
         }
     }

     if DB_MAJOR == 2: # ES 2 handles fielddata differently
         del mappings['mbox']['properties']['subject']['fielddata']
         del mappings['notifications']['properties']['subject']['fielddata']

     res = es.indices.create(index = dbname, body = {
                 "mappings" : mappings,
                 "settings": settings
             }
         )

     print("Index created! %s " % res)

 # we need to connect to database to determine the engine version
 es = Elasticsearch([
     {
         'host': hostname,
         'port': port,
         'use_ssl': False,
         'url_prefix': urlPrefix
     }],
     max_retries=5,
     retry_on_timeout=True
     )

 # elasticsearch logs lots of warnings on retries/connection failure
 logging.getLogger("elasticsearch").setLevel(logging.ERROR)

 try:
     DB_VERSION=es.info()['version']['number']
 except ES_ConnectionError:
     print("WARNING: Connection error: could not determine the engine version.")
     DB_VERSION='0.0.0'

 DB_MAJOR=int(DB_VERSION.split('.')[0])
 print("Versions: library %d (%s), engine %d (%s)" % (ES_MAJOR, '.'.join(map(str,ES_VERSION)) , DB_MAJOR, DB_VERSION))

 if not DB_MAJOR == ES_MAJOR:
     print("WARNING: library version does not agree with engine version!")

 if DB_MAJOR == 0: # not known
     if args.noi:
         # allow setup to be used without engine running
         print("Could not determine the engine version. Assume it is the same as the library version.")
         DB_MAJOR = ES_MAJOR
     else:
         # if we cannot connect to get the version, we cannot create the index later
         print("Could not connect to the engine. Fatal.")
         sys.exit(1)

 if not args.noi:
     try:
         createIndex()
     except ElasticsearchException as e:
         print("Index creation failed: %s" % e)
         sys.exit(1)

 ponymail_cfg = 'ponymail.cfg'
 if not args.clobber and os.path.exists(ponymail_cfg):
     print("%s exists and clobber is not set" % ponymail_cfg)
     ponymail_cfg = 'ponymail.cfg.tmp'

 print("Writing importer config (%s)" % ponymail_cfg)

 with open(ponymail_cfg, "w") as f:
     f.write("""
 ###############################################################
 # Pony Mail Configuration file

 # Main ES configuration
 [elasticsearch]
 hostname:               %s
 dbname:                 %s
 port:                   %u
 ssl:                    false

 #uri:                   url_prefix

 #user:                  username
 #password:              password

 #%s

 #backup:                database name

 [archiver]
 generator:              %s

 [debug]
 #cropout:               string to crop from list-id

 ###############################################################
             """ % (hostname, dbname, port,
                    'wait:                  active shard count' if DB_MAJOR == 5 else 'write:                 consistency level (default quorum)', genname))

 config_path = "../site/api/lib"
 config_file = "config.lua"
 if not args.clobber and os.path.exists(os.path.join(config_path,config_file)):
     print("%s exists and clobber is not set" % config_file)
     config_file = "config.lua.tmp"
 print("mod_lua configuration (%s)" % config_file)
 with open(os.path.join(config_path,config_file), "w") as f:
     f.write("""
 local config = {
     es_url = "http://%s:%u/%s/",
     mailserver = "%s",
 --  mailport = 1025, -- override the default port (25)
     accepted_domains = "%s",
     wordcloud = %s,
     email_footer = nil, -- see the docs for how to set this up.
     full_headers = false,
     maxResults = 5000, -- max emails to return in one go. Might need to be bumped for large lists
 --  stats_maxBody = 200, -- max size of body snippet returned by stats.lua
 --  stats_wordExclude = ".|..|...", -- patterns to exclude from word cloud generated by stats.lua
     admin_oauth = {}, -- list of domains that may do administrative oauth (private list access)
                      -- add 'www.googleapis.com' to the list for google oauth to decide, for instance.
     oauth_fields = { -- used for specifying individual oauth handling parameters.
 -- for example:
 --        internal = {
 --            email = 'CAS-EMAIL',
 --            name = 'CAS-NAME',
 --            uid = 'REMOTE-USER',
 --            env = 'subprocess' -- use environment vars instead of request headers
 --        }
     },
 --  allow_insecure_cookie = true, -- override the default (false) - only use for test installations
 --  no_association = {}, -- domains that are not allowed for email association
 --  listsDisplay = 'regex', -- if defined, hide list names that don't match the regex
 --  debug = false, -- whether to return debug information
     antispam = true  -- Whether or not to add anti-spam measures aimed at anonymous users.
 }
 return config
             """ % (hostname, port, dbname, mlserver, mldom, "true" if wce else "false"))

 print("Copying sample JS config to config.js (if needed)...")
 if not os.path.exists("../site/js/config.js") and os.path.exists("../site/js/config.js.sample"):
     shutil.copy("../site/js/config.js.sample", "../site/js/config.js")


 print("All done, Pony Mail should...work now :)")
 print("If you are using an external mail inbound server, \nmake sure to copy archiver.py and ponymail.cfg to it")
	#!/usr/bin/env python3
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import argparse
	import importlib.util
	import logging
	import os
	import os.path
	import shutil
	import sys

	if sys.version_info <= (3, 3):
	print("This script requires Python 3.4 or higher")
	sys.exit(-1)

	# Check for all required python packages
	wanted_pkgs = [
	'elasticsearch',# used by setup.py, archiver.py and elastic.py
	'formatflowed', # used by archiver.py
	'netaddr', # used by archiver.py
	'certifi', # used by archiver.py and elastic.py
	]

	missing_pkgs = list(wanted_pkgs) # copy to avoid corruption
	for pkg in wanted_pkgs:
	if importlib.util.find_spec(pkg):
	missing_pkgs.remove(pkg)

	if missing_pkgs:
	print("It looks like you need to install some python modules first")
	print("The following packages are required: ")
	for pkg in missing_pkgs:
	print(" - %s" % pkg)
	print("You may use your package manager, or run the following command:")
	print("pip3 install %s" % " ".join(missing_pkgs))
	sys.exit(-1)


	# at this point we can assume elasticsearch is present
	from elasticsearch import Elasticsearch
	from elasticsearch import ElasticsearchException
	from elasticsearch import ConnectionError as ES_ConnectionError
	from elasticsearch import VERSION as ES_VERSION
	ES_MAJOR = ES_VERSION[0]

	# CLI arg parsing
	parser = argparse.ArgumentParser(description='Command line options.')

	parser.add_argument('--defaults', dest='defaults', action='store_true',
	help='Use default settings')
	parser.add_argument('--dbprefix', dest='dbprefix')
	parser.add_argument('--clobber', dest='clobber', action='store_true',
	help='Allow overwrite of ponymail.cfg & ../site/api/lib/config.lua (default: create *.tmp if either exists)')
	parser.add_argument('--dbhost', dest='dbhost', type=str,
	help='ES backend hostname')
	parser.add_argument('--dbport', dest='dbport', type=str,
	help='DB port')
	parser.add_argument('--dbname', dest='dbname', type=str,
	help='ES DB name')
	parser.add_argument('--dbshards', dest='dbshards', type=int,
	help='DB Shard Count')
	parser.add_argument('--dbreplicas', dest='dbreplicas', type=int,
	help='DB Replica Count')
	parser.add_argument('--mailserver', dest='mailserver', type=str,
	help='Host name of outgoing mail server')
	parser.add_argument('--mldom', dest='mldom', type=str,
	help='Domains to accept mail for via UI')
	parser.add_argument('--wordcloud', dest='wc', action='store_true',
	help='Enable word cloud')
	parser.add_argument('--skiponexist', dest='soe', action='store_true',
	help='Skip setup if ES index exists')
	parser.add_argument('--noindex', dest='noi', action='store_true',
	help="Don't make an ES index, assume it exists")
	parser.add_argument('--nocloud', dest='nwc', action='store_true',
	help='Do not enable word cloud')
	parser.add_argument('--generator', dest='generator', type=str,
	help='Document ID Generator to use (legacy, medium, cluster, full)')
	args = parser.parse_args()

	print("Welcome to the Pony Mail setup script!")
	print("Let's start by determining some settings...")
	print("")


	hostname = ""
	port = 0
	dbname = ""
	mlserver = ""
	mldom = ""
	wc = ""
	genname = ""
	wce = False
	shards = 0
	replicas = -1
	urlPrefix = None

	# If called with --defaults (like from Docker), use default values
	if args.defaults:
	hostname = "localhost"
	port = 9200
	dbname = "ponymail"
	mlserver = "localhost"
	mldom = "example.org"
	wc = "Y"
	wce = True
	shards = 1
	replicas = 0
	genname = "cluster"
	urlPrefix = ''

	# Accept CLI args, copy them
	if args.dbprefix:
	urlPrefix = args.dbprefix
	if args.dbhost:
	hostname = args.dbhost
	if args.dbport:
	port = int(args.dbport)
	if args.dbname:
	dbname = args.dbname
	if args.mailserver:
	mlserver = args.mailserver
	if args.mldom:
	mldom = args.mldom
	if args.wc:
	wc = args.wc
	if args.nwc:
	wc = False
	if args.dbshards:
	shards = args.dbshards
	if args.dbreplicas:
	replicas = args.dbreplicas
	if args.generator:
	genname = args.generator

	while hostname == "":
	hostname = input("What is the hostname of the ElasticSearch server? (e.g. localhost): ")

	while urlPrefix == None:
	urlPrefix = input("Database URL prefix if any (hit enter if none): ")

	while port < 1:
	try:
	port = int(input("What port is ElasticSearch listening on? (normally 9200): "))
	except ValueError:
	pass

	while dbname == "":
	dbname = input("What would you like to call the mail index (e.g. ponymail): ")

	while mlserver == "":
	mlserver = input("What is the hostname of the outgoing mailserver? (e.g. mail.foo.org): ")

	while mldom == "":
	mldom = input("Which domains would you accept mail to from web-replies? (e.g. foo.org or *): ")

	while wc == "":
	wc = input("Would you like to enable the word cloud feature? (Y/N): ")
	if wc.lower() == "y":
	wce = True

	while genname == "":
	gens = ['legacy', 'medium', 'cluster', 'full']
	print ("Please select a document ID generator:")
	print("1 LEGACY: The original document generator for v/0.1-0.8 (no longer recommended)")
	print("2 MEDIUM: The medium comprehensive generator for v/0.9 (no longer recommended)")
	print("3 REDUNDANT: Near-full message digest, discard MTA trail (recommended for clustered setups)")
	print("4 FULL: Full message digest with MTA trail (recommended for single-node setups).")
	try:
	gno = int(input("Please select a generator [1-4]: "))
	if gno <= len(gens) and gens[gno-1]:
	genname = gens[gno-1]
	except ValueError:
	pass

	while shards < 1:
	try:
	shards = int(input("How many shards for the ElasticSearch index? "))
	except ValueError:
	pass

	while replicas < 0:
	try:
	replicas = int(input("How many replicas for each shard? "))
	except ValueError:
	pass

	print("Okay, I got all I need, setting up Pony Mail...")

	def createIndex():
	# Check if index already exists
	if es.indices.exists(dbname):
	if args.soe:
	print("ElasticSearch index '%s' already exists and SOE set, exiting quietly" % dbname)
	sys.exit(0)
	else:
	print("Error: ElasticSearch index '%s' already exists!" % dbname)
	sys.exit(-1)

	print("Creating index " + dbname)

	settings = {
	"number_of_shards" : shards,
	"number_of_replicas" : replicas
	}

	mappings = {
	"mbox" : {
	"properties" : {
	"@import_timestamp" : {
	"type" : "date",
	"format" : "yyyy/MM/dd HH:mm:ss\|\|yyyy/MM/dd"
	},
	"attachments" : {
	"properties" : {
	"content_type" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"filename" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"hash" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"size" : {
	"type" : "long"
	}
	}
	},
	"body" : {
	"type" : "string"
	},
	"cc": {
	"type": "string"
	},
	"date" : {
	"type" : "date",
	"store" : True,
	"format" : "yyyy/MM/dd HH:mm:ss",
	"index" : "not_analyzed"
	},
	"epoch" : { # number of seconds since the epoch
	"type" : "long",
	"index" : "not_analyzed"
	},
	"from" : {
	"type" : "string"
	},
	"from_raw" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"in-reply-to" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"list" : {
	"type" : "string"
	},
	"list_raw" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"message-id" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"mid" : {
	"type" : "string"
	},
	"private" : {
	"type" : "boolean"
	},
	"references" : {
	"type" : "string"
	},
	"subject" : {
	"type" : "string",
	"fielddata": True # dropped later if DB_MAJOR==2
	},
	"to" : {
	"type" : "string"
	}
	}
	},
	"attachment" : {
	"properties" : {
	"source" : {
	"type" : "binary"
	}
	}
	},
	"mbox_source" : {
	"_all": {
	"enabled": False # this doc type is not searchable
	},
	"properties" : {
	"source" : {
	"type" : "binary"
	},
	"message-id" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"mid" : {
	"type" : "string"
	}
	}
	},
	"mailinglists" : {
	"_all": {
	"enabled": False # this doc type is not searchable
	},
	"properties" : {
	"description" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"list" : {
	"type" : "string",
	# "index" : "not_analyzed"
	},
	"name" : {
	"type" : "string",
	"index" : "not_analyzed"
	}
	}
	},
	"account" : {
	"_all": {
	"enabled": False # this doc type is not searchable
	},
	"properties" : {
	"cid" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"credentials" : {
	"properties" : {
	"altemail" : {
	"type" : "object"
	},
	"email" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"fullname" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"uid" : {
	"type" : "string",
	"index" : "not_analyzed"
	}
	}
	},
	"internal" : {
	"properties" : {
	"cookie" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"ip" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"oauth_used" : {
	"type" : "string",
	"index" : "not_analyzed"
	}
	}
	},
	"request_id" : {
	"type" : "string",
	"index" : "not_analyzed"
	}
	}
	},
	"notifications" : {
	"_all": {
	"enabled": False # this doc type is not searchable
	},
	"properties" : {
	"date" : {
	"type" : "date",
	"store" : True,
	"format" : "yyyy/MM/dd HH:mm:ss"
	},
	"epoch" : {
	"type" : "long"
	},
	"from" : {
	"type" : "string",
	# "index" : "not_analyzed"
	},
	"in-reply-to" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"list" : {
	"type" : "string",
	# "index" : "not_analyzed"
	},
	"message-id" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"mid" : {
	"type" : "string",
	# "index" : "not_analyzed"
	},
	"private" : {
	"type" : "boolean"
	},
	"recipient" : {
	"type" : "string",
	"index" : "not_analyzed"
	},
	"seen" : {
	"type" : "long"
	},
	"subject" : {
	"type" : "string",
	"fielddata": True # dropped later if DB_MAJOR==2
	# "index" : "not_analyzed"
	},
	"to" : {
	"type" : "string",
	# "index" : "not_analyzed"
	},
	"type" : {
	"type" : "string",
	"index" : "not_analyzed"
	}
	}
	}
	}

	if DB_MAJOR == 2: # ES 2 handles fielddata differently
	del mappings['mbox']['properties']['subject']['fielddata']
	del mappings['notifications']['properties']['subject']['fielddata']

	res = es.indices.create(index = dbname, body = {
	"mappings" : mappings,
	"settings": settings
	}
	)

	print("Index created! %s " % res)

	# we need to connect to database to determine the engine version
	es = Elasticsearch([
	{
	'host': hostname,
	'port': port,
	'use_ssl': False,
	'url_prefix': urlPrefix
	}],
	max_retries=5,
	retry_on_timeout=True
	)

	# elasticsearch logs lots of warnings on retries/connection failure
	logging.getLogger("elasticsearch").setLevel(logging.ERROR)

	try:
	DB_VERSION=es.info()['version']['number']
	except ES_ConnectionError:
	print("WARNING: Connection error: could not determine the engine version.")
	DB_VERSION='0.0.0'

	DB_MAJOR=int(DB_VERSION.split('.')[0])
	print("Versions: library %d (%s), engine %d (%s)" % (ES_MAJOR, '.'.join(map(str,ES_VERSION)) , DB_MAJOR, DB_VERSION))

	if not DB_MAJOR == ES_MAJOR:
	print("WARNING: library version does not agree with engine version!")

	if DB_MAJOR == 0: # not known
	if args.noi:
	# allow setup to be used without engine running
	print("Could not determine the engine version. Assume it is the same as the library version.")
	DB_MAJOR = ES_MAJOR
	else:
	# if we cannot connect to get the version, we cannot create the index later
	print("Could not connect to the engine. Fatal.")
	sys.exit(1)

	if not args.noi:
	try:
	createIndex()
	except ElasticsearchException as e:
	print("Index creation failed: %s" % e)
	sys.exit(1)

	ponymail_cfg = 'ponymail.cfg'
	if not args.clobber and os.path.exists(ponymail_cfg):
	print("%s exists and clobber is not set" % ponymail_cfg)
	ponymail_cfg = 'ponymail.cfg.tmp'

	print("Writing importer config (%s)" % ponymail_cfg)

	with open(ponymail_cfg, "w") as f:
	f.write("""
	###############################################################
	# Pony Mail Configuration file

	# Main ES configuration
	[elasticsearch]
	hostname: %s
	dbname: %s
	port: %u
	ssl: false

	#uri: url_prefix

	#user: username
	#password: password

	#%s

	#backup: database name

	[archiver]
	generator: %s

	[debug]
	#cropout: string to crop from list-id

	###############################################################
	""" % (hostname, dbname, port,
	'wait: active shard count' if DB_MAJOR == 5 else 'write: consistency level (default quorum)', genname))

	config_path = "../site/api/lib"
	config_file = "config.lua"
	if not args.clobber and os.path.exists(os.path.join(config_path,config_file)):
	print("%s exists and clobber is not set" % config_file)
	config_file = "config.lua.tmp"
	print("mod_lua configuration (%s)" % config_file)
	with open(os.path.join(config_path,config_file), "w") as f:
	f.write("""
	local config = {
	es_url = "http://%s:%u/%s/",
	mailserver = "%s",
	-- mailport = 1025, -- override the default port (25)
	accepted_domains = "%s",
	wordcloud = %s,
	email_footer = nil, -- see the docs for how to set this up.
	full_headers = false,
	maxResults = 5000, -- max emails to return in one go. Might need to be bumped for large lists
	-- stats_maxBody = 200, -- max size of body snippet returned by stats.lua
	-- stats_wordExclude = ".\|..\|...", -- patterns to exclude from word cloud generated by stats.lua
	admin_oauth = {}, -- list of domains that may do administrative oauth (private list access)
	-- add 'www.googleapis.com' to the list for google oauth to decide, for instance.
	oauth_fields = { -- used for specifying individual oauth handling parameters.
	-- for example:
	-- internal = {
	-- email = 'CAS-EMAIL',
	-- name = 'CAS-NAME',
	-- uid = 'REMOTE-USER',
	-- env = 'subprocess' -- use environment vars instead of request headers
	-- }
	},
	-- allow_insecure_cookie = true, -- override the default (false) - only use for test installations
	-- no_association = {}, -- domains that are not allowed for email association
	-- listsDisplay = 'regex', -- if defined, hide list names that don't match the regex
	-- debug = false, -- whether to return debug information
	antispam = true -- Whether or not to add anti-spam measures aimed at anonymous users.
	}
	return config
	""" % (hostname, port, dbname, mlserver, mldom, "true" if wce else "false"))

	print("Copying sample JS config to config.js (if needed)...")
	if not os.path.exists("../site/js/config.js") and os.path.exists("../site/js/config.js.sample"):
	shutil.copy("../site/js/config.js.sample", "../site/js/config.js")


	print("All done, Pony Mail should...work now :)")
	print("If you are using an external mail inbound server, \nmake sure to copy archiver.py and ponymail.cfg to it")