blob: 94b9e231af99ae4ce80537d4c2ad41a301f8135f [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import importlib.util
import logging
import os
import os.path
import shutil
import sys
# Refuse to run on interpreters older than Python 3.4: the dependency check
# below relies on importlib.util.find_spec, which was added in 3.4.
# sys.version_info is a 5-tuple, e.g. (3, 3, 7, 'final', 0), which compares
# greater than (3, 3); the bound is therefore written as "< (3, 4)" so that
# every 3.3.x release is rejected as well.
if sys.version_info < (3, 4):
    print("This script requires Python 3.4 or higher")
    sys.exit(-1)
# Verify that every third-party package needed by the Pony Mail tools can be
# found before going any further; otherwise list what is missing and stop.
wanted_pkgs = [
    'elasticsearch',# used by setup.py, archiver.py and elastic.py
    'formatflowed', # used by archiver.py
    'netaddr',      # used by archiver.py
    'certifi',      # used by archiver.py and elastic.py
]

# Keep only the packages for which no import spec can be located.
missing_pkgs = [name for name in wanted_pkgs if not importlib.util.find_spec(name)]

if missing_pkgs:
    print("It looks like you need to install some python modules first")
    print("The following packages are required: ")
    for name in missing_pkgs:
        print(" - %s" % name)
    print("You may use your package manager, or run the following command:")
    print("pip3 install %s" % " ".join(missing_pkgs))
    sys.exit(-1)
# at this point we can assume elasticsearch is present
# (the package check above exits the script when it cannot be found)
from elasticsearch import Elasticsearch
from elasticsearch import ElasticsearchException
from elasticsearch import ConnectionError as ES_ConnectionError
from elasticsearch import VERSION as ES_VERSION
# First element of the client library version; compared further down with the
# major version reported by the engine.
ES_MAJOR = ES_VERSION[0]
# CLI arg parsing
# Every option is optional: anything not supplied here (or via --defaults)
# is asked for interactively further down.
parser = argparse.ArgumentParser(description='Command line options.')
parser.add_argument('--defaults', dest='defaults', action='store_true',
                    help='Use default settings')
parser.add_argument('--dbprefix', dest='dbprefix', type=str,
                    help='ES URL prefix, if any')
parser.add_argument('--clobber', dest='clobber', action='store_true',
                    help='Allow overwrite of ponymail.cfg & ../site/api/lib/config.lua (default: create *.tmp if either exists)')
parser.add_argument('--dbhost', dest='dbhost', type=str,
                    help='ES backend hostname')
# Parsed as an integer so that a non-numeric port is rejected by argparse
# with a usage message instead of surfacing later as an uncaught ValueError.
parser.add_argument('--dbport', dest='dbport', type=int,
                    help='DB port')
parser.add_argument('--dbname', dest='dbname', type=str,
                    help='ES DB name')
parser.add_argument('--dbshards', dest='dbshards', type=int,
                    help='DB Shard Count')
parser.add_argument('--dbreplicas', dest='dbreplicas', type=int,
                    help='DB Replica Count')
parser.add_argument('--mailserver', dest='mailserver', type=str,
                    help='Host name of outgoing mail server')
parser.add_argument('--mldom', dest='mldom', type=str,
                    help='Domains to accept mail for via UI')
parser.add_argument('--wordcloud', dest='wc', action='store_true',
                    help='Enable word cloud')
parser.add_argument('--skiponexist', dest='soe', action='store_true',
                    help='Skip setup if ES index exists')
parser.add_argument('--noindex', dest='noi', action='store_true',
                    help="Don't make an ES index, assume it exists")
parser.add_argument('--nocloud', dest='nwc', action='store_true',
                    help='Do not enable word cloud')
parser.add_argument('--generator', dest='generator', type=str,
                    help='Document ID Generator to use (legacy, medium, cluster, full)')
# Parse the process command line; the resulting namespace is consulted by the
# rest of the script (args.defaults, args.clobber, args.noi, args.soe, ...).
args = parser.parse_args()
for banner_line in ("Welcome to the Pony Mail setup script!",
                    "Let's start by determining some settings...",
                    ""):
    print(banner_line)

# Initial values mark a setting as "not provided yet": the prompts further
# down keep asking until the value has left this state.
hostname = dbname = mlserver = mldom = wc = genname = ""   # empty string: ask
port, shards = 0, 0                                        # below 1: ask
replicas = -1                                              # negative: ask
urlPrefix = None                                           # None: ask
wce = False                                                # word cloud is off unless enabled
# With --defaults (e.g. when run from Docker) every setting is pre-filled so
# that none of the interactive prompts below is triggered.
if args.defaults:
    hostname, port, urlPrefix = "localhost", 9200, ''
    dbname, shards, replicas = "ponymail", 1, 0
    mlserver, mldom = "localhost", "example.org"
    wc, wce = "Y", True
    genname = "cluster"
# Accept CLI args, copy them over the current (initial or --defaults) values.
# Options for which a falsy value is meaningful are compared against None
# rather than tested for truthiness, so that "--dbreplicas 0" or an empty
# "--dbprefix" are honoured instead of being ignored and prompted for again.
if args.dbprefix is not None:
    urlPrefix = args.dbprefix
if args.dbhost:
    hostname = args.dbhost
if args.dbport:
    port = int(args.dbport)
if args.dbname:
    dbname = args.dbname
if args.mailserver:
    mlserver = args.mailserver
if args.mldom:
    mldom = args.mldom
# The word cloud switches must update wce as well: wce is the flag that is
# written to config.lua, while wc only records that the question has been
# answered (a non-empty value suppresses the interactive prompt).
if args.wc:
    wc = "Y"
    wce = True
if args.nwc:
    wc = "N"
    wce = False
if args.dbshards:
    shards = args.dbshards
if args.dbreplicas is not None:
    replicas = args.dbreplicas
if args.generator:
    genname = args.generator
# Interactively ask for every setting that is still in its "not provided"
# state; each loop repeats until a usable answer has been given.
while hostname == "":
    hostname = input("What is the hostname of the ElasticSearch server? (e.g. localhost): ")

# An empty answer is valid here (no prefix), hence the None sentinel.
while urlPrefix is None:
    urlPrefix = input("Database URL prefix if any (hit enter if none): ")

while port < 1:
    try:
        port = int(input("What port is ElasticSearch listening on? (normally 9200): "))
    except ValueError:
        pass  # not a number: ask again

while dbname == "":
    dbname = input("What would you like to call the mail index (e.g. ponymail): ")

while mlserver == "":
    mlserver = input("What is the hostname of the outgoing mailserver? (e.g. mail.foo.org): ")

while mldom == "":
    mldom = input("Which domains would you accept mail to from web-replies? (e.g. foo.org or *): ")

while wc == "":
    wc = input("Would you like to enable the word cloud feature? (Y/N): ")
    if wc.lower() == "y":
        wce = True

# Generator names, in the order in which they are numbered in the menu below.
gens = ['legacy', 'medium', 'cluster', 'full']
while genname == "":
    print ("Please select a document ID generator:")
    print("1 LEGACY: The original document generator for v/0.1-0.8 (no longer recommended)")
    print("2 MEDIUM: The medium comprehensive generator for v/0.9 (no longer recommended)")
    print("3 REDUNDANT: Near-full message digest, discard MTA trail (recommended for clustered setups)")
    print("4 FULL: Full message digest with MTA trail (recommended for single-node setups).")
    try:
        gno = int(input("Please select a generator [1-4]: "))
    except ValueError:
        continue  # not a number: show the menu again
    # Check both bounds: without the lower bound, 0 or a negative number
    # would index the list from its end (or raise IndexError) instead of
    # being rejected.
    if 1 <= gno <= len(gens):
        genname = gens[gno - 1]

while shards < 1:
    try:
        shards = int(input("How many shards for the ElasticSearch index? "))
    except ValueError:
        pass  # not a number: ask again

while replicas < 0:
    try:
        replicas = int(input("How many replicas for each shard? "))
    except ValueError:
        pass  # not a number: ask again

print("Okay, I got all I need, setting up Pony Mail...")
def createIndex():
    """Create the mail index, including all document type mappings.

    Works on the module-level ``es`` client and reads the module-level
    ``dbname``, ``shards``, ``replicas``, ``args`` and ``DB_MAJOR``.

    If an index called ``dbname`` already exists the script is terminated:
    with exit status 0 when --skiponexist was given, with -1 otherwise.
    Exceptions raised by the client are not handled here.
    """

    # Small factories for the field definitions that occur over and over.
    # Each call returns a fresh dict, so no definition is shared between fields.
    def text():
        return {"type": "string"}

    def keyword():
        return {"type": "string", "index": "not_analyzed"}

    def long_int():
        return {"type": "long"}

    def boolean():
        return {"type": "boolean"}

    def binary():
        return {"type": "binary"}

    def subject():
        # "fielddata" is dropped again below if DB_MAJOR==2
        return {"type": "string", "fielddata": True}

    def unsearchable(properties):
        # doc types that are not searchable have "_all" disabled
        return {"_all": {"enabled": False}, "properties": properties}

    # Bail out if the index already exists
    if es.indices.exists(dbname):
        if args.soe:
            print("ElasticSearch index '%s' already exists and SOE set, exiting quietly" % dbname)
            sys.exit(0)
        print("Error: ElasticSearch index '%s' already exists!" % dbname)
        sys.exit(-1)

    print("Creating index " + dbname)

    settings = {
        "number_of_shards": shards,
        "number_of_replicas": replicas,
    }

    mbox_properties = {
        "@import_timestamp": {
            "type": "date",
            "format": "yyyy/MM/dd HH:mm:ss||yyyy/MM/dd",
        },
        "attachments": {
            "properties": {
                "content_type": keyword(),
                "filename": keyword(),
                "hash": keyword(),
                "size": long_int(),
            },
        },
        "body": text(),
        "cc": text(),
        "date": {
            "type": "date",
            "store": True,
            "format": "yyyy/MM/dd HH:mm:ss",
            "index": "not_analyzed",
        },
        "epoch": {  # number of seconds since the epoch
            "type": "long",
            "index": "not_analyzed",
        },
        "from": text(),
        "from_raw": keyword(),
        "in-reply-to": keyword(),
        "list": text(),
        "list_raw": keyword(),
        "message-id": keyword(),
        "mid": text(),
        "private": boolean(),
        "references": text(),
        "subject": subject(),
        "to": text(),
    }

    account_properties = {
        "cid": keyword(),
        "credentials": {
            "properties": {
                "altemail": {"type": "object"},
                "email": keyword(),
                "fullname": keyword(),
                "uid": keyword(),
            },
        },
        "internal": {
            "properties": {
                "cookie": keyword(),
                "ip": keyword(),
                "oauth_used": keyword(),
            },
        },
        "request_id": keyword(),
    }

    notification_properties = {
        "date": {
            "type": "date",
            "store": True,
            "format": "yyyy/MM/dd HH:mm:ss",
        },
        "epoch": long_int(),
        "from": text(),
        "in-reply-to": keyword(),
        "list": text(),
        "message-id": keyword(),
        "mid": text(),
        "private": boolean(),
        "recipient": keyword(),
        "seen": long_int(),
        "subject": subject(),
        "to": text(),
        "type": keyword(),
    }

    mappings = {
        "mbox": {"properties": mbox_properties},
        "attachment": {"properties": {"source": binary()}},
        "mbox_source": unsearchable({
            "source": binary(),
            "message-id": keyword(),
            "mid": text(),
        }),
        "mailinglists": unsearchable({
            "description": keyword(),
            "list": text(),
            "name": keyword(),
        }),
        "account": unsearchable(account_properties),
        "notifications": unsearchable(notification_properties),
    }

    if DB_MAJOR == 2:  # ES 2 handles fielddata differently
        for doc_type in ('mbox', 'notifications'):
            del mappings[doc_type]['properties']['subject']['fielddata']

    index_body = {
        "mappings": mappings,
        "settings": settings,
    }
    res = es.indices.create(index=dbname, body=index_body)
    print("Index created! %s " % res)
# The engine version has to be known before anything else is done, so the
# connection to the database is set up first.
es_node = {
    'host': hostname,
    'port': port,
    'use_ssl': False,
    'url_prefix': urlPrefix,
}
es = Elasticsearch([es_node], max_retries=5, retry_on_timeout=True)

# elasticsearch logs lots of warnings on retries/connection failure
logging.getLogger("elasticsearch").setLevel(logging.ERROR)

try:
    DB_VERSION = es.info()['version']['number']
except ES_ConnectionError:
    print("WARNING: Connection error: could not determine the engine version.")
    DB_VERSION = '0.0.0'  # placeholder; yields major version 0 = unknown

DB_MAJOR = int(DB_VERSION.partition('.')[0])
library_version = '.'.join(str(part) for part in ES_VERSION)
print("Versions: library %d (%s), engine %d (%s)" % (ES_MAJOR, library_version, DB_MAJOR, DB_VERSION))

if DB_MAJOR != ES_MAJOR:
    print("WARNING: library version does not agree with engine version!")

if DB_MAJOR == 0:  # not known
    if not args.noi:
        # if we cannot connect to get the version, we cannot create the index later
        print("Could not connect to the engine. Fatal.")
        sys.exit(1)
    # allow setup to be used without engine running
    print("Could not determine the engine version. Assume it is the same as the library version.")
    DB_MAJOR = ES_MAJOR

# Create the index unless --noindex says it is already there.
if not args.noi:
    try:
        createIndex()
    except ElasticsearchException as err:
        print("Index creation failed: %s" % err)
        sys.exit(1)
# Write the importer/archiver configuration file. An existing ponymail.cfg is
# only overwritten when --clobber was given; otherwise the new configuration
# goes to ponymail.cfg.tmp.
ponymail_cfg = 'ponymail.cfg'
if not args.clobber and os.path.exists(ponymail_cfg):
    print("%s exists and clobber is not set" % ponymail_cfg)
    ponymail_cfg = 'ponymail.cfg.tmp'

# Commented-out hint for the write-safety option. Engines from major version 5
# onwards use an active shard count; older ones use a consistency level. The
# test is ">= 5" (not "== 5") so that newer engines get the matching hint too.
if DB_MAJOR >= 5:
    write_hint = 'wait: active shard count'
else:
    write_hint = 'write: consistency level (default quorum)'

print("Writing importer config (%s)" % ponymail_cfg)
with open(ponymail_cfg, "w") as f:
    f.write("""
###############################################################
# Pony Mail Configuration file
# Main ES configuration
[elasticsearch]
hostname: %s
dbname: %s
port: %u
ssl: false
#uri: url_prefix
#user: username
#password: password
#%s
#backup: database name
[archiver]
generator: %s
[debug]
#cropout: string to crop from list-id
###############################################################
""" % (hostname, dbname, port, write_hint, genname))
# Write the mod_lua configuration used by the web API. As with ponymail.cfg,
# an existing file is kept unless --clobber was given; the new configuration
# is then written to config.lua.tmp next to it.
config_path = "../site/api/lib"
config_file = "config.lua"
if not args.clobber:
    if os.path.exists(os.path.join(config_path, config_file)):
        print("%s exists and clobber is not set" % config_file)
        config_file = "config.lua.tmp"

print("mod_lua configuration (%s)" % config_file)

# Values substituted into the template, in order of appearance.
wordcloud_flag = "true" if wce else "false"
lua_values = (hostname, port, dbname, mlserver, mldom, wordcloud_flag)
lua_template = """
local config = {
es_url = "http://%s:%u/%s/",
mailserver = "%s",
-- mailport = 1025, -- override the default port (25)
accepted_domains = "%s",
wordcloud = %s,
email_footer = nil, -- see the docs for how to set this up.
full_headers = false,
maxResults = 5000, -- max emails to return in one go. Might need to be bumped for large lists
-- stats_maxBody = 200, -- max size of body snippet returned by stats.lua
-- stats_wordExclude = ".|..|...", -- patterns to exclude from word cloud generated by stats.lua
admin_oauth = {}, -- list of domains that may do administrative oauth (private list access)
-- add 'www.googleapis.com' to the list for google oauth to decide, for instance.
oauth_fields = { -- used for specifying individual oauth handling parameters.
-- for example:
-- internal = {
-- email = 'CAS-EMAIL',
-- name = 'CAS-NAME',
-- uid = 'REMOTE-USER',
-- env = 'subprocess' -- use environment vars instead of request headers
-- }
},
-- allow_insecure_cookie = true, -- override the default (false) - only use for test installations
-- no_association = {}, -- domains that are not allowed for email association
-- listsDisplay = 'regex', -- if defined, hide list names that don't match the regex
-- debug = false, -- whether to return debug information
antispam = true -- Whether or not to add anti-spam measures aimed at anonymous users.
}
return config
"""

with open(os.path.join(config_path, config_file), "w") as f:
    f.write(lua_template % lua_values)
# Install the sample JS configuration, but only if there is no config.js yet
# and the sample file is actually present.
print("Copying sample JS config to config.js (if needed)...")
js_config = "../site/js/config.js"
js_sample = "../site/js/config.js.sample"
if not os.path.exists(js_config):
    if os.path.exists(js_sample):
        shutil.copy(js_sample, js_config)

print("All done, Pony Mail should...work now :)")
print("If you are using an external mail inbound server, \nmake sure to copy archiver.py and ponymail.cfg to it")