--[[
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
]]--

-- This is stats.lua - the main stats/ML rendering script for the web.
| |
| local JSON = require 'cjson' |
| local elastic = require 'lib/elastic' |
| local user = require 'lib/user' |
| local aaa = require 'lib/aaa' |
| local config = require 'lib/config' |
| local cross = require 'lib/cross' |
| local utils = require 'lib/utils' |
| |
-- Maximum number of body characters to include per email summary;
-- longer bodies are truncated and suffixed with "..." (see handle()).
local BODY_MAXLEN = config.stats_maxBody or 200
-- words to exclude from word cloud:
-- regexp handed to the ES terms aggregation 'exclude' option;
-- the default drops all 1-3 character terms.
local EXCLUDE = config.stats_wordExclude or ".|..|..."
| |
-- Recursively sort a thread tree so that, at every level, children are
-- ordered newest-first (descending epoch). Nodes without a 'children'
-- table are left untouched.
local function sortEmail(thread)
    local kids = thread.children
    if type(kids) ~= "table" then
        return
    end
    table.sort(kids, function(a, b)
        return a.epoch > b.epoch
    end)
    for _, child in pairs(kids) do
        sortEmail(child)
    end
end
| |
| -- findSubject: match an email with an earlier one with the same topic |
| -- used for orphaned emails |
-- findSubject: match an email with an earlier one carrying the same topic;
-- used to attach orphaned emails (no resolvable in-reply-to/references)
-- to an existing thread.
-- gblob   : map of message-id -> thread node
-- blob    : array of email summaries ({subject, epoch, ['message-id'], ...})
-- subject : subject line to match; a leading reply/forward marker is stripped
-- epoch   : only emails strictly older than this timestamp are considered
-- maxAge  : optional; when set, only look back this many days from `epoch`
-- Returns the matching thread node from gblob, or nil if none qualifies.
local function findSubject(gblob, blob, subject, epoch, maxAge)
    -- Strip a leading "Re:"/"Fwd:"/"Aw:" style prefix. The pattern was
    -- "^[A-Za-z]:%s+" (missing '+'), which only stripped single-letter
    -- prefixes such as "R: "; fixed to match the subject normalization
    -- used elsewhere in this file.
    local subj = subject:gsub("^[A-Za-z]+:%s+", "")
    for k, v in pairs(blob) do
        if v.subject and v.subject == subj and v.epoch < epoch and (not maxAge or (maxAge and v.epoch >= (epoch - (maxAge*86400)))) then
            local mid = v['message-id']
            if gblob[mid] then
                return gblob[mid]
            end
        end
    end
    return nil
end
| |
| -- extract canonical email name from from field |
| local function extractCanonName(from) |
| local name = from:match("([^<]+)%s*<.->") or from:match("%S+@%S+") or from:match("%((.-)%)") or "unknown" |
| return name:gsub("\"", ""):gsub("%s+$", "") |
| end |
| |
-- handle: main request entry point for stats.lua.
-- Parses the query string (list/domain selection, free-text query, header
-- filters, date ranges), runs the relevant ElasticSearch queries,
-- reconstructs the email thread structure, gathers the top-10 participants
-- and (optionally) a subject word cloud, and writes the entire result to
-- the client as one JSON object.
-- r: the request object (mod_lua style). Always returns cross.OK.
function handle(r)
    cross.contentType(r, "application/json; charset=UTF-8")
    local DEBUG = config.debug or false
    local t = {} -- per-phase timings, only populated when DEBUG is on
    local START = DEBUG and r:clock() or nil
    local tnow = START
    local get = r:parseargs()
    -- statsOnly: Whether to only send statistical info (for n-grams etc), and not the
    -- thread struct and message bodies
    -- Param: quick
    local statsOnly = get.quick
    -- Param: list=<listname> or '*' (required)
    -- Param: domain=<domain> or '*' (required)
    if not get.list or not get.domain then
        r:puts("{}")
        return cross.OK
    end
    -- emailsOnly: return email summaries only, not derived data:
    -- i.e. omit thread_struct, top 10 participants and word-cloud
    local emailsOnly = get.emailsOnly
    local qs = "*" -- positive query
    local nqs = "" -- negative query
    local dd = "lte=1M"
    local maxresults = config.maxResults or 5000
    local account = user.get(r)
    -- Param: d=nnnnn (numeric)
    -- does not appear to be supported below
    if get.d and tonumber(get.d) and tonumber(get.d) > 0 then
        dd = tonumber(get.d)
    end
    -- Param: q=query
    -- Build qs (must-match) and nqs (must-not-match) query_string clauses
    -- over the from/subject/body fields.
    if get.q and #get.q > 0 then
        local x = {}
        local nx = {}
        local q = get.q:gsub("+", " ") -- apache quirk?
        for k, v in pairs({'from','subject','body'}) do
            local y = {}
            local z = {}
            local words = {}

            -- first, grab all "foo bar" quotes
            for lword in q:gmatch([[("[^"]+")]]) do
                table.insert(words, lword)
            end
            -- then cut them out of the query
            -- NOTE(review): lword above is captured WITH its surrounding
            -- quotes, so this pattern ends up double-quoted ("" ... "")
            -- and may never match; quoted phrases would then also be
            -- re-added word by word below — verify intent.
            for _, word in pairs(words) do
                q = q:gsub('"' .. word:gsub('[.%-%%%?%+]', "%%%1") .. '"', "")
            end

            -- then remaining single words
            for word in q:gmatch("(%S+)") do
                table.insert(words, word)
            end

            for _, word in pairs(words) do
                local preface = ""
                -- a leading '-' marks a negated (must-not) term
                if word:match("^%-") then
                    preface = "-"
                    word = word:sub(2)
                end
                if preface == "" then
                    table.insert(y, ("%s:\"%s\""):format(v, r:escape_html( word:gsub("[()\"]+", "") )))
                else
                    table.insert(z, ("%s:\"%s\""):format(v, r:escape_html( word:gsub("[()\"]+", "") )))
                end
            end
            if #y > 0 then
                table.insert(x, "(" .. table.concat(y, " AND ") .. ")")
            end
            if #z > 0 then
                table.insert(nx, "(" .. table.concat(z, " OR ") .. ")")
            end
        end
        qs = table.concat(x, " OR ")
        if qs == "" then
            qs = "*"
        end
        nqs = table.concat(nx, " OR ")
    end

    local listraw = "<" .. get.list .. "." .. get.domain .. ">"
    local listdata = {
        name = get.list,
        domain = get.domain
    }

    -- Per-header filters are ANDed onto the positive query.
    local z = {}
    -- Param: header_from=
    -- Param: header_subject=
    -- Param: header_body=
    -- Param: header_to=
    for k, v in pairs({'from','subject','body', 'to'}) do
        if get['header_' .. v] then
            local word = get['header_' .. v]
            table.insert(z, ("(%s:\"%s\")"):format(v, r:escape_html( word:gsub("[()\"]+", "") )))
        end
    end
    if #z > 0 then
        if #qs > 0 and qs ~= "*" then
            qs = qs .. " AND (" .. table.concat(z, " AND ") .. ")"
        else
            qs = table.concat(z, " AND ")
        end
    end

    -- Debug time point 1
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end

    -- Date range selection: defaults to the last month, then successively
    -- overridden by dfrom/dto, d=lte/gte, d=dfr/dto and s/e parameters.
    local daterange = {gt = "now-1M", lte = "now+1d" }
    -- Param: dfrom=.*ddd (days ago to start)
    -- Param: dto=dddd.* (total days to match)
    -- Must both be present
    if get.dfrom and get.dto then
        local ef = tonumber(get.dfrom:match("(%d+)$")) or 0
        local et = tonumber(get.dto:match("^(%d+)")) or 0
        if ef > 0 and et > 0 then
            -- cap the span so the window cannot extend past 'now'
            if et > ef then
                et = ef
            end
            daterange = {
                gte = "now-" .. ef .. "d",
                lte = "now-" .. (ef-et) .. "d"
            }
        end
    end
    if not get.d then
        get.d = dd
    end

    -- d=YYYY-mm translates into s+e being equal to d
    -- Param: d=yyyy-mm
    if not (get.s and get.e) and get.d and get.d:match("^%d+%-%d+$") then
        get.s = get.d
        get.e = get.d
    end
    -- Param: d=.*lte=n[wMyd].* (less than or equal to n days/weeks/etc ago)
    -- from now-nP to now+1d (P=period)
    if get.d then
        local lte = get.d:match("lte=([0-9]+[wMyd])")
        if lte then
            daterange.lte = "now+1d"
            daterange.gte = "now-" .. lte
            daterange.gt = nil
        end
    end
    -- Param: d=.*gte=n[wMyd].* (greater than or equal to n days/weeks/etc ago)
    -- before now-nP (P=period)
    if get.d then
        local gte = get.d:match("gte=([0-9]+[wMyd])")
        if gte then
            daterange.gte = nil
            daterange.gt = nil
            daterange.lte = "now-" .. gte
        end
    end
    -- Param: d=.*dfr=yyyy-mm-dd.* (dates from)
    -- start date for search
    if get.d then
        local y,m,d = get.d:match("dfr=(%d+)%-(%d+)%-(%d+)")
        if y and m and d then
            daterange.gte = ("%04u/%02u/%02u 00:00:00"):format(y,m,d)
            daterange.gt = nil
        end
    end
    -- Param: d=.*dto=yyyy-mm-dd.* (dates up to)
    -- end date for search
    if get.d then
        local y,m,d = get.d:match("dto=(%d+)%-(%d+)%-(%d+)")
        if y and m and d then
            daterange.lte = ("%04u/%02u/%02u 23:59:59"):format(y,m,d)
            daterange.gt = nil
        end
    end
    -- Param: s=yyyy-m[m]
    -- Param: e=yyyy-m[m]
    -- Explicit start/end months override everything above.
    if get.s and get.e then
        local em = tonumber(get.e:match("%-(%d%d?)$"))
        local ey = tonumber(get.e:match("^(%d%d%d%d)"))
        local ec = utils.lastDayOfMonth(ey, em)
        daterange = {
            gte = get.s:gsub("%-","/").."/01 00:00:00",
            lte = get.e:gsub("%-","/").."/" .. ec .. " 23:59:59",
        }
    end
    local wc = false -- wildcard?
    local sterm = { -- which lists to process
        term = {
            list_raw = listraw
        }
    }
    -- list=* : any list on the given domain; domain=* : any list at all.
    if get.list == "*" then
        wc = true
        sterm = {
            regexp = {
                list = "^[^.]+." .. get.domain .. "$"
            }
        }
    end
    if get.domain == "*" then
        wc = true
        sterm = {
            wildcard = {
                list = "*"
            }
        }
    end

    --[[ ============ Completed all parameter parsing, now process the data ============ ]]

    -- Check for changes?
    -- Param: since=epoch (optional, defaults to now)
    -- Cheap probe: if nothing matching arrived after 'since', short-circuit
    -- with {changed:false} and skip all the heavy work below.
    if get.since then
        local epoch = tonumber(get.since) or os.time()
        local doc = elastic.raw {
            _source = {'message-id'},
            query = {
                bool = {
                    must = {
                        {
                            range = {
                                epoch = {
                                    gt = epoch
                                }
                            }
                        },
                        {
                            range = {
                                date = daterange
                            }
                        },
                        sterm,
                        {
                            query_string = {
                                default_field = "subject",
                                query = qs
                            }
                        }
                    },
                    must_not = {
                        {
                            query_string = {
                                default_field = "subject",
                                query = nqs
                            }
                        }
                    }
                }
            },
            size = 1
        }
        if #doc.hits.hits == 0 then
            r:puts(JSON.encode{
                changed = false,
                took = DEBUG and (r:clock() - START) or nil
            })
            return cross.OK
        end
    end

    -- Debug time point 2
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end

    -- Word cloud: top subject terms over public emails only.
    local cloud = nil
    if config.wordcloud and not statsOnly and not emailsOnly then
        cloud = {}
        -- Word cloud!
        local doc = elastic.raw {
            size = 0, -- we don't need the hits themselves
            terminate_after = 100,
            aggs = {
                cloud = {
                    terms = {
                        field = "subject",
                        size = 10,
                        exclude = EXCLUDE
                    }
                }
            },
            query = {
                bool = {
                    must = {
                        {
                            range = {
                                date = daterange
                            }
                        },
                        {
                            query_string = {
                                default_field = "subject",
                                query = qs
                            }
                        },
                        sterm,
                        {
                            term = {
                                private = false
                            }
                        }
                    },
                    must_not = {
                        {
                            query_string = {
                                default_field = "subject",
                                query = nqs
                            }
                        }
                    }
                }
            }
        }
        for x,y in pairs (doc.aggregations.cloud.buckets) do
            cloud[y.key] = y.doc_count
        end
    end
    -- Debug time point 3
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end

    -- Get years active
    -- NOWISH changes every 10 minutes, so the cached span expires with it.
    local NOWISH = math.floor(os.time()/600)
    local DATESPAN_KEY = "dateSpan:" .. NOWISH .. ":" .. get.list .. "@" .. get.domain
    local datespan = JSON.decode(r:ivm_get(DATESPAN_KEY) or "{}")

    -- Cache miss: aggregate first/last epochs per list, split by privacy.
    if not (datespan.pubfirst and datespan.publast) then
        local doc = elastic.raw {
            size = 0,
            query = {
                bool = {
                    must = {
                        {
                            range = {
                                epoch = {
                                    gt = 0
                                }
                            }
                        },
                        sterm
                    }
                }
            },
            aggs = {
                lists = {
                    terms = {
                        field = "list_raw",
                        size = utils.MAX_LIST_COUNT
                    },
                    aggs = {
                        private = {
                            terms = {
                                field = "private",
                                size = 2
                            },
                            aggs = {
                                first = {
                                    min = {
                                        field = "epoch"
                                    }
                                },
                                last = {
                                    max = {
                                        field = "epoch"
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        datespan = {}
        datespan.pubfirst = nil
        datespan.publast = nil
        -- find public min and max (buckets will be empty if there are no matching lists)
        for _, list in pairs(doc.aggregations.lists.buckets) do
            for _, private in pairs(list.private.buckets) do
                if private.key_as_string == "false" then
                    if (datespan.publast == nil) or (private.last.value > datespan.publast) then datespan.publast = private.last.value end
                    if (datespan.pubfirst == nil) or (private.first.value < datespan.pubfirst) then datespan.pubfirst = private.first.value end
                end
            end
        end
        if datespan.publast == nil then -- did not find any values
            local NOW = os.time()
            datespan.publast = NOW
            datespan.pubfirst = NOW
        end

        -- find private min and max and store them if they could change the public ones
        -- store the list entries under the 'private' key to make them easier to process
        for _, list in pairs(doc.aggregations.lists.buckets) do
            for _, private in pairs(list.private.buckets) do
                if private.key_as_string == "true" then
                    local prvlast = private.last.value
                    if prvlast > datespan.publast then
                        datespan.private = datespan.private or {}
                        datespan.private[list.key] = datespan.private[list.key] or {}
                        datespan.private[list.key].last = prvlast
                    end
                    local prvfirst = private.first.value
                    if prvfirst < datespan.pubfirst then
                        datespan.private = datespan.private or {}
                        datespan.private[list.key] = datespan.private[list.key] or {}
                        datespan.private[list.key].first = prvfirst
                    end
                end
            end
        end

        r:ivm_set(DATESPAN_KEY, JSON.encode(datespan))
    end

    -- process the raw list data:
    -- get the first and last dates, adjusting as necessary to allow for private lists
    -- (private extremes only widen the span if this account can see that list)
    local first = datespan.pubfirst
    local last = datespan.publast
    for lid, prvdates in pairs(datespan.private or {}) do
        if aaa.canAccessList(r, lid, account) then
            if prvdates.first and prvdates.first < first then first = prvdates.first end
            if prvdates.last and prvdates.last > last then last = prvdates.last end
        end
    end

    -- extract years and months for response (using UTC, i.e. leading !)
    datespan.firstYear = tonumber(os.date("!%Y", first))
    datespan.firstMonth = tonumber(os.date("!%m", first))

    datespan.lastYear = tonumber(os.date("!%Y", last))
    datespan.lastMonth = tonumber(os.date("!%m", last))

    -- Debug time point 4
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end

    -- Get threads
    local threads = {}   -- array of thread roots
    local emails = {}    -- message-id (or subject key) -> thread node
    local emls = {}      -- email summaries included in the response
    local senders = {}   -- canonical sender id -> participant record

    local dhh = {}       -- raw ES hits, newest first

    -- construct thread query
    local squery = {
        _source = {'message-id','in-reply-to','from','subject','epoch','references','list_raw', 'private', 'attachments', 'body'},
        query = {
            bool = {
                must = {
                    {
                        range = {
                            date = daterange
                        }
                    },
                    sterm,
                    {
                        query_string = {
                            default_field = "subject",
                            query = qs
                        }
                    }
                },
                must_not = {
                    {
                        query_string = {
                            default_field = "subject",
                            query = nqs
                        }
                    }
                }
            }
        },
        sort = {
            {
                epoch = {
                    order = "desc"
                }
            }
        },
        size = maxresults
    }

    -- If max results limit is beyond the limit, we have to do a scroll to fetch it.
    if maxresults > elastic.MAX_RESULT_WINDOW then
        squery.size = elastic.MAX_RESULT_WINDOW -- limit the maximum batch sizes
        local js, sid = elastic.scroll(squery)
        while js and js.hits and js.hits.hits and #js.hits.hits > 0 do -- scroll as long as we get new results
            for k, v in pairs(js.hits.hits) do
                table.insert(dhh, v)
            end
            js, sid = elastic.scroll(sid)
        end
        elastic.clear_scroll(sid) -- we're done with the sid, release it
    -- otherwise, we can just do a standard raw query
    else
        local doc = elastic.raw(squery)
        dhh = doc.hits.hits
    end

    -- Debug time point 5
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end


    -- Walk hits oldest-first (reverse of the desc sort) so parents are
    -- registered in `emails` before their replies arrive.
    for k = #dhh, 1, -1 do
        local v = dhh[k]
        local email = v._source
        if aaa.canAccessDoc(r, email, account) then

            local eml = utils.extractCanonEmail(email.from)
            local gravatar = r:md5(eml:lower())
            email.gravatar = gravatar

            local name = extractCanonName(email.from)
            local eid = ("%s <%s>"):format(name, eml)
            -- tally sender stats for the top-10 participant list
            if not statsOnly and not emailsOnly then
                senders[eid] = senders[eid] or {
                    email = eml,
                    gravatar = gravatar,
                    name = name,
                    count = 0
                }
                senders[eid].count = senders[eid].count + 1
            end
            local mid = email['message-id']
            local irt = email['in-reply-to']
            email.id = v._id
            email.irt = irt

            -- only calculate threads if necessary
            if not emailsOnly then
                emails[mid] = {
                    tid = v._id,
                    nest = 1,
                    epoch = email.epoch,
                    children = {

                    }
                }

                if not irt or irt == JSON.null or #irt == 0 then
                    irt = ""
                end
                -- no direct in-reply-to hit: try each entry in References
                if not emails[irt] and email.references and email.references ~= JSON.null then
                    for ref in email.references:gmatch("([^%s]+)") do
                        if emails[ref] then
                            irt = ref
                            break
                        end
                    end
                end

                if not irt or irt == JSON.null or #irt == 0 then
                    irt = email.subject:gsub("^[a-zA-Z]+:%s+", "")
                end

                -- If we can't match by in-reply-to or references, match/group by subject, ignoring Re:/Fwd:/etc
                if not emails[irt] then
                    irt = email.subject:gsub("^[a-zA-Z]+:%s+", "")
                    while irt:match("^[a-zA-Z]+:%s+") do
                        irt = irt:gsub("^[a-zA-Z]+:%s+", "")
                    end
                end
                local refpoint = irt or email['references'] or ""
                local point = emails[irt] or (#refpoint > 0 and findSubject(emails, emls, irt, email.epoch))
                -- Try a little harder??
                if not point and email.subject:match("^[A-Za-z]+:%s+") then -- if this is a 'Re:' or 'Aw:' or 'Fwd:', try to find parent anyway
                    point = findSubject(emails, emls, irt, email.epoch, 30) -- at most, go back 30 days. if not, then they don't belong together...I guess
                end

                if point then
                    -- cap nesting at 50 to keep pathological threads bounded
                    if point.nest < 50 then
                        point.nest = point.nest + 1
                        table.insert(point.children, emails[mid])
                    end
                else
                    -- no parent found: start a new thread root (keyed by
                    -- subject/irt when available, else the email itself)
                    if (irt ~= JSON.null and #irt > 0) then
                        emails[irt] = {
                            children = {
                                emails[mid]
                            },
                            nest = 1,
                            epoch = email.epoch,
                            tid = v._id
                        }
                        emails[mid].nest = emails[irt].nest + 1
                        table.insert(threads, emails[irt])
                    else
                        table.insert(threads, emails[mid])
                    end
                end
                -- needed for emailsOnly
                email.references = nil
            end

            -- strip fields not needed in the response payload
            email.to = nil
            -- Duplicate of email.irt
            email['in-reply-to'] = nil
            -- obfuscate sender addresses for anonymous visitors (anti-spam)
            if not account and config.antispam then
                email.from = email.from:gsub("(%S+)@(%S+)", function(a,b) return a:sub(1,2) .. "..." .. "@" .. b end)
            end
            if email.attachments then
                email.attachments = #email.attachments
            else
                email.attachments = 0
            end
            -- truncate long bodies (assumes body is present in _source — it is requested above)
            email.body = #email.body < BODY_MAXLEN and email.body or email.body:sub(1, BODY_MAXLEN) .. "..."
            if not statsOnly then
                table.insert(emls, email)
            else
                table.insert(emls, {epoch= email.epoch})
            end
        end
    end

    local allparts = 0 -- number of participants
    local top10 = {}

    -- Rank senders by message count and keep the top ten.
    if not statsOnly and not emailsOnly then
        local stable = {}
        for k, v in pairs(senders) do
            table.insert(stable, v)
        end
        table.sort(stable, function(a,b) return a.count > b.count end )
        allparts = #stable
        for k, v in pairs(stable) do
            if k <= 10 then
                table.insert(top10, v)
            else
                break
            end
        end
    end

    -- anonymize emails if not logged in - anti-spam!
    if not account and config.antispam then
        for k, v in pairs(top10) do
            top10[k].email = top10[k].email:gsub("(%S+)@(%S+)", function(a,b) return a:sub(1,2) .. "..." .. "@" .. b end)
        end
    end

    -- Debug time point 6
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end

    sortEmail(threads)

    -- Debug time point 7
    if DEBUG then
        table.insert(t, r:clock() - tnow)
        tnow = r:clock()
    end

    -- raise cjson's nesting limit so deep threads can be serialized
    if JSON.encode_max_depth then
        -- TODO see #439
        JSON.encode_max_depth(1000)
    end
    -- Assemble the final response object.
    listdata.max = maxresults
    listdata.using_wc = wc
    listdata.no_threads = #threads
    if not statsOnly and not emailsOnly then
        listdata.thread_struct = threads
    end
    listdata.firstYear = datespan.firstYear
    listdata.lastYear = datespan.lastYear
    listdata.firstMonth = datespan.firstMonth
    listdata.lastMonth = datespan.lastMonth
    listdata.list = listraw:gsub("^([^.]+)%.", "%1@"):gsub("[<>]+", "")
    listdata.emails = emls
    listdata.hits = #emls
    listdata.searchlist = listraw
    listdata.participants = top10
    listdata.cloud = cloud
    if DEBUG then
        listdata.took = r:clock() - START
        listdata.debug = t
    end
    listdata.numparts = allparts
    listdata.unixtime = os.time()

    r:puts(JSON.encode(listdata))

    return cross.OK
end
| |
| cross.start(handle) |