add couch_collate

couch_collate is a nif replacing couch_drv and couch_ejson_compare
binaries to do binary collation using ICU.
diff --git a/c_src/couch_collate.c b/c_src/couch_collate.c
new file mode 100644
index 0000000..c5453d2
--- /dev/null
+++ b/c_src/couch_collate.c
@@ -0,0 +1,281 @@
+/*
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+this file except in compliance with the License. You may obtain a copy of the
+License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+*/
+
+#ifdef DARWIN
+#define U_HIDE_DRAFT_API 1
+#define U_DISABLE_RENAMING 1
+#endif
+
+#include "erl_nif.h"
+#include "unicode/ucol.h"
+#include "unicode/ucasemap.h"
+#include <stdio.h>
+#include <assert.h>
+
+static ERL_NIF_TERM ATOM_TRUE;
+static ERL_NIF_TERM ATOM_FALSE;
+static ERL_NIF_TERM ATOM_NULL;
+
+typedef struct {
+    ErlNifEnv* env;
+    int error;
+    UCollator* coll;
+} ctx_t;
+
+typedef struct {
+    UCollator** collators;
+    int collStackTop;
+    int numCollators;
+    ErlNifMutex* collMutex;
+} priv_data_t;
+
+static ERL_NIF_TERM collate_nif(ErlNifEnv*, int, const ERL_NIF_TERM []);
+static int collate_binary(priv_data_t*, ctx_t*, ERL_NIF_TERM, ERL_NIF_TERM, ERL_NIF_TERM);
+static int on_load(ErlNifEnv*, void**, ERL_NIF_TERM);
+static void on_unload(ErlNifEnv*, void*);
+static __inline void reserve_coll(priv_data_t*, ctx_t*);
+static __inline void release_coll(priv_data_t*, ctx_t*);
+int on_reload(ErlNifEnv*, void**, ERL_NIF_TERM);
+int on_upgrade(ErlNifEnv*, void**, void**, ERL_NIF_TERM);
+
+void
+reserve_coll(priv_data_t* pData, ctx_t *ctx)
+{
+    if (ctx->coll == NULL) {
+        enif_mutex_lock(pData->collMutex);
+        assert(pData->collStackTop < pData->numCollators);
+        ctx->coll = pData->collators[pData->collStackTop];
+        pData->collStackTop += 1;
+        enif_mutex_unlock(pData->collMutex);
+    }
+}
+
+
+void
+release_coll(priv_data_t* pData, ctx_t *ctx)
+{
+    if (ctx->coll != NULL) {
+        enif_mutex_lock(pData->collMutex);
+        pData->collStackTop -= 1;
+        assert(pData->collStackTop >= 0);
+        enif_mutex_unlock(pData->collMutex);
+    }
+}
+
+/* ------------------------------------------------------------------------- */
+
+static ERL_NIF_TERM
+collate_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) {
+    ERL_NIF_TERM term_a          = argv[0];
+    ERL_NIF_TERM term_b          = argv[1];
+    ERL_NIF_TERM term_has_nocase = argv[2];
+
+    ctx_t ctx;
+    int result;
+    priv_data_t* pData;
+
+    ctx.env = env;
+    ctx.error = 0;
+    ctx.coll = NULL;
+
+    pData = (priv_data_t*) enif_priv_data(env);
+
+    result = collate_binary(pData, &ctx, term_a, term_b, term_has_nocase);
+    release_coll(pData, &ctx);
+
+    return enif_make_int(env, result);
+}
+
+int
+collate_binary(priv_data_t* pData, ctx_t* ctx, ERL_NIF_TERM term_a, ERL_NIF_TERM term_b, ERL_NIF_TERM term_has_nocase)
+{
+    ErlNifBinary binA, binB;
+    int has_nocase, response;
+
+    if(!enif_get_int(ctx->env, term_has_nocase, &has_nocase)) {
+        ctx->error = 1;
+        return 0;
+    }
+    if(!enif_inspect_binary(ctx->env, term_a, &binA)) {
+        ctx->error = 1;
+        return 0;
+    }
+    if(!enif_inspect_binary(ctx->env, term_b, &binB)) {
+        ctx->error = 1;
+        return 0;
+    }
+
+    switch(has_nocase) {
+    case 0: /* COLLATE */
+    case 1: /* COLLATE_NO_CASE: */
+        {
+        UErrorCode status = U_ZERO_ERROR;
+        UCharIterator iterA;
+        UCharIterator iterB;
+
+        uiter_setUTF8(&iterA, (const char *) binA.data, (uint32_t) binA.size);
+        uiter_setUTF8(&iterB, (const char *) binB.data, (uint32_t) binB.size);
+
+        /* grab a collator */
+        reserve_coll(pData, ctx);
+
+        if (has_nocase == 1) /* switching this collator to case insensitive */
+          ucol_setAttribute(ctx->coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
+
+        /* by default, it will collate case sensitive */
+        response = ucol_strcollIter(ctx->coll, &iterA, &iterB, &status);
+
+        if (has_nocase == 1) /* puting back this collator to case sensitive */
+          ucol_setAttribute(ctx->coll, UCOL_STRENGTH, UCOL_DEFAULT, &status);
+
+        break;
+        }
+
+    default:
+        response = -1;
+    }
+
+    return response;
+}
+
+/* ------------------------------------------------------------------------- */
+
+int
+on_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM info)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    priv_data_t* pData = (priv_data_t*)enif_alloc(sizeof(priv_data_t));
+    int i, j;
+
+    /* Initialize the structure */
+    pData->collators = NULL;
+    pData->collStackTop = 0;
+    pData->numCollators = 0;
+    pData->collMutex = NULL;
+
+    if (!enif_get_int(env, info, &(pData->numCollators) )) {
+        enif_free((char*)pData);
+        return 1;
+    }
+
+    if (pData->numCollators < 1) {
+        enif_free((char*)pData);
+        return 2;
+    }
+
+    pData->collMutex = enif_mutex_create((char *)"coll_mutex");
+
+    if (pData->collMutex == NULL) {
+        enif_free((char*)pData);
+        return 3;
+    }
+
+    pData->collators = enif_alloc(sizeof(UCollator*) * pData->numCollators);
+
+    if (pData->collators == NULL) {
+        enif_mutex_destroy(pData->collMutex);
+        enif_free((char*)pData);
+        return 4;
+    }
+
+    for (i = 0; i < pData->numCollators; i++) {
+        pData->collators[i] = ucol_open("", &status);
+
+        if (U_FAILURE(status)) {
+            for (j = 0; j < i; j++) {
+                ucol_close(pData->collators[j]);
+            }
+
+            enif_free(pData->collators);
+            enif_mutex_destroy(pData->collMutex);
+
+            enif_free((char*)pData);
+
+            return 5;
+        }
+    }
+
+    ATOM_TRUE = enif_make_atom(env, "true");
+    ATOM_FALSE = enif_make_atom(env, "false");
+    ATOM_NULL = enif_make_atom(env, "null");
+
+    *priv_data = pData;
+
+    return 0;
+}
+
+
+void
+on_unload(ErlNifEnv* env, void* priv_data)
+{
+    priv_data_t* pData = (priv_data_t*)priv_data;
+    if (pData->collators != NULL) {
+        int i;
+
+        for (i = 0; i < pData->numCollators; i++) {
+            ucol_close(pData->collators[i]);
+        }
+
+        enif_free(pData->collators);
+    }
+
+    if (pData->collMutex != NULL) {
+        enif_mutex_destroy(pData->collMutex);
+    }
+
+    enif_free((char*)pData);
+}
+
+int
+on_reload(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM info)
+{
+    return 0;
+}
+
+int
+on_upgrade(ErlNifEnv* env, void** priv_data, void** old_data, ERL_NIF_TERM info)
+{
+    if (*old_data != NULL) {
+        priv_data_t* pData = (priv_data_t*)old_data;
+
+        if (pData->collators != NULL) {
+            int i;
+
+            for (i = 0; i < pData->numCollators; i++) {
+                ucol_close(pData->collators[i]);
+            }
+
+            enif_free(pData->collators);
+        }
+
+        if (pData->collMutex != NULL) {
+            enif_mutex_destroy(pData->collMutex);
+        }
+
+        enif_free((char*)pData);
+    }
+
+    return on_load(env, priv_data, info);
+}
+
+/* ------------------------------------------------------------------------- */
+
+static ErlNifFunc
+nif_funcs[] =
+{
+    {"collate_nif", 3, collate_nif}
+};
+
+ERL_NIF_INIT(couch_collate, nif_funcs, &on_load, &on_reload, &on_upgrade, &on_unload)
diff --git a/rebar.config b/rebar.config
new file mode 100644
index 0000000..7bfd49e
--- /dev/null
+++ b/rebar.config
@@ -0,0 +1,9 @@
+%% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*-
+%% ex: ft=erlang ts=4 sw=4 et
+
+{erl_opts, [
+        warnings_as_errors,
+        warn_export_all
+]}.
+
+{pre_hooks, [{clean, "rm -fr ebin priv erl_crash.dump"}]}.
diff --git a/rebar.config.script b/rebar.config.script
new file mode 100644
index 0000000..ff5ef9b
--- /dev/null
+++ b/rebar.config.script
@@ -0,0 +1,46 @@
+%% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*-
+%% ex: ft=erlang ts=4 sw=4 et
+
+%% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+%% use this file except in compliance with the License. You may obtain a copy of
+%% the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+%% License for the specific language governing permissions and limitations under
+%% the License.
+
+Arch = erlang:system_info(system_architecture),
+
+ICUConfig = fun(Args) ->
+    {0, Value} = eunit_lib:command("icu-config " ++ Args),
+    [C||C <- Value, C =/= $\n]
+end,
+
+GetFlag = fun(Name, Args) ->
+        case os:getenv(Name) of
+            false -> ICUConfig(Args);
+            Val -> Val
+        end
+    end,
+
+ICUCFLAGS = GetFlag("ICU_CFLAGS", "--cflags"),
+ICUCXXFLAGS = GetFlag("ICU_CXXFLAGS", "--cxxflags"),
+ICULDFLAGS = GetFlag("ICU_LDFLAGS", "--ldflags"),
+ICUINCPATH = GetFlag("ICU_INCPATH", "--cppflags-searchpath"),
+
+
+PortEnv = [{port_env, [
+            {"CFLAGS",  ICUCFLAGS ++  " $CFLAGS " ++ ICUINCPATH},
+            {"CXXFLAGS", ICUCXXFLAGS ++ " $CXXFLAGS " ++ ICUINCPATH},
+            {"LDFLAGS",  ICULDFLAGS ++ " $LDFLAGS"}]},
+
+           {port_specs, [
+            {filename:join(["priv", Arch, "couch_collate.so"]),
+            ["c_src/*.c"]}]}
+],
+
+lists:keymerge(1,lists:keysort(1, PortEnv), lists:keysort(1, CONFIG)).
diff --git a/src/couch_collate.app.src b/src/couch_collate.app.src
new file mode 100644
index 0000000..4541b67
--- /dev/null
+++ b/src/couch_collate.app.src
@@ -0,0 +1,13 @@
+%% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*-
+%% ex: ft=erlang ts=4 sw=4 et
+
+{application, couch_collate,
+    [
+        {description, "couchdb collation module"},
+        {vsn, "0.1.0"},
+        {registered, []},
+        {applications, [kernel,
+                        stdlib]},
+        {included_applications, []},
+        {env, []}
+]}.
diff --git a/src/couch_collate.erl b/src/couch_collate.erl
new file mode 100644
index 0000000..341510f
--- /dev/null
+++ b/src/couch_collate.erl
@@ -0,0 +1,62 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_collate).
+
+-export([init/0]).
+-export([collate/2, collate/3]).
+
+-on_load(init/0).
+
+-type collate_options() :: [nocase].
+-export_type([collate_options/0]).
+
+init() ->
+    PrivDir = case code:priv_dir(?MODULE) of
+        {error, _} ->
+            EbinDir = filename:dirname(code:which(?MODULE)),
+            AppPath = filename:dirname(EbinDir),
+            filename:join(AppPath, "priv");
+        Path ->
+            Path
+    end,
+    NumScheds = erlang:system_info(schedulers),
+    Arch = erlang:system_info(system_architecture),
+    (catch erlang:load_nif(filename:join([PrivDir, Arch, ?MODULE]),
+                           NumScheds)),
+    case erlang:system_info(otp_release) of
+        "R13B03" -> true;
+        _ -> ok
+    end.
+
+%% @doc compare 2 string, result is -1 for lt, 0 for eq and 1 for gt.
+-spec collate(binary(), binary()) -> 0 | -1 | 1.
+collate(A, B) ->
+    collate(A, B, []).
+
+-spec collate(binary(), binary(), collate_options()) -> 0 | -1 | 1.
+collate(A, B, Options) when is_binary(A), is_binary(B) ->
+    HasNoCase = case lists:member(nocase, Options) of
+        true -> 1; % Case insensitive
+        false -> 0 % Case sensitive
+    end,
+    do_collate(A, B, HasNoCase).
+
+%% @private
+
+do_collate(BinaryA, BinaryB, 0) ->
+    collate_nif(BinaryA, BinaryB, 0);
+do_collate(BinaryA, BinaryB, 1) ->
+    collate_nif(BinaryA, BinaryB, 1).
+
+collate_nif(_BinaryA, _BinaryB, _HasCase) ->
+    exit(couch_collate_not_loaded).