/** \file test_casserializer.cpp .

 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.

-------------------------------------------------------------------------- */


// define this first to get your application name added to command line
// used inside "cmdline_driver_args.h"
#define MAIN_TITLE            _TEXT("UIMA Test CAS Serializer")


/* ----------------------------------------------------------------------- */
/*       Include dependencies                                              */
/* ----------------------------------------------------------------------- */

#include "uima/pragmas.hpp" //must be first include to surpress warnings
#include "uima/api.hpp"
#include "uima/internal_casimpl.hpp"
#include "uima/casdefinition.hpp"
#include "uima/internal_casserializer.hpp"
#include "uima/internal_casdeserializer.hpp"
#include <sys/stat.h>

using namespace uima;
/* ----------------------------------------------------------------------- */
/*       Constants                                                         */
/* ----------------------------------------------------------------------- */

// ASSERT_OR_THROWEXCEPTION(x): a check that stays active in every build flavour.
//  - When NDEBUG is not defined it forwards to the standard assert().
//  - When NDEBUG is defined it prints the failing file and line to cerr and
//    calls exit(1) if x is false.
// The NDEBUG form is wrapped in do { } while (0) so that the macro expands to
// exactly one statement and can be used safely in an unbraced if/else.
#ifndef NDEBUG
#define ASSERT_OR_THROWEXCEPTION(x) assert(x)
#else
#define ASSERT_OR_THROWEXCEPTION(x) do { if (!(x)) { cerr << __FILE__ << ": Error in line " << __LINE__ << endl; exit(1); } } while (0)
#endif
// LOG(x): writes "<file>:<line>: <x>" to cout; x may be a chain of << operands.
// The ':' between file and line keeps the line number from running into the
// file name.
#define LOG(x) cout << __FILE__ << ":" << __LINE__ << ": " << x << endl
/* ----------------------------------------------------------------------- */
/*      UTILS                                                  */
/* ----------------------------------------------------------------------- */
void displayException(util::ConsoleUI & console, Exception & crclException)
/* ----------------------------------------------------------------------- */
/* Prints a UIMA exception to the console: an "Exception" header, then the */
/* error id, name, what() text, message and recoverable flag, followed by  */
/* the error info streamed with the global indent set to two spaces.       */
/* ----------------------------------------------------------------------- */
{
  // headline followed by the individual attributes of the exception
  console.formatHeader(_TEXT("Exception"));
  console.format(_TEXT("Exception error id"),
                 crclException.getErrorInfo().getErrorId());
  console.format(_TEXT("Exception name"),
                 crclException.getName());
  console.format(_TEXT("Exception what"),
                 crclException.what());
  console.format(_TEXT("Exception message"),
                 crclException.getErrorInfo().getMessage().asString().c_str());
  console.formatBool(_TEXT("Exception recoverable"),
                     crclException.getErrorInfo().isRecoverable());

  // stream the error info with a two-space indent, then put the previous
  // global indent back
  const TCHAR * cpszPreviousIndent = ErrorInfo::getGlobalErrorInfoIndent();
  ErrorInfo::setGlobalErrorInfoIndent("  ");
  console.getOutputStream() << crclException.getErrorInfo() << endl;
  ErrorInfo::setGlobalErrorInfoIndent(cpszPreviousIndent);
}


/**
 * Compares two vectors element by element and reports differences.
 *
 * If the sizes differ, both sizes are written to the console's output stream
 * and false is returned. Otherwise every element pair is compared; if at
 * least one pair differs, each differing index is written together with both
 * values.
 *
 * @param crV1      first vector
 * @param crV2      second vector
 * @param pConsole  console whose output stream receives the report
 * @return true if both vectors have the same size and equal elements
 */
template<class T>
bool equalVectors(vector<T> const & crV1, vector<T> const & crV2, util::ConsoleUI * pConsole) {
  const size_t uiCount = crV1.size();
  if ( uiCount != crV2.size() ) {
    pConsole->getOutputStream()  << "size 1: " << uiCount << ", size 2: " << crV2.size() << endl;
    return false;
  }

  // first pass: find out whether any element pair differs
  bool bAllEqual = true;
  for (size_t uiIndex = 0; uiIndex < uiCount; ++uiIndex) {
    if ( !(crV1[uiIndex] == crV2[uiIndex]) ) {
      bAllEqual = false;
    }
  }
  if (bAllEqual) {
    return true;
  }

  // second pass: list every differing position with both values
  for (size_t uiIndex = 0; uiIndex < uiCount; ++uiIndex) {
    if (crV1[uiIndex] != crV2[uiIndex]) {
      pConsole->getOutputStream() << uiIndex << ": " << crV1[uiIndex] << "         " << crV2[uiIndex] << endl;
    }
  }
  return false;
}

/* ----------------------------------------------------------------------- */
/*       Tests                                                             */
/* ----------------------------------------------------------------------- */

/**
 * Round-trip test for the CAS definitions.
 *
 * Creates an engine from "toktest.xml", builds a CASDefinition from its
 * annotator context, serializes the definitions, deserializes them back into
 * the same CASDefinition, serializes once more and asserts that every table of
 * the two serialized forms is equal.
 *
 * @param pConsole  console used for progress, timing and mismatch output
 */
void testSerializeDefinitions(util::ConsoleUI * pConsole) {
  uima::internal::CASDefinition * pCasDefinition;
  uima::internal::SerializedCAS firstPass;
  uima::Timer stopWatch;

  // build the engine from the resolved descriptor
  ErrorInfo errInfo;
  UnicodeString descriptorName("toktest.xml");
  UnicodeString descriptorPath = ResourceManager::resolveFilename(descriptorName, descriptorName);

  uima::TextAnalysisEngine * pEngine =
    TextAnalysisEngine::createTextAnalysisEngine(UnicodeStringRef(descriptorPath).asUTF8().c_str(), errInfo);
  if (pEngine == NULL ) {
    LOG("Error: " << errInfo.asString());
    ASSERT_OR_THROWEXCEPTION(false);
  }
  ASSERT_OR_THROWEXCEPTION(EXISTS(pEngine));
  ASSERT_OR_THROWEXCEPTION( errInfo.getErrorId() == UIMA_ERR_NONE );

  pCasDefinition = uima::internal::CASDefinition::createCASDefinition(pEngine->getAnnotatorContext());

  pConsole->info("Serializing Type System and Index definitions CAS");
  uima::internal::CASSerializer serializer(true);
  firstPass.reset();

  uima::internal::CASDeserializer deserializer;

  // first serialization of type system and index definitions (timed)
  stopWatch.reset();
  stopWatch.start();
  serializer.serializeDefinitions(*pCasDefinition, firstPass);
  stopWatch.stop();
#ifndef UIMA_SUPPRESS_TIMING
  pConsole->format("  Serialization of Definitions", uima::Timer::timeString( stopWatch.getAccumulatedTime() ).c_str() );
#endif

  // read the serialized form back into the same definition (timed)
  stopWatch.reset();
  stopWatch.start();
  deserializer.deserializeDefinitions(firstPass, *pCasDefinition);
  stopWatch.stop();
#ifndef UIMA_SUPPRESS_TIMING
  pConsole->format("  Deserialization of Definitions", uima::Timer::timeString( stopWatch.getAccumulatedTime() ).c_str() );
#endif

  // second serialization; it has to reproduce the first one table by table
  uima::internal::SerializedCAS secondPass;
  serializer.serializeDefinitions(*pCasDefinition, secondPass);

  pConsole->format("Checking", "type symbol table");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getTypeSymbolTable(), secondPass.getTypeSymbolTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "type inheritance table");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getTypeInheritanceTable(), secondPass.getTypeInheritanceTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "feature symbol table");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getFeatureSymbolTable(), secondPass.getFeatureSymbolTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "feature offset table");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getFeatureOffsetTable(), secondPass.getFeatureOffsetTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "feature def table");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getFeatureDefinitionTable(), secondPass.getFeatureDefinitionTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "type priorities");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getTypePriorityTable(), secondPass.getTypePriorityTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "index IDs");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getIndexIDTable(), secondPass.getIndexIDTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "index kinds");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getIndexKindTable(), secondPass.getIndexKindTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "comparator start");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getComparatorStartTable(), secondPass.getComparatorStartTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "comparator def");
  ASSERT_OR_THROWEXCEPTION( equalVectors( firstPass.getComparatorDefinitionTable(), secondPass.getComparatorDefinitionTable(), pConsole ) );
  pConsole->format("   success", true);

  // release what this test created
  delete pCasDefinition;
  delete pEngine;
}

/**
 * Round-trip test for the per-document CAS data.
 *
 * Steps, in order:
 *   1. create an engine from "toktest.xml" and read "tdoc_001_en_850.asc";
 *   2. set the text on a new CAS, set its language to "en" and call process();
 *   3. serialize the CAS data into a SerializedCAS and also into a blob;
 *   4. deserialize the SerializedCAS, then the blob, into a second CAS;
 *   5. serialize the second CAS and assert that FS heap, string symbol table,
 *      document and indexed FSs equal those of the first serialization.
 *
 * @param pConsole  console used for progress, timing and mismatch output
 */
void testSerializeData(util::ConsoleUI * pConsole ) {
  pConsole->info("Serializing Document Data CAS");
  uima::Timer iv_serializeTimerData;

  /* create engine */
  ErrorInfo errInfo;
  UnicodeString filename("toktest.xml");
  UnicodeString fn = ResourceManager::resolveFilename(filename, filename);

  uima::TextAnalysisEngine * pEngine =
    TextAnalysisEngine::createTextAnalysisEngine(UnicodeStringRef(fn).asUTF8().c_str(), errInfo);
  if (pEngine == NULL ) {
    LOG("Error: " << errInfo.asString());
    ASSERT_OR_THROWEXCEPTION(false);
  }
  ASSERT_OR_THROWEXCEPTION(EXISTS(pEngine));
  ASSERT_OR_THROWEXCEPTION( errInfo.getErrorId() == UIMA_ERR_NONE );

  /* read in a file */
  UnicodeString dataFile("tdoc_001_en_850.asc");
  UnicodeString datafn = ResourceManager::resolveFilename(dataFile, dataFile);
  std::string dataFilename = UnicodeStringRef(datafn).asUTF8();
  /* open file for read */
  FILE * pFile = fopen( dataFilename.c_str(),"rb");
  ASSERT_OR_THROWEXCEPTION(pFile != NULL );

  /* allocate buffer for file contents; stat() must succeed, otherwise the
     size read from stat_result would be uninitialised */
  struct stat stat_result;
  int statRc = stat(dataFilename.c_str(), &stat_result);
  ASSERT_OR_THROWEXCEPTION( statRc == 0 );
  size_t filesize = static_cast<size_t>(stat_result.st_size);
  /* new[] reports failure by throwing, so no NULL check is needed here */
  char * pBuffer = new char[filesize+1];

  /* read the file */
  size_t numread = fread(pBuffer,1,filesize,pFile);
  fclose(pFile);

  /* convert to unicode; only the bytes actually read are converted */
  UnicodeString ustrInputText(pBuffer, (int32_t)numread, "utf-8");
  delete[] pBuffer;

  /* set CAS document text */
  CAS * tcas = pEngine->newCAS();
  ASSERT_OR_THROWEXCEPTION( EXISTS(tcas) );

  tcas->setDocumentText(ustrInputText.getBuffer(), ustrInputText.length(), true);
  tcas->getDocumentAnnotation().setLanguage("en");

  /* call process */
  TyErrorId err = pEngine->process(*tcas);
  ASSERT_OR_THROWEXCEPTION( err == UIMA_ERR_NONE );

  uima::internal::CASImpl & cas = uima::internal::CASImpl::promoteCAS( *tcas);
  uima::internal::CASSerializer serializer(true);

  uima::internal::SerializedCAS iv_serializedCAS;
  iv_serializedCAS.reset();

  /* serialize into the SerializedCAS structure (timed) */
  iv_serializeTimerData.reset();
  iv_serializeTimerData.start();
  serializer.serializeData(cas, iv_serializedCAS);
  iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
  pConsole->format("  Serialization per-document data", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif

  /* serialize the same CAS into a blob (timed); the buffer is sized by
     getBlobSize() and released with delete[] at the end of the test */
  iv_serializeTimerData.reset();
  iv_serializeTimerData.start();
  size_t blobsz = serializer.getBlobSize(cas);
  char* blob = new char[blobsz];
  blobsz = serializer.getBlob(cas, blob, blobsz);
  iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
  pConsole->format("  Blob serialization per-document data", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif

  pConsole->formatHeader("CAS Serialization Results:");
  pConsole->format("  FS Heap size", (unsigned long)iv_serializedCAS.getFSHeapArray().size() );
  pConsole->format("  String Heap size", (unsigned long)iv_serializedCAS.getStringSymbolTable().size() );
  pConsole->format("  Number of Indexed FSs", (unsigned long)iv_serializedCAS.getIndexedFSs().size() );
  pConsole->format("  Blob size:", (unsigned long)blobsz);

  pConsole->info("Deserializing CAS");

  /* second CAS that receives the deserialized data; checked like the first */
  CAS * pSecondCas = pEngine->newCAS();
  ASSERT_OR_THROWEXCEPTION( EXISTS(pSecondCas) );
  uima::internal::CASImpl * newTCAS = &uima::internal::CASImpl::promoteCAS( *pSecondCas );

  uima::internal::CASDeserializer deserializer;

  iv_serializeTimerData.reset();
  iv_serializeTimerData.start();
  deserializer.deserializeData(iv_serializedCAS, *newTCAS);
  iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
  /* report through the console passed in; this file defines no getConsole() */
  pConsole->format("  Deserialization document data", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif

  // deserialize the blob on top for a quick test
  iv_serializeTimerData.reset();
  iv_serializeTimerData.start();
  deserializer.deserializeBlob(blob, *newTCAS);
  iv_serializeTimerData.stop();
#ifndef UIMA_SUPPRESS_TIMING
  pConsole->format("  Deserialization blob", uima::Timer::timeString( iv_serializeTimerData.getAccumulatedTime() ).c_str() );
#endif

  /* serialize the second CAS; the result has to match the first serialization */
  pConsole->info("Serializing CAS again");
  uima::internal::SerializedCAS serializedCAS2;
  uima::internal::CASSerializer serializer2(true);

  serializer2.serializeData(*newTCAS, serializedCAS2);

  pConsole->info("Checking results");
  pConsole->format("Checking", "fs heap");
  ASSERT_OR_THROWEXCEPTION(equalVectors( iv_serializedCAS.getFSHeapArray(), serializedCAS2.getFSHeapArray(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "string symbol table");
  ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getStringSymbolTable(), serializedCAS2.getStringSymbolTable(), pConsole ) );
  pConsole->format("   success", true);

  pConsole->format("Checking", "document");
  ASSERT_OR_THROWEXCEPTION( iv_serializedCAS.getDocument() == serializedCAS2.getDocument() );
  pConsole->format("   success", true);

  pConsole->format("Checking", "indexed FSs");
  ASSERT_OR_THROWEXCEPTION( equalVectors( iv_serializedCAS.getIndexedFSs(), serializedCAS2.getIndexedFSs(), pConsole ));
  pConsole->format("   success", true);

  pConsole->info("Done");
  delete tcas;
  delete newTCAS;
  delete pEngine;
  /* blob was allocated with new[], so it must be released with delete[] */
  delete[] blob;
}

/* ----------------------------------------------------------------------- */
/*       Main routine                                                      */
/* ----------------------------------------------------------------------- */

int main(int argc, char * argv[]) /*
---------------------------------- */
{
  /* create console */
  util::ConsoleUI * pConsole = new util::ConsoleUI(argc, argv, MAIN_TITLE, "\n");
  assert(EXISTS(pConsole));
  #if !defined(NDEBUG) && defined(_MSC_VER)
   //int iRetVal = _CrtSetBreakAlloc(124613);
  #endif
  /* create a UIMA resource */
  try {
    ResourceManager::createInstance(MAIN_TITLE);
    testSerializeDefinitions(pConsole);
    testSerializeData(pConsole);
    ResourceManager::deleteInstance();
  } catch (Exception & rclException) {
    displayException(*pConsole, rclException);
    pConsole->error("Unexpected UIMA exception");
    return 1;
  } catch (exception & rclException) {
    pConsole->error(rclException.what());
    return 1;
  }
  delete pConsole;
  return(0);

}

/* <EOF> */



