core/sql/common/str.cpp - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         str.cpp
  * Description:
  *
  * Language:     C++
  *
  *
  *****************************************************************************
  */

 // -----------------------------------------------------------------------

 #include "Platform.h"
 #include "NABoolean.h"
 #include "str.h"
 #include "NAStdlib.h"
 #include "NAAssert.h"
 #include "BaseTypes.h"
 #include "Int64.h"
 #include "NAString.h"

 #include <stdarg.h>

 #include "ComResWords.h"

 /*
  ******************************************************************
  * Helper functions for ISO 8859_1 (8-bit european) alphabet processing
  *
  */

 // Returns TRUE when c is an upper-case letter of ISO 8859-1: either an
 // ASCII capital ('A'..'Z') or an accented capital in 0xC0..0xDE, with the
 // multiplication sign (0xD7) excluded.  Returns FALSE for everything else.
 Int32 isUpper8859_1(NAWchar c)
 {
   // Everything below 'A' and the multiplication sign are never capitals.
   if ((c < 'A') || (c == 0xd7))
     return FALSE;

   // ASCII capitals.
   if (c <= 'Z')
     return TRUE;

   // Cap A with grave accent through cap Icelandic thorn.
   return ((c >= 0xc0) && (c <= 0xde)) ? TRUE : FALSE;
 }


 // Returns TRUE when c is a lower-case letter of ISO 8859-1: either an
 // ASCII small letter ('a'..'z') or a character in 0xDF..0xFF (German sharp
 // s through y with diaeresis), with the division sign (0xF7) excluded.
 Int32 isLower8859_1(NAWchar c)
 {
   // The division sign sits inside the accented range but is not a letter.
   if (c == 0xf7)
     return FALSE;

   if ((c >= 'a') && (c <= 'z'))
     return TRUE;

   return ((c >= 0xdf) && (c <= 0xff)) ? TRUE : FALSE;
 }


 // Returns TRUE when c is alphabetic in ISO 8859-1: an ASCII letter or a
 // European letter in 0xC0..0xFF.  The multiplication (0xD7) and division
 // (0xF7) signs inside that range are not letters and yield FALSE.
 Int32 isAlpha8859_1(NAWchar c)
 {
   // Multiply / divide signs are the only non-letters in 0xC0..0xFF.
   if ((c == 0xd7) || (c == 0xf7))
     return FALSE;

   // North American English alphabetic characters.
   if ((c >= 'A') && (c <= 'Z'))
     return TRUE;
   if ((c >= 'a') && (c <= 'z'))
     return TRUE;

   // Remaining European letters.
   if ((c >= 0xc0) && (c <= 0xff))
     return TRUE;

   return FALSE;
 }

 // Returns non-zero when c is a hexadecimal digit: '0'..'9', 'A'..'F' or
 // 'a'..'f'.
 Int32 isHexDigit8859_1(NAWchar c)
 {
    const Int32 upperHex = (c >= 'A') && (c <= 'F');
    const Int32 lowerHex = (c >= 'a') && (c <= 'f');

    return (isDigit8859_1(c) || upperHex || lowerHex);
 }

 // Returns non-zero when c is either alphabetic (isAlpha8859_1) or a
 // decimal digit (isDigit8859_1).
 Int32 isAlNum8859_1(NAWchar c)
 {
   if (isAlpha8859_1(c))
     return 1;

   return isDigit8859_1(c) ? 1 : 0;
 }

 // Returns TRUE for the white-space characters 0x09..0x0D (tab, line feed,
 // vertical tab, form feed, carriage return) and 0x20 (blank).
 Int32 isSpace8859_1(NAWchar c)
 {
   switch (c)
     {
     case 0x09:
     case 0x0a:
     case 0x0b:
     case 0x0c:
     case 0x0d:
     case 0x20:
       return TRUE;
     default:
       return FALSE;
     }
 }

 // ISO 8859-1 char set safe isdigit routine: TRUE only for '0'..'9'.
 Int32 isDigit8859_1(NAWchar c)
 {
   return ((c < '0') || (c > '9')) ? FALSE : TRUE;
 }

 // Returns TRUE for the two ISO 8859-1 letters that have no upper-case
 // equivalent: 0xDF (German sharp s) and 0xFF (y with diaeresis).
 Int32 isCaseInsensitive8859_1(NAWchar c)
 {
   switch (c)
     {
     case 0xDF:
     case 0xFF:
       return TRUE;
     default:
       return FALSE;
     }
 }


 // Out-of-line assertion helper that exists so str_cpy_all can still be
 // inlined.  Once the compiler inlines routines containing assert(), callers
 // should drop callAssert() and assert directly.
 // Asserts that tgt and src are both non-NULL unless length is zero.
 void callAssert(const char* tgt, const char* src, Lng32 length) {
   assert(!length || (tgt && src));
 }

 // Equality-oriented comparison of two NUL-terminated strings, either of
 // which may be NULL.  Returns 0 when both are NULL or have identical
 // contents.  Non-zero results mean "not equal": -3 (only left is NULL),
 // +3 (only right is NULL), 2 (lengths differ), or the str_cmp() result
 // for equally long strings.
 Int32 str_cmp_ne(const char *left, const char *right)
 {
   if (!left || !right)
     {
       if (!left && !right)
         return 0;               // both NULL: equal
       return left ? +3 : -3;    // exactly one NULL: not equal
     }

   const Int32 leftLen  = str_len(left);
   const Int32 rightLen = str_len(right);

   if (leftLen != rightLen)
     return 2;                   // different lengths: not equal

   return str_cmp(left, right, leftLen);
 }

 // Copies the NUL-terminated string src into the fixed-length field tgt of
 // tgtlen bytes.  At most tgtlen bytes are copied; any remaining bytes are
 // filled with padchar.  No NUL terminator is written.  Always returns 0.
 Int32 str_cpy(char *tgt, const char *src, Int32 tgtlen, char padchar)
 {
   assert((tgt && src) || !tgtlen);

   // Number of source bytes that fit, stopping at the terminator.
   Int32 srcChars = 0;
   for (; srcChars < tgtlen; srcChars++)
     {
       if (src[srcChars] == 0)
         break;
     }

   str_cpy_all(tgt, src, srcChars);

   // Pad whatever part of the field the source did not fill.
   const Int32 padLen = tgtlen - srcChars;
   if (padLen > 0)
     str_pad(tgt + srcChars, padLen, padchar);

   return 0;
 }

 // Copies srclen bytes of src (not necessarily NUL-terminated) into the
 // fixed-length field tgt of tgtlen bytes, truncating to tgtlen and filling
 // any unused remainder with padchar.  Always returns 0.
 Int32
 byte_str_cpy(char *tgt, Int32 tgtlen, const char *src, Int32 srclen, char padchar)
 {
   assert((tgt && src) || !tgtlen);

   // Copy the smaller of the two lengths.
   const Int32 copy_len = (srclen < tgtlen) ? srclen : tgtlen;

   str_cpy_all(tgt, src, copy_len);

   const Int32 padLen = tgtlen - copy_len;
   if (padLen > 0)
     str_pad(tgt + copy_len, padLen, padchar);

   return 0;
 }

 // Writes the concatenation of the NUL-terminated strings first and second
 // into result and NUL-terminates it.  result may be the same pointer as
 // first, in which case first is not re-copied.  Always returns 0.
 Int32 str_cat(const char *first, const char *second, char *result)
 {
   assert(first && second && result);

   const Int32 firstlen  = str_len(first);
   const Int32 secondlen = str_len(second);
   char *tail = result + firstlen;   // where the second string goes

   if (first != result)
     str_cpy_all(result, first, firstlen);

   str_cpy_all(tail, second, secondlen);
   tail[secondlen] = 0;

   return 0;
 }

 // Writes the decimal representation of the unsigned value i into outstr
 // as a NUL-terminated string and returns outstr.  The caller provides a
 // buffer large enough for all digits plus the terminator.
 char *str_itoa(ULng32 i, char *outstr)
 {
   assert(outstr);

   // Count the digits; the do/while guarantees one digit for i == 0.
   short ndigits = 0;
   ULng32 rest = i;
   do
     {
       rest = rest / 10;
       ndigits++;
     }
   while (rest > 0);

   // Terminate, then emit the digits right to left.
   char *p = outstr + ndigits;
   *p = 0;

   rest = i;
   do
     {
       *--p = '0' + (char) (rest % 10);
       rest = rest / 10;
     }
   while (rest > 0);

   return outstr;
 }

 // Writes the decimal representation of the signed 64-bit value i into
 // outstr as a NUL-terminated string (with a leading '-' for negative
 // values) and returns outstr.  The caller provides a buffer large enough
 // for the sign, all digits and the terminator.
 char *str_ltoa(Int64 i, char *outstr)
 {
   assert(outstr);

   // Do the digit arithmetic on the non-positive value.  Negating a
   // positive number is always representable, whereas negating the most
   // negative Int64 overflows (which previously produced just "-").
   const Int64 negVal = (i > 0) ? -i : i;

   // Count the characters needed: optional sign plus at least one digit.
   short len = (i < 0) ? 1 : 0;
   Int64 temp = negVal;
   do
     {
       temp = temp / 10;
       len++;
     }
   while (temp != 0);

   // set the NULL byte at the end of the string
   outstr[len] = 0;

   // produce the ASCII digits right to left; the remainder of a
   // non-positive dividend is in -9..0, so subtract it from '0'
   short pos = len - 1;
   temp = negVal;
   do
     {
       outstr[pos--] = '0' - (char) (temp % 10);
       temp = temp / 10;
     }
   while (temp != 0);

   if (i < 0)
     outstr[0] = '-';

   return outstr;
 }

 // Converts the first instrLen bytes of instr to a non-negative integer.
 // The accepted format is: optional leading blanks, decimal digits,
 // optional trailing blanks.  Returns -1 when the input is empty or all
 // blanks, or contains any other character (including a sign or blanks
 // between digits).  Overflow is not detected.
 Int64 str_atoi(const char * instr, Lng32 instrLen)
 {
   assert(instr);

   Int32 pos = 0;

   // Phase 1: leading blanks.
   while ((pos < instrLen) && (instr[pos] == ' '))
     pos++;

   // Nothing but blanks (or nothing at all) is an error.
   if (pos == instrLen)
     return -1;

   // Phase 2: the digits.
   Int64 value = 0;
   while ((pos < instrLen) && (instr[pos] >= '0') && (instr[pos] <= '9'))
     {
       value = value*10 + (instr[pos] - '0');
       pos++;
     }

   // Phase 3: trailing blanks.
   while ((pos < instrLen) && (instr[pos] == ' '))
     pos++;

   // Anything left over is neither a digit nor a trailing blank.
   if (pos < instrLen)
     return -1;

   return value;
 }

 // convert a scaled exact numeric string and return as float.
 double str_ftoi(const char * instr, Lng32 instrLen)
 {
   assert(instr);

   double v = 0;

   Int32 i = 0;

   // look for decimal point
   while ((i < instrLen) && (instr[i] != '.'))
     i++;

   if (i == instrLen)
     {
       // not a scaled number.
       v = (double)str_atoi(instr, instrLen);
     }
   else
     {
       // found decimal point at 'i'

       // extract the mantissa
       Int64 m = 0;
       if (i > 0)
 	{
 	  m = str_atoi(instr, i);
 	  if (m < 0)
 	    return -1;
 	}

       // extract the fraction
       Int64 f;
       Lng32 scaleLen = instrLen - (i + 1);
       f = str_atoi(&instr[i+1], scaleLen);
       if (f < 0)
 	return -1;

       v = (double)m;
       Int64 tf = f;
       Int64 tens = 10;
       while (tf > 0)
 	{
 	  tf = tf / 10;
 	  tens = tens * 10;
 	}
       v = (v*tens + f) / tens;
     }

   return v;

 }

 // Copies length bytes from src to tgt.  The copy is done with memmove(),
 // so the two regions may overlap.  Always returns 0.
 Int32 mem_cpy_all(void *tgt, const void *src, Lng32 length)
 {
   (void) memmove(tgt, src, length);

   return 0;
 }

 // Moves length bytes from src to tgt; overlapping regions are allowed.
 // Both pointers must be non-NULL unless length is zero (asserted).
 void str_memmove(char *tgt, const char *src, Lng32 length)
 {
   assert(!length || (tgt && src));

   (void) memmove(tgt, src, length);
 }


 // Copies length bytes from src to tgt, then scans backwards over trailing
 // blank_char bytes and stores end_char right after the last byte kept.
 // The scan stops at index 0, so the first byte is always kept.  When no
 // trailing blanks were found, end_char is stored at tgt[length] only if
 // nullTerminate is set.  Always returns 0.
 Int32 str_cpy_and_null(char * tgt,
                        const char * src,
                        Lng32 length,
                        char end_char,
                        char blank_char,
                        NABoolean nullTerminate)
 {
   assert((tgt && src) || !length);

   // Plain forward byte copy.
   for (Lng32 k = 0; k < length; k++)
     tgt[k] = src[k];

   // Index of the last byte to keep.
   Lng32 last = length - 1;
   while ((last > 0) && (tgt[last] == blank_char))
     last--;

   // Terminate if blanks were stripped, or unconditionally on request.
   if ((last < length - 1) || nullTerminate)
     tgt[last + 1] = end_char;

   return 0;
 }

 // ---------------------------------------------------------------
 // Copies length bytes from src to tgt, converting each byte with
 // TOUPPER when upshift is non-zero and with TOLOWER otherwise.
 // src and tgt may point to the same location.  Always returns 0.
 // ---------------------------------------------------------------
 Int32 str_cpy_convert(char * tgt, char * src,
                       Lng32 length, Int32 upshift)
 {
   assert((tgt && src) || !length);

   if (upshift)
     {
       for (Lng32 k = 0; k < length; k++)
         tgt[k] = TOUPPER(src[k]);
     }
   else
     {
       for (Lng32 k = 0; k < length; k++)
         tgt[k] = TOLOWER(src[k]);
     }

   return 0;
 }

 // Returns the number of bytes in s before its NUL terminator.
 // s must not be NULL.
 Int32 str_len(const char * s)
 {
   const char *end = s;

   while (*end != 0)
     end++;

   return (Int32) (end - s);
 }


 // Treats the length bytes at s as one big-endian unsigned number and adds
 // one to it in place.  Returns 0 on success and 1 when the carry runs off
 // the most significant byte (every byte was 0xFF, or length is 0).
 Int32 str_inc(const ULng32 length, char * s)
 {
   unsigned char * bytes = (unsigned char *)s;

   // Walk from the least significant byte; a byte that wraps to 0
   // propagates the carry to its left neighbour.
   for (ULng32 pos = length; pos > 0; pos--)
     {
       if (++bytes[pos-1] != 0)
         return 0;   // carry absorbed
     }

   // The carry was never absorbed: report failure.
   return 1;
 }

 void str_complement(const ULng32 length, char * s)
 {
   for (ULng32 i = 0; i < length; i++)
     s[i] = ~(s[i]);
 }

 // ----------------------------------------------------------------------
 // How many characters does str_encode produce for byteLen input bytes?
 // Each full group of 3 bytes becomes 4 characters; 1 leftover byte needs
 // 2 more characters (6+2 bits) and 2 leftover bytes need 3 more (6+6+4).
 // -----------------------------------------------------------------------
 Lng32 str_encoded_len(Lng32 byteLen)
 {
   const Lng32 fullGroups = byteLen / 3;
   const Lng32 leftover   = byteLen % 3;

   if (leftover == 0)
     return fullGroups*4;
   if (leftover == 1)
     return fullGroups*4+2;
   if (leftover == 2)
     return fullGroups*4+3;

   // Only reachable when the remainder is negative (negative byteLen).
   assert(0);
   return 0;
 }

 // -----------------------------------------------------------------------
 // Encodes srcLen raw bytes from src (which may contain embedded NULs and
 // is not NUL-terminated) as printable ASCII in tgt and returns the
 // encoded length.  Every 6 input bits become one output character offset
 // by minChar, so 3 input bytes yield 4 characters; a trailing group of
 // 1 or 2 bytes yields 2 or 3 characters.  No terminator is written.
 // tgtMaxLen must be at least str_encoded_len(srcLen) (asserted).
 // This routine was formerly called CatRWAccessPath::explodeKey().
 // -----------------------------------------------------------------------
 Lng32 str_encode(char *tgt, Lng32 tgtMaxLen, void *src, Lng32 srcLen)
 {
   // start character for encoding (a range of 64 chars is used)
   // NOTE: this char is also defined in str_decode below!!!
   const char minChar = '!';

   const unsigned char * bytes = (const unsigned char *)src;
   unsigned char * chars = (unsigned char *) tgt;
   Lng32 length = str_encoded_len(srcLen);

   assert(tgtMaxLen >= length);

   Lng32 in = 0;
   Lng32 out = 0;

   // Each pass consumes up to 3 source bytes and emits up to 4 characters.
   for (; in < srcLen; in += 3, out += 4)
     {
       const unsigned char b0 = bytes[in];

       // char 0: high-order 6 bits of byte 0
       chars[out] = (b0 >> 2) + minChar;

       // char 1 starts with the low-order 2 bits of byte 0
       unsigned char c1 = ((b0 & 0x3) << 4) + minChar;

       if (in+1 < srcLen)
         {
           const unsigned char b1 = bytes[in+1];

           // char 1 also gets the high-order 4 bits of byte 1
           c1 += b1 >> 4;

           // char 2 starts with the low-order 4 bits of byte 1
           unsigned char c2 = ((b1 & 0xf) << 2) + minChar;

           if (in+2 < srcLen)
             {
               const unsigned char b2 = bytes[in+2];

               // char 2 also gets the high-order 2 bits of byte 2
               c2 += b2 >> 6;

               // char 3: low-order 6 bits of byte 2
               chars[out+3] = (b2 & 0x3f) + minChar;
             }

           chars[out+2] = c2;
         }

       chars[out+1] = c1;
     }

   return length;
 }

 // -----------------------------------------------------------------------
 // Computes how many bytes are encoded in an ASCII string of charLen
 // characters, assuming it was created by str_encode: 3 bytes per full
 // group of 4 characters, plus 1 byte for 2 extra characters or 2 bytes
 // for 3 extra characters.  A single extra character cannot come from
 // str_encode and trips an assertion.
 // -----------------------------------------------------------------------
 Lng32 str_decoded_len(Lng32 charLen)
 {
   const Lng32 fours = charLen / 4;
   const Lng32 extra = charLen % 4;

   if (extra == 0)
     return fours * 3;

   if (extra == 1)
     {
       // this length cannot have been generated by str_encoded_len!!
       assert(0);
       // with assertions compiled out, behave like the two-extra case
       return fours*3+1;
     }

   if (extra == 2)
     return fours*3+1;

   if (extra == 3)
     return fours*3+2;

   // Only reachable when the remainder is negative (negative charLen).
   assert(0);
   return 0;
 }

 // -----------------------------------------------------------------------
 // The inverse of str_encode: decodes srcLen characters from src into raw
 // bytes in tgt and returns the number of decoded bytes.
 // Returns -1 (instead of asserting) when
 //  - srcLen is negative or has a length str_encode cannot produce
 //    (srcLen % 4 == 1),
 //  - tgtMaxLen is smaller than the decoded length, or
 //  - a source character lies outside the 64-character range that starts
 //    at minChar.
 // tgt may have been partially written when -1 is returned for a bad
 // character.
 // -----------------------------------------------------------------------
 Lng32 str_decode(void *tgt, Lng32 tgtMaxLen, const char *src, Lng32 srcLen)
 {
   // start character for encoding (a range of 64 chars is used)
   // NOTE: this char is also defined in str_encode above!!!
   const char minChar = '!';

   unsigned char *target = (unsigned char *) tgt;
   const unsigned char *src1 = (const unsigned char *) src;

   // Reject lengths that str_decoded_len would assert on.
   if ((srcLen < 0) || (srcLen % 4 == 1))
     return -1;

   Lng32 length = str_decoded_len(srcLen);

   if (tgtMaxLen < length)
     return -1;

   Lng32 srcix = 0;
   Lng32 tgtix = 0;

   // move in groups of 4 source chars, at this point we have 0, 2, 3, or >3
   // characters left in the source
   while (srcix+1 < srcLen)
     {
       // The 6-bit values are computed as unsigned so that characters
       // below minChar wrap to a large value and fail the "< 64" test
       // (a signed difference would be negative and slip through).
       const unsigned int c0 = (unsigned int) (src1[srcix]   - minChar);
       const unsigned int c1 = (unsigned int) (src1[srcix+1] - minChar);
       if ((c0 >= 64) || (c1 >= 64))
         return -1;

       // first byte comes from 6 bits of first char and 2 bits of second char
       target[tgtix] = (unsigned char) ((c0 << 2) | (c1 >> 4));

       if (srcix+2 < srcLen)
         {
           const unsigned int c2 = (unsigned int) (src1[srcix+2] - minChar);
           if (c2 >= 64)
             return -1;

           // second byte gets 4 bits from second and 4 bits from third char
           target[tgtix+1] = (unsigned char) (((c1 & 0xf) << 4) | (c2 >> 2));

           if (srcix+3 < srcLen)
             {
               const unsigned int c3 = (unsigned int) (src1[srcix+3] - minChar);
               if (c3 >= 64)
                 return -1;

               // third byte gets 2 bits from third and 6 bits from fourth char
               target[tgtix+2] = (unsigned char) (((c2 & 0x3) << 6) | (c3 & 0x3f));
             }
         }

       srcix += 4;
       tgtix += 3;
     }

   return length;
 }

 // Strips leading and/or trailing blanks. src will contain a NULL after the
 // end of the first non-blank character.The length of the "stripped" string
 // is returned in len.
 // Returns pointer to the start of string after leading blanks.
 char * str_strip_blanks(char *src , Lng32 &len,
                         NABoolean stripLeading,
                         NABoolean stripTrailing
                         )
 {
   if (! src)
     return NULL;

   len = str_len(src);
   if (len == 0) // empty
     return src;

   if (stripTrailing)
     {
       len--;
       while ((len >= 0) && (src[len] == ' '))
         len--;

       len++;
       src[len] = 0;
     }

   Lng32 i = 0;
   if (stripLeading)
     {
       while ((i < len) && (src[i] == ' '))
         i++;
       len = len - i;
     }

   return &src[i];
 }

 //------------------------------------------------
 // Converts src into an ANSI identifier in tgt and reports its length in
 // tgtLen.  See the .h file for the full explanation of the parameters.
 //
 // Returns -1 when
 //  - src is empty,
 //  - the identifier is longer than 128 characters once surrounding
 //    quotes are removed,
 //  - a delimited identifier consists only of white space,
 //  - a regular identifier contains a character that is neither
 //    alphanumeric, '_', nor listed in allowedChars, or
 //  - mustValidate is set and a regular identifier is an SQL reserved word.
 // Returns 0 otherwise.
 //------------------------------------------------
 Lng32 str_to_ansi_id(char *src, char *tgt,Lng32 &tgtLen, short mustValidate, char *allowedChars)
 {
   UInt32 i;
   char *pBuf = src;
   NABoolean dQuoteSeen = FALSE;

   assert(tgt && src) ;
   tgtLen = str_len(src);

   str_cpy_all(tgt,src,tgtLen);

   tgt[tgtLen] = '\0';

   if (tgtLen == 0)
     return -1;

   // Check to see if this is a delimited identifier.  At least two
   // characters are required: a lone '"' would otherwise match as both
   // the opening and the closing quote and leave tgtLen at -1, which the
   // unsigned loops below would treat as a huge length.
   if ((tgtLen >= 2) && (tgt[0] == '"') && (tgt[tgtLen-1] == '"'))
     {
       dQuoteSeen = TRUE;
       // strip the first double quote out
       pBuf = tgt;
       tgt++;
       // strip the ending double quote out
       tgt[tgtLen-2] = '\0';
       tgtLen = tgtLen-2;
     }

   // If it is delimited, make sure it is not a string with just blanks
   NABoolean empty = TRUE;
   if (dQuoteSeen)
     {
       for (i = 0; i < tgtLen;i++)
         {
           if (isSpace8859_1(tgt[i])) // Convert all tabs to spaces
             tgt[i] = ' ';
           if (tgt[i] != ' ')
             empty = FALSE;
         }
       if (empty == TRUE)
         return -1;
     }

   if (tgtLen == 0)
     return -1;

   if (tgtLen > 128)
     return -1;

   UInt32 j = 0;

   for (i = 0; i < tgtLen; i++)
     {
       if (dQuoteSeen)
         {
           // This is a delimited identifier. Do the following:
           // 1.  We have removed the surrounding quotes by now
           // 2.  Replace any pair of double quotes by a single double quote.
           // 3.  Leave the case of each character untouched

           if ((tgt[i] == '"') && (tgt[i+1] == '"'))
             {
               // a doubled quote has been seen inside the string;
               // remove the second quote by shifting everything to the
               // right of it (terminator included) one place left
               for (j = i; j < tgtLen; j++)
                 tgt[j] = tgt[j+1];
               tgtLen--;
             }
         } //if dQuoteSeen
       else
         {
           // Check if this character is alphanumeric, an underscore, or
           // one of the allowed chars

           if( NOT isAlNum8859_1((unsigned char)(tgt[i])) && (tgt[i] != '_'))
             {
               if (allowedChars)
                 {
                   short found = 0;
                   const UInt32 allowedLen = str_len(allowedChars);
                   for (UInt32 k = 0; k < allowedLen; k++)
                     {
                       if (tgt[i] == allowedChars[k])
                         found = 1;
                     }
                   // If it is not in the allowed char list then it is invalid
                   if (!found)
                     return -1;
                 }
               else
                 return -1;
             }
           tgt[i] = TOUPPER(tgt[i]);
         }
     } // end for

   // In case it is not a delimited id then do this additional check
   if ((!dQuoteSeen) && (mustValidate))
     {
       // Check if it is a SQL reserved word
       if (IsSqlReservedWord(tgt))
         return -1;
     }

   // Copy everything back to the original pointer.
   // NOTE(review): pBuf is the start of the caller's tgt buffer only in
   // the delimited case; for a regular identifier it is still src, so the
   // upshifted text is written back into src.  Only tgtLen bytes are
   // copied (no terminator).  Behavior kept as-is; confirm against callers.
   str_cpy_all(pBuf,tgt,tgtLen);
   tgt = pBuf;   // tgt is a by-value parameter: no effect for the caller
   return 0;
 }

 // -----------------------------------------------------------------------
 // following two functions are used to return the catalog and schema names
 // given a qualified table name. Either the catalog or schema name can be
 // a delimited identifier name.
 // -----------------------------------------------------------------------
 // Copies the leading name component of src into tgt, NUL-terminates it
 // and returns its length.  The component ends at the first sep character
 // or at the end of src.  When src starts with a double quote (delimited
 // identifier) a sep only ends the component after an even number of
 // double quotes; the quotes themselves are copied into tgt.
 Int32 extractDelimitedName (char* tgt,  const char* const src, const char sep)
 {
    assert(tgt);

    const Int32 delimited = (src[0] == '\"');
    Int32 quotes = 0;   // double quotes seen so far (delimited case only)
    Int32 pos = 0;

    for (; src[pos] != '\0'; pos++)
    {
       const char ch = src[pos];

       // a separator outside of quotes ends the name
       if ((ch == sep) && (quotes % 2 == 0))
         break;

       if (delimited && (ch == '\"'))
         quotes++;

       tgt[pos] = ch;
    }

    tgt[pos] = '\0';

    // return the length of the returned buffer
    return pos;
 }

 void extractCatSchemaNames (char* catName, char *schName, char* qualTabName)
 {
    assert(catName && schName && qualTabName);

    char* src = qualTabName;

    // extract the catalog name
    Int32 buffLength = extractDelimitedName (catName, src);

    // advance to the start of the schema name
    src =  src + buffLength + ((src[0]=='\"')?2:1);

    // extract the schema name
    extractDelimitedName (schName, src);
 }

 /* str_str */
 char *(str_str)(const char *s1, const char *s2)
 {
     size_t s2len;
     /* Check for the null s2 case.  */
     if (*s2 == '\0')
         return (char *) s1;
     s2len = str_len(s2);
     for (; (s1 = str_chr(s1, *s2)) != NULL; s1++)
         if (str_ncmp(s1, s2, s2len) == 0)
             return (char *) s1;
     return NULL;
 }


 /* str_replace */
 char *(str_replace)(char *s1, const char *s2, const char *s3)
 {
   size_t s2len;
   size_t s3len;
   /* Check for the null s2 case.  */
   if (! s1)
     return NULL;

   if (! s2 || ! s3)
     return NULL;

   if ((*s2 == '\0') || (*s3 == '\0'))
     return (char *) s1;

   s2len = str_len(s2);
   s3len = str_len(s3);

   if (s2len != s3len)
     return NULL;

   NABoolean matchFound = FALSE;
   for (; (s1 = str_chr(s1, *s2)) != NULL; )
     {
       if (str_ncmp(s1, s2, s2len) == 0)
 	{
 	  matchFound = TRUE;

 	  str_cpy_all(s1, s3, s2len);

 	  s1 += s2len;
 	}
       else
 	s1++;
     }

   return (char *) s1;
 }


 /* str_ncmp: compares at most n bytes of s1 and s2 as unsigned chars,
    stopping at a NUL.  Returns 0 when they are equal over that range,
    -1 when s1 sorts first and 1 when s2 sorts first.  */
 Int32 (str_ncmp)(const char *s1, const char *s2, size_t n)
 {
     const unsigned char *a = (const unsigned char *) s1;
     const unsigned char *b = (const unsigned char *) s2;

     for (; n > 0; n--, a++, b++) {
         /* First difference decides the result.  */
         if (*a != *b)
             return (*a < *b) ? -1 : 1;
         /* Both strings ended together.  */
         if (*a == '\0')
             return 0;
     }

     /* Nothing (left) to compare.  */
     return 0;
 }

 /* str_chr: returns a pointer to the first occurrence of the character c
    (converted to char) in the string s, or NULL if it does not occur.
    Searching for '\0' returns a pointer to the terminator.  */
 char *(str_chr)(const char *s, Int32 c)
 {
     /* Convert once and use the same value for the scan and the final
        test; comparing *s against the unconverted int failed for
        characters above 0x7F passed as positive values when char is
        signed.  */
     const char target = (char) c;

     /* Scan s for the character.  When this loop is finished,
        s will either point to the end of the string or the
        character we were looking for.  */
     while (*s != '\0' && *s != target)
         s++;
     return ( (*s == target) ? (char *) s : NULL );
 }


 /* str_cpy_c: copies the NUL-terminated string s2, terminator included,
    into s1 and returns s1.  */
 char *(str_cpy_c)(char *s1, const char *s2)
 {
     char *out = s1;
     const char *in = s2;
     char ch;

     /* Copy byte by byte until the terminator has been stored.  */
     do {
         ch = *in++;
         *out++ = ch;
     } while (ch != '\0');

     return s1;
 }

 /* str_ncpy: copies at most n bytes of the string s2 into s1 and returns
    s1.  If s2 is shorter than n, the rest of the n bytes is filled with
    NULs; if it is not, no terminator is written.  */
 char *(str_ncpy)(char *s1, const char *s2, size_t n)
 {
     size_t done = 0;

     while (done < n) {
         const char ch = s2[done];
         s1[done++] = ch;
         if (ch == '\0') {
             /* Source exhausted: zero-fill what is left of the n bytes.  */
             memset(s1 + done, '\0', n - done);
             break;
         }
     }

     return s1;
 }


 /* str_cmp_c: compares the NUL-terminated strings s1 and s2 byte by byte
    as unsigned chars.  Returns 0 when equal, -1 when s1 sorts first and
    1 when s2 sorts first.  */
 Int32 (str_cmp_c)(const char *s1, const char *s2)
 {
     const unsigned char *a = (const unsigned char *) s1;
     const unsigned char *b = (const unsigned char *) s2;

     /* Skip the common prefix; stop at the end of s1 at the latest.  */
     while (*a != '\0' && *a == *b) {
         a++;
         b++;
     }

     if (*a < *b)
         return -1;
     return (*a > *b) ? 1 : 0;
 }

 /* str_cat_c: appends the string s2 (terminator included) to the end of
    the string already in s1 and returns s1.  */
 char *(str_cat_c)(char *s1, const char *s2)
 {
     /* The copy starts at s1's current terminator.  */
     char *end = s1 + strlen(s1);

     strcpy(end, s2);
     return s1;
 }


 // Tokenizer with caller-held state.  Pass the string to split in inStr on
 // the first call and NULL afterwards; *internal carries the position
 // between calls.  Leading blanks are skipped, then the token runs up to
 // the next delimiter c, which is overwritten with a NUL.  Returns the
 // token, or NULL when only blanks (or nothing) are left.  *internal is
 // set to NULL once the end of the string has been reached.
 char *str_tok(char *inStr, char c, char **internal)
 {
   char *cursor = (inStr != NULL) ? inStr : *internal;

   if (cursor == NULL)
     return NULL;

   // Skip leading blanks (the terminator is not a blank, so this stops).
   while (*cursor == ' ')
     cursor++;

   if (*cursor == '\0')
   {
     *internal = NULL;
     return NULL;
   }

   char *token = cursor;

   // Find the end of the token.
   while (*cursor != '\0' && *cursor != c)
     cursor++;

   if (*cursor == '\0')
   {
     // Last token: nothing left for the next call.
     *internal = NULL;
   }
   else
   {
     // Cut the token off and remember where to resume.
     *cursor = '\0';
     *internal = cursor + 1;
   }

   return token;
 }


 /*

 Algorithm - Run length encoding (RLE)

 If the element repeats less than 2, copy as is.

 If an element in the list repeats at least 2 times, copy it twice and
 then the count, which is n - 2, where n is the number of repeats.

 Note in some cases, the size of compressed could be longer than non-compressed

 Examples -
 Normal:
   aaaab...                aa<2>b...
   aa<repeat 300>c         aa<255>aa<41>c
   abbbbbc...              abb<3>c...
   aabbcc...               aa<0>bb<0>cc<0>...       -- bad case

 where <n> represents a counter with the value n, taking up one element of space.

 */

 // max counter value, affected by element size, e.g. 0x80 for char
 #define MAXCNT 0xFF //

 // Returns the number of bytes str_compress() would produce for the len
 // bytes at src, without writing anything.  Inputs shorter than 3 bytes
 // are reported as len (too short to compress).
 size_t str_compress_size(const char *src, const size_t len)
 {
   // len is original size before compression

   if (len < 3) return len;  // too short to compress

   size_t i = 0;   // read position in src
   size_t j = 0;   // size of the compressed output so far

   while (i + 2 < len)
   {
     if (src[i] == src[i+1]) // repeated elements
     {
       // Count further repeats beyond the first two.  The bounds check
       // comes first so that src[len] is never read.
       unsigned char k = 0;
       while (i+k+2 < len && k < (MAXCNT-1) && src[i] == src[i+k+2])
       {
         k++;
       }

       j += 3;       // two copies of the element plus the counter
       i += k + 2;
     }
     else // non-repeat element
     {
       j++; i++;
     }
   }

   // last few elements are stored as they are
   if (i < len)
     j += len - i;

   return j;  // compressed size
 }

 // Run-length encodes the len bytes at src into tgt and returns the
 // compressed size.  A run of two or more equal bytes is written as the
 // byte twice followed by a counter holding the number of additional
 // repeats; other bytes are copied unchanged.  The output can be larger
 // than the input.
 // Inputs shorter than 3 bytes are not compressed: len is returned and
 // tgt is NOT written.
 size_t str_compress(char *tgt, const char *src, const size_t len)
 {
   // tgt - compressed
   // src - before compress
   // len is original size before compression

   if (len < 3) return len;  // too short to compress

   size_t i = 0;   // read position in src
   size_t j = 0;   // write position in tgt

   while (i + 2 < len)
   {
     if (src[i] == src[i+1]) // repeated elements
     {
       // Count further repeats beyond the first two.  The bounds check
       // comes first so that src[len] is never read.
       unsigned char k = 0;
       while (i+k+2 < len && k < (MAXCNT-1) && src[i] == src[i+k+2])
       {
         k++;
       }

       tgt[j++] = src[i];    // first of the repeated elements
       tgt[j++] = src[i+1];  // second of the repeated elements
       tgt[j++] = k;         // repeat count
       i += k + 2;
     }
     else // non-repeat element
     {
       tgt[j++] = src[i++];
     }
   }

   while (i < len)  // last few elements
     tgt[j++] = src[i++];

   return j;  // compressed size
 }

 // Expands srcLen bytes of run-length encoded data at src into tgt and
 // returns the decompressed size.  Two equal adjacent bytes followed by a
 // counter byte expand to the byte repeated (2 + counter) times; any other
 // byte is copied unchanged.  The last two source bytes are always copied
 // unchanged.  tgt must be large enough for the result.
 size_t str_decompress(char *tgt, const char *src, const size_t srcLen)
 {
   size_t in = 0;    // read position in src
   size_t out = 0;   // write position in tgt

   while (in + 2 < srcLen)
   {
     const char cur = src[in];

     if (cur != src[in+1])
     {
       // not compressed or unable to compress
       tgt[out++] = cur;
       in++;
       continue;
     }

     // compressed triple: element, element, counter
     tgt[out++] = cur;
     tgt[out++] = src[in+1];

     // the counter gives the number of additional copies
     for (unsigned char extra = src[in+2]; extra > 0; extra--)
       tgt[out++] = src[in+1];

     in += 3;
   }

   // last few elements
   for (; in < srcLen; in++)
     tgt[out++] = src[in];

   return out;  // decompressed size
 }

 // -----------------------------------------------------------------------
 // How many characters are needed to show srcByteLen bytes in hex ASCII?
 // Each input byte becomes 2 hexadecimal digit characters; for example,
 // the ASCII character 0 in the input becomes 30 in the output.
 // The computed length includes neither the NULL terminator character
 // nor the 0x (or 0X) prefix.
 // -----------------------------------------------------------------------
 size_t str_computeHexAsciiLen(size_t srcByteLen)
 {
   const size_t digitsPerByte = 2;

   return digitsPerByte * srcByteLen;
 }

 // -----------------------------------------------------------------------
 // Converts the srcLength bytes at src into upper-case hexadecimal digit
 // characters written to result (two characters per byte, in memory
 // order, without a 0x prefix).  When addNullAtEnd is set a '\0' is
 // appended after the digits.
 // Returns the number of hex characters written (terminator excluded),
 // -590 when a pointer is NULL or a length is zero, and -563 when
 // maxResultSize is too small for the digits (plus terminator, if asked).
 // -----------------------------------------------------------------------
 Int32 str_convertToHexAscii(const char * src,               // in
                           const size_t srcLength,         // in
                           char *       result,            // out
                           const size_t maxResultSize,     // in
                           NABoolean    addNullAtEnd)      // in - default is TRUE
 {
   static const char hexDigits[] = "0123456789ABCDEF";

   if (!src || !result || srcLength == 0 || maxResultSize == 0)
     return -590; // ZFIL_ERR_BADPARMVALUE - bad parameter value(s)

   const size_t hexLen = str_computeHexAsciiLen(srcLength);
   const size_t needed = addNullAtEnd ? hexLen + 1 : hexLen;
   if (needed > maxResultSize)
     return -563; // ZFIL_ERR_BUFTOOSMALL - output buffer too small

   // The source length may be odd, so no endian conversion is attempted:
   // memory is dumped byte by byte, high nibble first.
   char * out = result;
   const char * const srcEnd = src + srcLength;
   for (const char * in = src; in != srcEnd; ++in)
   {
     *out++ = hexDigits[((*in) & 0xF0) >> 4];
     *out++ = hexDigits[(*in) & 0x0F];
   }

   if (addNullAtEnd)
     *out = '\0';

   return hexLen;
 }

 // Print a brief, human-readable rendering of the data pointed at by a
 // tupp to cout. The data type is not known here, so it is guessed from
 // the bytes and the length of the field. The arguments are obtained
 // from a tupp as follows.
 //
 //    char * dataPointer = getDataPointer();
 //    Lng32 len = tupp_.getAllocatedSize();
 //
 //    printBrief(dataPointer, len) if you want an end of line
 //
 //    printBrief(dataPointer, len, FALSE) if you don't
 //
 // dataPointer - start of the field; if NULL, no value text is printed
 //               (the end of line is still written when requested)
 // len         - length of the field in bytes; a value <= 0 is treated
 //               as an empty field, which is reported as *lo*
 // endLine     - if true, an end of line is written after the value
 //
 void printBrief(char* dataPointer, Lng32 len, NABoolean endLine)
 {
    // We don't know what the data type is, but we do know how
    // long the field is. So we will guess the data type.

    // Note that varchar length fields are not handled here. For
    // certain Tupp such as MdamPoint, this is OK because the Generator
    // transforms varchars to chars.

    // We might have a null indicator, but we have no way of knowing
    // that here. So we will ignore that possibility. (Sorry!)

    // If the length is 2 or 4 or 8, we'll guess that it is an
    // integer and append a signed, big-endian integer interpretation.

    // If the length is 7 and the first two bytes, when interpreted
    // as Big Endian, looks like a year within 100 years of 2000,
    // we'll interpret it as a TIMESTAMP(0).

    // There are other possibilities of course which can be added
    // over time but a better solution would be to change the
    // Generator and Executor to simply give us the data type info.

    char local[1001];  // text/hex output is truncated to fit in here
    local[0] = '\0';

    if (dataPointer)
      {
      // A negative length must not be compared as an unsigned value,
      // or it would look like an enormous field.
      const size_t dataLen = (len > 0) ? (size_t)len : 0;

      // Look at the data as unsigned bytes so that the results do not
      // depend on whether plain char is signed, and so that isprint()
      // is never handed a negative value.
      const unsigned char * bytes = (const unsigned char *)dataPointer;

      bool allNulls = true;
      bool allFFs = true;
      bool allPrintable = true;
      for (size_t i = 0; i < dataLen; i++)
        {
        if (bytes[i] != 0x00) allNulls = false;
        if (bytes[i] != 0xff) allFFs = false;
        if (!isprint(bytes[i])) allPrintable = false;

        // every byte has to be examined to decide allPrintable; we can
        // only stop early once all three guesses have been ruled out
        if (!allNulls && !allFFs && !allPrintable)
          break;
        }

      if (allNulls)
        {
        strcpy(local,"*lo*");  // hopefully there won't be a legitimate value of *lo*
        }
      else if (allFFs)
        {
        strcpy(local,"*hi*");  // hopefully there won't be a legitimate value of *hi*
        }
      else if (allPrintable)
        {
        // copy the text, truncating it if it does not fit in local
        size_t lengthToMove = sizeof(local) - 1;
        if (dataLen < lengthToMove)
          lengthToMove = dataLen;
        memcpy(local,dataPointer,lengthToMove);
        local[lengthToMove] = '\0';
        }
      else
        {
        // create a hex representation of the first 496 bytes
        static const char hexDigits[] = "0123456789abcdef";

        strcpy(local,"hex ");
        char * nextTarget = local + strlen(local);
        size_t repdChars = ((sizeof(local) - 1)/2) - 4; // -4 to allow for "hex "
        if (dataLen < repdChars)
          repdChars = dataLen;

        for (size_t i = 0; i < repdChars; i++)
          {
          *nextTarget++ = hexDigits[bytes[i] >> 4];
          *nextTarget++ = hexDigits[bytes[i] & 0x0f];
          }

        *nextTarget = '\0';
        }

      // Any interpretation below is appended after what we have so far.
      // These lengths are all tiny, so local has plenty of room left;
      // snprintf is bounded by the remaining space anyway.
      const size_t used = strlen(local);
      char * const tail = local + used;
      const size_t room = sizeof(local) - used;

      if (dataLen == 2 || dataLen == 4 || dataLen == 8)
        {
        // The value is big-endian. Assemble it in an unsigned 64-bit
        // accumulator so that no intermediate result can overflow.
        unsigned long long raw = 0;
        for (size_t i = 0; i < dataLen; i++)
          raw = (raw << 8) | bytes[i];

        // Sign-extend from the field width (16, 32 or 64 bits).
        const unsigned long long signBit = 1ULL << (8 * dataLen - 1);
        const unsigned long long mask = signBit | (signBit - 1);
        long long value;
        if (raw & signBit)
          value = -(long long)(~raw & mask) - 1;  // two's complement negative
        else
          value = (long long)raw;

        if (dataLen == 2)       // it might be a short
          snprintf(tail, room, " (short %lld)",value);
        else if (dataLen == 4)  // it might be a long
          snprintf(tail, room, " (long %lld)",value);
        else                    // it might be a 64-bit integer
          snprintf(tail, room, " (long long %lld)",value);
        }
      else if (dataLen == 7)  // a TIMESTAMP(0) perhaps?
        {
        long year = 256L * bytes[0] + bytes[1];
        if ((year >= 1900) && (year <= 2100))
          {
          // looks like a TIMESTAMP(0); look further
          long month = bytes[2];
          long day = bytes[3];
          long hour = bytes[4];
          long minute = bytes[5];
          long second = bytes[6];

          // the fields come from unsigned bytes, so only the upper
          // bounds of hour, minute and second need checking
          if ((month >= 1) && (month <= 12) &&
              (day >= 1) && (day <= 31) &&
              (hour <= 23) &&
              (minute <= 59) &&
              (second <= 59))
            {
            snprintf(tail, room,
                     " (TIMESTAMP(0) %ld-%02ld-%02ld %02ld:%02ld:%02ld)",
                     year,month,day,hour,minute,second);
            }
          }
        }
      }
    cout << local;
    if (endLine)
      cout << endl;
 }