blob: d6bfab004a75195d6378a6edba5684dad269541d [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
#ifndef STR_H
#define STR_H
/* -*-C++-*-
*****************************************************************************
*
* File: str.h
* Description: SQL/MX string functions. One big reason for declaring our
* own string functions is that much of the SQL/MX code
* can not use the C runtime (see also file NAStdlib.h).
* Created: 7/10/95
* Language: C++
*
*
*
*
*****************************************************************************
*/
// -----------------------------------------------------------------------
#include <string.h>
#include "Platform.h"
#include "NAWinNT.h"
#include "NAStdlib.h"
#include "unicode_char_set.h"
#include "Int64.h"
#include "NAAssert.h"
// -----------------------------------------------------------------------
// toupper() and tolower() equivalence (ASCII letters 'a'..'z' / 'A'..'Z'
// only; every other value is returned unchanged).
//
// Each macro is defined only if the name is not already defined.
// Every use of the argument is parenthesized so that an argument which is
// itself an expression (for example `flag ? 'a' : 'b'`) is compared and
// adjusted as a single value.
// NOTE: the argument is still evaluated more than once, so do not pass an
// expression with side effects (for example `*p++`).
// The constant 32 is the distance between an upper-case ASCII letter and
// its lower-case counterpart ('a' - 'A').
// -----------------------------------------------------------------------
#ifndef TOUPPER
#define TOUPPER(c) ((((c) >= 'a') && ((c) <= 'z')) ? ((c) - 32) : (c))
#endif
#ifndef TOLOWER
#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 32) : (c))
#endif
//
// ---------------------------------------------------------------------
// Helper functions for ISO 8859_1 (8-bit european) alphabet processing
// ---------------------------------------------------------------------
//
Int32 isUpper8859_1(NAWchar c);
Int32 isLower8859_1(NAWchar c);
Int32 isAlpha8859_1(NAWchar c);
Int32 isAlNum8859_1(NAWchar c);
Int32 isSpace8859_1(NAWchar c);
Int32 isHexDigit8859_1(NAWchar c); // 0, 1, .., A, B, C, D, E, F (case insensitive)
Int32 isDigit8859_1(NAWchar c);
Int32 isCaseInsensitive8859_1(NAWchar c); // ISO 8859-1 char for which there is no
// upcase equivalent. hex values 0xDF & 0xFF
// -----------------------------------------------------------------------
// Compare the first <length> bytes of <left> and <right> with memcmp()
// (an unsigned, byte-by-byte comparison) and normalize the outcome:
//   -1 if left < right,
//    0 if left == right,
//    1 if left > right.
// -----------------------------------------------------------------------
inline Int32 str_cmp(const char *left, const char *right, Int32 length)
{
  const Int32 order = memcmp(left, right, length);

  // memcmp() only guarantees the sign of a non-zero result; collapse it
  // to exactly -1, 0 or 1 for the caller.
  if (order > 0)
    return 1;
  if (order < 0)
    return -1;
  return 0;
}
Int32 str_cmp_ne(const char *left, const char *right);
// str_cmp_c() takes two strings as arguments and returns a value less
// than zero if the first is lexicographically less than the second, a
// value greater than zero if the first is lexicographically greater
// than the second, or zero if the two strings are equal. The
// comparison is done by comparing the coded (ascii) value of the
// characters, character by character.
Int32 (str_cmp_c)(const char *s1, const char *s2);
// The str_ncmp() function shall compare not more than n bytes
// (bytes that follow a null byte are not compared) from the array
// pointed to by s1 to the array pointed to by s2. The sign of a
// non-zero return value is determined by the sign of the difference
// between the values of the first pair of bytes (both interpreted
// as type unsigned char) that differ in the strings being compared.
Int32 (str_ncmp)(const char *s1, const char *s2, size_t n);
// The str_str() function shall locate the first occurrence in the
// string pointed to by s1 of the sequence of bytes (excluding the
// terminating null byte) in the string pointed to by s2. The
// function returns the pointer to the matching string in s1 or a
// null pointer if a match is not found. If s2 is an empty string,
// the function returns s1.
char *(str_str)(const char *s1, const char *s2);
// The str_chr() function shall locate the first occurrence of c
// (converted to a char) in the string pointed to by s. The terminating
// null byte is considered to be part of the string. The function
// returns the location of the found character, or a null pointer
// if the character was not found.
// This function is used to find certain characters in strings.
char *(str_chr)(const char *s, Int32 c);
// The str_replace() function shall locate all occurrences in the
// string pointed to by s1 of the sequence of bytes (excluding the
// terminating null byte) in the string pointed to by s2 and replace
// them with string pointed to by s3.
// s2 and s3 must have the same length.
// The function returns the pointer to the replaced string s1 or a
// null pointer if a match is not found. If s2 or s3 are empty strings,
// or do not have the same length, the function returns NULL.
char *(str_replace)(char *s1, const char *s2, const char *s3);
// -----------------------------------------------------------------------
// Set the first <length> bytes of <str> to <padchar> (a blank unless the
// caller passes something else). Always returns 0.
// -----------------------------------------------------------------------
inline
Int32 str_pad(char *str, Int32 length, char padchar = ' ')
{
  // memset() takes the place of an earlier hand-written byte-by-byte loop;
  // the runtime routine is expected to perform better.
  const size_t fillBytes = (size_t) length;
  memset(str, padchar, fillBytes);
  return 0;
}
// -----------------------------------------------------------------------
// copy <tgtlen> bytes of string <src> into <tgt>, if <src> is
// shorter than <tgtlen> bytes then pad <tgt> with <padchar>
//
// WARNING: This routine *does not* work exactly like the C-runtime strcpy(),
// nor the strncpy() functions.
//
// For example: str_cpy(dest,src,str_len(src)) is guaranteed
// to result in a non-null-terminated string. Adding one or
// more to the result of str_len() won't help.
//
// But str_cpy(dest,src,str_len(src)+1,'\0') should
// work. You may want to use str_cpy_all() instead; that
// routine works more like memcpy() and behaves well.
// -----------------------------------------------------------------------
Int32 str_cpy(char *tgt, const char *src, Int32 tgtlen, char padchar = ' ');
// -----------------------------------------------------------------------
// This routine behaves like str_cpy except that the length of
// the source (<srclen>) is also supplied, and <padchar> has no default.
// -----------------------------------------------------------------------
Int32 byte_str_cpy(char *tgt, Int32 tgtlen,
const char *src, Int32 srclen, char padchar);
// Perform a true C style strcpy()
// The str_cpy_c() function shall copy the string pointed to by s2
// (including the terminating null byte) into the array pointed to by
// s1. If copying takes place between objects that overlap,
// the behavior is undefined. The function returns s1. No value is
// used to indicate an error.
char *(str_cpy_c)(char *s1, const char *s2);
// The str_ncpy() function shall copy not more than n bytes (bytes
// that follow a null byte are not copied) from the array pointed to
// by s2 to the array pointed to by s1. If copying takes place between
// objects that overlap, the behavior is undefined. If the array
// pointed to by s2 is a string that is shorter than n bytes, null bytes
// shall be appended to the copy in the array pointed to by s1, until
// n bytes in all are written. The function shall return s1;
// no return value is reserved to indicate an error.
char *(str_ncpy)(char *s1, const char *s2, size_t n);
// -----------------------------------------------------------------------
// concatenate <first> and <second> and store the result in <result>
// -----------------------------------------------------------------------
Int32 str_cat(const char *first, const char *second, char *result);
// The str_cat_c() function shall append a copy of the string pointed to
// by s2 (including the terminating null byte) to the end of the string
// pointed to by s1. The initial byte of s2 overwrites the null byte
// at the end of s1. If copying takes place between objects that
// overlap, the behavior is undefined. The function returns s1.
// This function is used to attach one string to the end of another
// string. It is imperative that the first string (s1) have the space
// needed to store both strings.
char *(str_cat_c)(char *s1, const char *s2);
// -----------------------------------------------------------------------
// convert <i> to ASCII and store the result in <outstr>
// -----------------------------------------------------------------------
char *str_itoa(ULng32 i, char *outstr);
char *str_ltoa(Int64 i, char *outstr);
// -----------------------------------------------------------------------
// convert instr to numeric and return in i. String must be numbers only.
// -----------------------------------------------------------------------
Int64 str_atoi(const char * instr, Lng32 instrLen);
// convert a scaled exact numeric string and return as float.
// input of the form: mmm.ff
double str_ftoi(const char * instr, Lng32 instrLen);
// Dummy routine to ensure that str_cpy_all gets inlined. Once
// the compiler is fixed to inline routines with calls to assert,
// Remove callAssert() in callers and replace with direct call to
// assert.
void callAssert(const char* tgt, const char* src, Lng32 length);
// -----------------------------------------------------------------------
// Move <length> bytes from <src> to <tgt>.
// The bytes are moved with memmove(), so <tgt> and <src> may overlap.
// A null <tgt> or <src> together with a non-zero <length> is reported
// through callAssert() before the move is attempted.
// -----------------------------------------------------------------------
inline void str_cpy_all(char *tgt, const char *src, Lng32 length)
{
  // A null pointer is tolerated only for a zero-length request. The check
  // is routed through callAssert() instead of a direct assert so that this
  // routine can still be inlined (see the note on callAssert()).
  if (length != 0 && (tgt == NULL || src == NULL))
    callAssert(tgt, src, length);

  // memmove() rather than memcpy(): the Executor has been observed calling
  // this routine with overlapping memory (changed during the DUMA work).
  memmove(tgt, src, length);
}
// -----------------------------------------------------------------------
// move <length> bytes from <src> to <tgt>
// handles overlapping memory between target and source
// -----------------------------------------------------------------------
void str_memmove(char *tgt, const char *src, Lng32 length);
// -----------------------------------------------------------------------
// allocate new <tgt>,
// move <length> characters from <src> into <tgt>, reserve the
// first 4 bytes of <tgt> for the varchar length and set the varchar
// length to <length>.
//
// Compare convertNAString in String.h
// -----------------------------------------------------------------------
Int32 str_varchar_alloc_and_copy(char *tgt, const char *src, Lng32 length);
// -----------------------------------------------------------------------
// Copies <src> to <tgt> for <length> bytes.
// Removes trailing <blank_char>s by putting an <end_char> as terminator.
// -----------------------------------------------------------------------
Int32 str_cpy_and_null(char * tgt,
const char * src,
Lng32 length,
char end_char = '\0',
char blank_char = ' ',
NABoolean nullTerminate = FALSE);
// ---------------------------------------------------------------
// copies src to tgt for length bytes and upshifts, if upshift <> 0,
// else downshifts.
// Src and Tgt may point to the same location.
// ---------------------------------------------------------------
Int32 str_cpy_convert(char * tgt, char * src,
Lng32 length, Int32 upshift);
Int32 str_len(const char * s);
// -----------------------------------------------------------------------
// "Increments" a string. If successful, 0 is returned. Otherwise, 1
// is returned.
// -----------------------------------------------------------------------
Int32 str_inc(const ULng32 length, char * s);
// -----------------------------------------------------------------------
// Complements a string.
// -----------------------------------------------------------------------
void str_complement(const ULng32 length, char * s);
//------------------------------------------------------------------------
// str_sprintf: formatted output into a character buffer.
//
// NOTE: the #define below makes str_sprintf an alias for sprintf. Every use
// of the name str_sprintf after the #define -- including the prototype on
// the very next line -- is rewritten by the preprocessor to sprintf, so
// that prototype is in effect a declaration of sprintf itself.
//
// Original description (appears to predate the alias -- TODO confirm):
// Our own version of sprintf that provides a subset of the functionality
// of sprintf as we cannot call C runtime from the SQL executor.
//------------------------------------------------------------------------
#define str_sprintf sprintf
Int32 str_sprintf(char *, const char *, ...);
// -----------------------------------------------------------------------
// Encoding and decoding of an array of bytes into characters
// -- how many string bytes are needed to encode byteLen bytes?
// -- encode srcLen bytes into a string (not null terminated),
// return the encoded length
// -- how many bytes are in a given encoded string?
// -- decode a string into byte representation and return number of
// bytes decoded
// -----------------------------------------------------------------------
Lng32 str_encoded_len(Lng32 byteLen);
Lng32 str_encode(char *tgt, Lng32 tgtMaxLen, void *src, Lng32 srcLen);
Lng32 str_decoded_len(Lng32 srcLen);
Lng32 str_decode(void *tgt, Lng32 tgtMaxLen, const char *src, Lng32 srcLen);
//------------------------------------------------------------------------
// Strips leading and/or trailing blanks. src will contain a NULL after the
// end of the first non-blank character. The length of the "stripped" string
// is returned in len.
// Returns pointer to the start of string after leading blanks.
//------------------------------------------------------------------------
char * str_strip_blanks(char *src , Lng32 &len,
NABoolean stripLeading = FALSE,
NABoolean stripTrailing = TRUE
);
//------------------------------------------------------------------------
// This function transforms src into the form of an SQL identifier
// Input : src,allowedChar
// Output: tgt, tgtLen
// The caller needs to pass in the string to be transformed in src.
// tgt needs to be a pointer pointing to a buffer at least as long as src
// allowedChar is generally NULL. But if the caller wants certain special
// characters to be allowed as part of the Id, then a null terminated string
// of these chars needs to be passed in eg : "$#"
//------------------------------------------------------------------------
Lng32 str_to_ansi_id(char *src, char *tgt,Lng32 &tgtLen,short mustValidate = 1, char *allowedChar = 0);
//int strSprintf(char * buffer, char * format, ...);
// -----------------------------------------------------------------------
// following function is used to extract a name that may be delimited
// from a given string. The name may end with a separator or end of line.
// -----------------------------------------------------------------------
Int32 extractDelimitedName (char* tgt, const char* const src, const char sep = '.');
// -----------------------------------------------------------------------
// following function is used to return the catalog and schema names
// given a qualified table name. Either the catalog or schema name can be
// a delimited identifier name.
// -----------------------------------------------------------------------
void extractCatSchemaNames (char* catName, char *schName, char* qualTabName);
//inline
Int32 mem_cpy_all(void *tgt, const void *src, Lng32 length);
//{
//#if defined (NA_YOS)
// for (int i = 0; i < length; i++)
// ((unsigned char *)tgt)[i] = ((unsigned char *)src)[i];
//#else
// memcpy(tgt, src, length);
//#endif
// return 0;
//}
// Similar to strtok, but for the following
// Only one character can be delimiter
// Third parameter char **internal should not be manipulated by the caller
// Removes blanks at the beginning of the token automatically
char *str_tok(char *inStr, char c, char **internal);
//------------------------------------------------------------------------
// String compression and decompression functions using RLE method
// See str.cpp for details
//------------------------------------------------------------------------
size_t str_compress_size(const char *src, const size_t len);
size_t str_compress(char *tgt, const char *src, const size_t len);
size_t str_decompress(char *tgt, const char *src, const size_t srcLen);
// -----------------------------------------------------------------------
// How many bytes are needed to encode byteLen bytes in Hex ASCII?
// Each byte of input string is converted into 2 hexadecimal digit
// ASCII characters output string; for example, the ASCII character 0
// in the input string is converted into 30 in the output string.
// The computed length includes neither the NULL terminator character
// nor the 0x (or 0X) prefix.
// -----------------------------------------------------------------------
size_t str_computeHexAsciiLen(size_t srcByteLen);
// -----------------------------------------------------------------------
// Convert the input string (a stream of bytes) into the encoded
// hexadecimal digit ASCII characters returned via the parameter "result".
// The output string does not include the 0x prefix. By default a
// NULL character - i.e. '\0' - is appended to the output string.
// -----------------------------------------------------------------------
Int32 str_convertToHexAscii(const char * src, // in
const size_t srcLength, // in
char * result, // out
const size_t maxResultSize, // in - including NULL terminator if addNullAtEnd
NABoolean addNullAtEnd = TRUE); // in
// Print the data pointed at by a tupp. The data type
// is inferred from the characters. The arguments
// are obtained from a tupp as follows.
//
// char * dataPointer = getDataPointer();
// Lng32 len = tupp_.getAllocatedSize();
//
// printBrief(dataPointer, len) if you want an end of line
//
// printBrief(dataPointer, len, FALSE) if you don't
//
void printBrief(char* dataPointer, Lng32 keyLen, NABoolean endLine = TRUE);
#endif // STR_H