blob: 4d7f4ed614d240107e10f7c3f40d81b09d873018 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
//
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: RuRangeCollection.cpp
* Description: Implementation of class CRURangeCollection
*
* Created: 07/06/2000
* Language: C++
*
*
******************************************************************************
*/
#include "RuRangeCollection.h"
#include "dmSqlTuple.h"
#include "RuDupElimLogRecord.h"
//--------------------------------------------------------------------------//
// CRURangeCollection
//--------------------------------------------------------------------------//
//--------------------------------------------------------------------------//
// Constructor and destructor
//--------------------------------------------------------------------------//
CRURangeCollection::CRURangeCollection(CDDObject::ERangeLogType rlType) :
rlType_(rlType),
rangeList_(eItemsAreOwned),
pSortedRangeVector_(NULL),
minEpoch_(0),
pMaxCKRecord_(NULL),
balanceCounter_(0)
{}
CRURangeCollection::~CRURangeCollection()
{
if (NULL != pSortedRangeVector_)
{
delete [] pSortedRangeVector_;
}
}
//--------------------------------------------------------------------------//
// CRURangeCollection::Reset()
//
// Release memory and re-initialize the state variables.
// The collection will live through multiple invocations of Flush().
//
//--------------------------------------------------------------------------//
void CRURangeCollection::Reset()
{
rangeList_.RemoveAll();
if (NULL != pSortedRangeVector_)
{
delete [] pSortedRangeVector_;
pSortedRangeVector_ = NULL;
}
balanceCounter_ = 0;
minEpoch_ = 0;
pMaxCKRecord_ = NULL;
}
//--------------------------------------------------------------------------//
// CRURangeCollection::IsClusteringKeyCovered()
//
// Does *some* range in the collection cover this key's value?
// (If not, the collection's construction is complete).
// Recall that all the ranges in the IUD log are closed.
//
//--------------------------------------------------------------------------//
BOOL CRURangeCollection::
IsClusteringKeyCovered(const CRUIUDLogRecord *pRec) const
{
if (0 == GetSize())
{
return FALSE;
}
if (balanceCounter_ > 0)
{
return TRUE; // There is at least one unmatched Begin-range
}
// The collection is balanced.
// Let's check whether this CK is equal to the maximum CK stored in it.
RUASSERT(NULL != pMaxCKRecord_);
return pMaxCKRecord_->IsClusteringKeyEqualTo(*pRec);
}
//--------------------------------------------------------------------------//
// CRURangeCollection::PerformCrossTypeDE()
//
// Applicable only if the single-row records are scanned (i.e.,
// the single-row resolution is enforced).
//
// Find whether this record must be deleted/updated by DE. If yes,
// mark both the record's and the screening range's data structures
// (in the main memory). This will allow to avoid blind I/O.
//
//--------------------------------------------------------------------------//
void CRURangeCollection::PerformCrossTypeDE(CRUIUDLogRecord *pRec)
{
DSListPosition pos = rangeList_.GetHeadPosition();
while (NULL != pos)
{
CRURange *pRange = rangeList_.GetNext(pos);
pRange->PerformCrossTypeDE(pRec);
}
}
//--------------------------------------------------------------------------//
// CRURangeCollection::InsertRangeBoundary()
//
// Insert a new Begin-range/End-range record into the collection.
//
//--------------------------------------------------------------------------//
void CRURangeCollection::InsertRangeBoundary(const CRUIUDLogRecord *pRec)
{
RUASSERT(FALSE == pRec->IsSingleRowOp());
// The scan order guarantees that the last record has the greatest CK.
pMaxCKRecord_ = pRec;
TInt32 ep = pRec->GetEpoch();
UpdateMinEpoch(ep);
if (FALSE == pRec->IsBeginRange())
{
balanceCounter_--;
if (balanceCounter_ < 0)
{
// Incorrent (unbalanced) logging - should never happen
CRUException ex;
ex.SetError(IDS_RU_UNBALANCED_LOGGING);
throw ex;
}
// Find the open range which has a Begin-range record
// that matches this End-range record
LocateMatchForER(pRec);
}
else
{
balanceCounter_++;
// Open a new range
CRURange *pNewRange = new CRURange();
pNewRange->SetBRRecord(pRec);
rangeList_.AddTail(pNewRange);
}
}
//--------------------------------------------------------------------------//
// CRURangeCollection::PrepareForFlush()
//
// Before the flush, verify that all the ranges are balanced,
// and sort them by the temporal (syskey) order, which will allow
// I/O optimizations (see CRUDupElimRangeResolver).
//
//--------------------------------------------------------------------------//
void CRURangeCollection::PrepareForFlush()
{
Lng32 size = GetSize();
RUASSERT(size > 0);
VerifyBalance();
typedef CRURange *PCRURange;
pSortedRangeVector_ = new PCRURange[size];
DSListPosition pos = rangeList_.GetHeadPosition();
for (Int32 i=0; NULL != pos; i++)
{
CRURange *pRange = rangeList_.GetNext(pos);
// Initialize the fragment list
pRange->PrepareForFlush();
pSortedRangeVector_[i] = pRange;
}
// Sort the ranges by the temporal order
qsort(pSortedRangeVector_, size, sizeof(PCRURange), CompareElem);
}
//--------------------------------------------------------------------------//
// CRURangeCollection::PerformRangeAnalysis()
//
// This method is called by CRUDupElimRangeResolver if the
// collection has more than one range in it (and hence,
// there are at least two overlapping ranges).
//
// In order to solve the conflicts, every range should be analysed
// with respect to all the ranges that have been logged later
// (i.e., those that it's inferior to).
//
// ASSUMPTION: The list of ranges is ordered by SYSKEY before the call.
//
//--------------------------------------------------------------------------//
void CRURangeCollection::PerformRangeAnalysis()
{
Lng32 size = GetSize();
for (Int32 oldIndex=0; oldIndex<size; oldIndex++)
{
CRURange *pOlderRng = pSortedRangeVector_[oldIndex];
for (Int32 youngIndex = oldIndex+1; youngIndex<size; youngIndex++)
{
CRURange *pYoungerRng = pSortedRangeVector_[youngIndex];
pOlderRng->PerformRangeAnalysis(*pYoungerRng);
}
}
}
//--------------------------------------------------------------------------//
// PRIVATE AREA
//--------------------------------------------------------------------------//
//--------------------------------------------------------------------------//
// CRURangeCollection::LocateMatchForER()
//
// Find the begin-range record that matches this end-range record,
// and update the appropriate boundary pair.
//
// Algorithm: search for the greatest value of BR-syskey
// which is *not* greater than this record's syskey. This is based
// on an *assumption* that for an originally logged (BR,ER) pair,
// the timestamps are close enough (i.e., there is no other BR record
// that has a syskey between theirs).
//
// Do not perform this algorithm for manually range-logged tables.
// For such tables, no cross-type duplicates exist. Besides, the matching
// algorithm relies on comparison between syskeys, which is correct only
// within the same partition's boundaries.
//
// For hash-partitioned tables, manually-logged ranges can span partitions,
// and the relationship between BR-syskey and ER-syskey is not defined.
// In this case, we always match the end-range record to the *some*
// open range in the list. This ensures that the match is always found, and
// preserves the user's semantics if the ranges are either disjoint or touching:
//
// -------- or ----------
// ------ --------
//
// However, if the user manually logs overlapping ranges (which is forbidden,
// by the external), the semantics will not be preserved.
//
//--------------------------------------------------------------------------//
void CRURangeCollection::LocateMatchForER(const CRUIUDLogRecord *pRec)
{
CRURange *pMatchingRange = NULL;
if (CDDObject::eMANUAL == rlType_)
{
DSListPosition pos = rangeList_.GetHeadPosition();
while (NULL != pos)
{
CRURange *pRange = rangeList_.GetNext(pos);
if (NULL == pRange->GetERRecord())
{
pMatchingRange = pRange;
break;
}
}
}
else
{
TInt64 erSyskey = pRec->GetSyskey();
DSListPosition pos = rangeList_.GetHeadPosition();
TInt64 matchingSyskey = 0;
while (NULL != pos)
{
CRURange *pRange = rangeList_.GetNext(pos);
if (NULL != pRange->GetERRecord())
{
continue; // This one is already matched
}
TInt64 brSyskey = pRange->GetBRRecord()->GetSyskey();
if (brSyskey > erSyskey)
{
continue; // This cannot match for sure
}
if (NULL == pMatchingRange
||
brSyskey > matchingSyskey)
{
pMatchingRange = pRange;
matchingSyskey = brSyskey;
}
}
}
if (NULL == pMatchingRange)
{
// Incorrent (unbalanced) logging - should never happen
CRUException ex;
ex.SetError(IDS_RU_UNBALANCED_LOGGING);
throw ex;
}
pMatchingRange->SetERRecord(pRec);
}
//--------------------------------------------------------------------------//
// CRURangeCollection::UpdateMinEpoch()
//--------------------------------------------------------------------------//
void CRURangeCollection::UpdateMinEpoch(TInt32 ep)
{
if (0 == minEpoch_)
{
minEpoch_ = ep;
}
else
{
minEpoch_ = (ep < minEpoch_) ? ep : minEpoch_;
}
}
//--------------------------------------------------------------------------//
// CRURangeCollection::VerifyBalance()
//
// Called by CRURangeResolver::Flush()
//
// Verify that the number of Begin-range records is equal to the number
// of the End-range records. If this condition holds, the InsertRangeBoundary()
// method guarantees that boundaries exist (ar not null) for every range
// in the collection.
//
//--------------------------------------------------------------------------//
void CRURangeCollection::VerifyBalance()
{
if (balanceCounter_ != 0)
{
// Incorrent (unbalanced) logging - should never happen
CRUException ex;
ex.SetError(IDS_RU_UNBALANCED_LOGGING);
throw ex;
}
}
//--------------------------------------------------------------------------//
// CRURangeCollection::CompareElem()
//
// The function serves for the quicksort criteria.
//--------------------------------------------------------------------------//
Int32 CRURangeCollection::CompareElem(const void *pEl1, const void *pEl2)
{
CRURange *pRng1 = *(CRURange **)pEl1;
CRURange *pRng2 = *(CRURange **)pEl2;
return (pRng1->GetRangeId() < pRng2->GetRangeId()) ? -1 : 1;
}