blob: 028b295cc0b32ee44899f72c477505296282b9b9 [file] [log] [blame]
/*-
* Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle Berkeley
* DB Java Edition made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle Berkeley DB Java Edition for a copy of the
* license and additional information.
*/
package com.sleepycat.je.rep.util.ldiff;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.utilint.Adler32;
/**
* A rolling window of key/data pairs used by the ldiff algorithm.
*/
public class Window {
private final Cursor cursor;
private List<byte[]> window;
private final MessageDigest md;
private final int windowSize;
private long chksum;
/* The begin key/data pair of a window. */
private byte[] beginKey;
private byte[] beginData;
/* The size of a different area. */
private long diffSize;
/**
* Create a window of the given size, starting at the next record pointed
* at by the Cursor.
*
* @param cursor an open cursor on the database being diff'd
* @param windowSize the number of records to include in the window
* @throws Exception
*/
public Window(Cursor cursor, int windowSize)
throws Exception {
this.cursor = cursor;
this.windowSize = windowSize;
/* To compute a MD5 hash for each block. */
try {
md = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
throw new Exception("MD5 hashes are required for ldiff.");
}
nextWindow();
}
/**
* Roll a window forward by one key. The rolling checksum is adjusted to
* account for this move past one key/value pair. Note that the md5 hash
* associated with the window is computed on demand and is not updated
* here.
*/
public void rollWindow()
throws Exception {
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
if (cursor.getNext(key, data, LockMode.DEFAULT) ==
OperationStatus.SUCCESS) {
byte[] keyValue = LDiffUtil.concatByteArray(key.getData(),
data.getData());
int removeXi = LDiffUtil.getXi(window.remove(0));
window.add(keyValue);
int addXi = LDiffUtil.getXi(keyValue);
rollChecksum(removeXi, addXi);
} else {
chksum = 0;
}
diffSize++;
}
/**
* Advance the window to the next block of records and update the rolling
* checksum.
*/
public void nextWindow() {
/* DatabaseEntry represents the key and data of each record. */
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
int i = 0;
window = new ArrayList<byte[]>();
diffSize = 0;
/* Please pay attention to the check order in while loop. */
while ((i < windowSize) &&
(cursor.getNext(key, data, LockMode.DEFAULT) ==
OperationStatus.SUCCESS)) {
if (i == 0) {
beginKey = key.getData();
beginData = data.getData();
}
window.add(LDiffUtil.concatByteArray(key.getData(),
data.getData()));
i++;
}
setChecksum();
}
/**
* The checksum for the window.
*
* @return the checksum for the window.
*/
public long getChecksum() {
return chksum;
}
public byte[] getBeginKey() {
return beginKey;
}
public byte[] getBeginData() {
return beginData;
}
public long getDiffSize() {
return diffSize;
}
/**
* Compute the MD5 hash for the window. This is an expensive operation and
* should be used sparingly.
*
* @return the MD5 for the window.
*/
public byte[] getMd5Hash() {
/* Reset the Message Digest first. */
md.reset();
/* Feed the data into the Message Digest. */
for (byte[] ba : window) {
md.update(ba);
}
return md.digest();
}
/**
* The number of records in the window. The size of the window will match
* the value set during instantiation, until the end of the database is
* reached.
*
* @return the number of records in the window.
*/
public int size() {
return window.size();
}
/**
* We use the rsync rolling checksum algorithm with the following changes:
*
* 1. Each byte (Xi in the tech report) is replaced by a 32 bit Adler
* checksum of the bytes representing the concatenation of the key/value
* pair.
*
* 2. The value for M is 64 instead of 32 to reduce the chances of false
* collisions on the rolling checksum, given our adaptation of the original
* algorithm to logically use 32 bit bytes.
*/
private void setChecksum() {
/* Adler32 to compute the rolling checksum of key/data pair. */
Adler32 adler32 = new Adler32();
int a = 0, b = 0;
for (int i = 0; i < size(); i++) {
byte[] element = window.get(i);
adler32.reset();
adler32.update(element, 0, element.length);
final int xi = (int) adler32.getValue(); /* It's really 32 bits */
a += xi;
b += (xi * (size() - i));
}
chksum = (a & LDiffUtil.MASK_32BIT) | ((long) b << 32);
}
/**
* Update the checksum by removing removeXi and adding addXi, according to
* the rsync algorithm.
*
* @param removeXi
* the value to remove from the checksum
* @param addXi
* the value to add to the checksum
*/
private void rollChecksum(int removeXi, int addXi) {
final int a = (int) chksum - removeXi + addXi;
final int b = (int) (chksum >> 32) - (removeXi * size()) + a;
chksum = (a & LDiffUtil.MASK_32BIT) | ((long) b << 32);
}
}