blob: e1117ef6ea2675bbe796f959208c199c3ce2b933 [file] [log] [blame]
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver.wal;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.Writable;
/**
* WALEdit: Used in HBase's transaction log (WAL) to represent
* the collection of edits (KeyValue objects) corresponding to a
* single transaction. The class implements "Writable" interface
* for serializing/deserializing a set of KeyValue items.
*
* Previously, if a transaction contains 3 edits to c1, c2, c3 for a row R,
* the HLog would have three log entries as follows:
*
* <logseq1-for-edit1>:<KeyValue-for-edit-c1>
* <logseq2-for-edit2>:<KeyValue-for-edit-c2>
* <logseq3-for-edit3>:<KeyValue-for-edit-c3>
*
* This presents problems because row level atomicity of transactions
* was not guaranteed. If we crash after few of the above appends make
* it, then recovery will restore a partial transaction.
*
* In the new world, all the edits for a given transaction are written
* out as a single record, for example:
*
* <logseq#-for-entire-txn>:<WALEdit-for-entire-txn>
*
* where, the WALEdit is serialized as:
* <-1, # of edits, <KeyValue>, <KeyValue>, ... >
* For example:
* <-1, 3, <Keyvalue-for-edit-c1>, <KeyValue-for-edit-c2>, <KeyValue-for-edit-c3>>
*
* The -1 marker is just a special way of being backward compatible with
* an old HLog which would have contained a single <KeyValue>.
*
* The deserializer for WALEdit backward compatibly detects if the record
* is an old style KeyValue or the new style WALEdit.
*
*/
public class WALEdit implements Writable, HeapSize {
private final int VERSION_2 = -1;
private final ArrayList<KeyValue> kvs = new ArrayList<KeyValue>();
private NavigableMap<byte[], Integer> scopes;
public WALEdit() {
}
public void add(KeyValue kv) {
this.kvs.add(kv);
}
public boolean isEmpty() {
return kvs.isEmpty();
}
public int size() {
return kvs.size();
}
public List<KeyValue> getKeyValues() {
return kvs;
}
public NavigableMap<byte[], Integer> getScopes() {
return scopes;
}
public void setScopes (NavigableMap<byte[], Integer> scopes) {
// We currently process the map outside of WALEdit,
// TODO revisit when replication is part of core
this.scopes = scopes;
}
public void readFields(DataInput in) throws IOException {
kvs.clear();
if (scopes != null) {
scopes.clear();
}
int versionOrLength = in.readInt();
if (versionOrLength == VERSION_2) {
// this is new style HLog entry containing multiple KeyValues.
int numEdits = in.readInt();
for (int idx = 0; idx < numEdits; idx++) {
KeyValue kv = new KeyValue();
kv.readFields(in);
this.add(kv);
}
int numFamilies = in.readInt();
if (numFamilies > 0) {
if (scopes == null) {
scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
}
for (int i = 0; i < numFamilies; i++) {
byte[] fam = Bytes.readByteArray(in);
int scope = in.readInt();
scopes.put(fam, scope);
}
}
} else {
// this is an old style HLog entry. The int that we just
// read is actually the length of a single KeyValue.
KeyValue kv = new KeyValue();
kv.readFields(versionOrLength, in);
this.add(kv);
}
}
public void write(DataOutput out) throws IOException {
out.writeInt(VERSION_2);
out.writeInt(kvs.size());
// We interleave the two lists for code simplicity
for (KeyValue kv : kvs) {
kv.write(out);
}
if (scopes == null) {
out.writeInt(0);
} else {
out.writeInt(scopes.size());
for (byte[] key : scopes.keySet()) {
Bytes.writeByteArray(out, key);
out.writeInt(scopes.get(key));
}
}
}
public long heapSize() {
long ret = 0;
for (KeyValue kv : kvs) {
ret += kv.heapSize();
}
if (scopes != null) {
ret += ClassSize.TREEMAP;
ret += ClassSize.align(scopes.size() * ClassSize.MAP_ENTRY);
// TODO this isn't quite right, need help here
}
return ret;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("[#edits: " + kvs.size() + " = <");
for (KeyValue kv : kvs) {
sb.append(kv.toString());
sb.append("; ");
}
if (scopes != null) {
sb.append(" scopes: " + scopes.toString());
}
sb.append(">]");
return sb.toString();
}
}