| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.accumulo.test.continuous; |
| |
| import static java.nio.charset.StandardCharsets.UTF_8; |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.List; |
| import java.util.Random; |
| import java.util.UUID; |
| import java.util.zip.CRC32; |
| import java.util.zip.Checksum; |
| |
| import org.apache.accumulo.core.cli.BatchWriterOpts; |
| import org.apache.accumulo.core.cli.ClientOnDefaultTable; |
| import org.apache.accumulo.core.client.BatchWriter; |
| import org.apache.accumulo.core.client.Connector; |
| import org.apache.accumulo.core.client.MutationsRejectedException; |
| import org.apache.accumulo.core.client.TableNotFoundException; |
| import org.apache.accumulo.core.data.Mutation; |
| import org.apache.accumulo.core.data.Value; |
| import org.apache.accumulo.core.security.ColumnVisibility; |
| import org.apache.accumulo.core.trace.CountSampler; |
| import org.apache.accumulo.core.trace.Trace; |
| import org.apache.accumulo.core.util.FastFormat; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.Text; |
| |
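/**
 * Continuous ingest test client. Writes random data to a table as a set of linked lists: each
 * node is a single mutation whose value points back to a previously flushed row, so a separate
 * verification pass can walk the links and detect lost data. See the continuous ingest README
 * for how to set up the table and run the full test.
 */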
| public class ContinuousIngest { |
| |
| private static final byte[] EMPTY_BYTES = new byte[0]; |
| |
| private static List<ColumnVisibility> visibilities; |
| |
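  /** Uses a single empty (unrestricted) column visibility unless a visibility file was given. */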
| private static void initVisibilities(ContinuousOpts opts) throws Exception { |
| if (opts.visFile == null) { |
| visibilities = Collections.singletonList(new ColumnVisibility()); |
| return; |
| } |
| |
| visibilities = readVisFromFile(opts.visFile); |
| } |
| |
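  /**
   * Reads column visibility expressions, one per line, from the given file on the default Hadoop
   * {@link FileSystem}. On an IO error the error is printed and the entries read so far (possibly
   * none) are returned.
   */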
| public static List<ColumnVisibility> readVisFromFile(String visFile) { |
| List<ColumnVisibility> vis = new ArrayList<>(); |
| |
| try (BufferedReader in = new BufferedReader(new InputStreamReader( |
| FileSystem.get(new Configuration()).open(new Path(visFile)), UTF_8))) { |
| String line; |
| while ((line = in.readLine()) != null) { |
| vis.add(new ColumnVisibility(line)); |
| } |
| } catch (IOException e) { |
| System.out.println("ERROR reading visFile " + visFile + ": "); |
| e.printStackTrace(); |
| } |
| return vis; |
| } |
| |
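  /** Picks one of the configured visibilities uniformly at random. */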
| private static ColumnVisibility getVisibility(Random rand) { |
| return visibilities.get(rand.nextInt(visibilities.size())); |
| } |
| |
| public static void main(String[] args) throws Exception { |
| |
| ContinuousOpts opts = new ContinuousOpts(); |
| BatchWriterOpts bwOpts = new BatchWriterOpts(); |
| ClientOnDefaultTable clientOpts = new ClientOnDefaultTable("ci"); |
| clientOpts.parseArgs(ContinuousIngest.class.getName(), args, bwOpts, opts); |
| |
| initVisibilities(opts); |
| |
| if (opts.min < 0 || opts.max < 0 || opts.max <= opts.min) { |
| throw new IllegalArgumentException("bad min and max"); |
| } |
| Connector conn = clientOpts.getConnector(); |
| |
| if (!conn.tableOperations().exists(clientOpts.getTableName())) { |
| throw new TableNotFoundException(null, clientOpts.getTableName(), |
| "Consult the README and create the table before starting ingest."); |
| } |
| |
| BatchWriter bw = |
| conn.createBatchWriter(clientOpts.getTableName(), bwOpts.getBatchWriterConfig()); |
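    // trace roughly one out of every 1024 operations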
| bw = Trace.wrapAll(bw, new CountSampler(1024)); |
| |
| Random r = new Random(); |
| |
| byte[] ingestInstanceId = UUID.randomUUID().toString().getBytes(UTF_8); |
| |
| System.out.printf("UUID %d %s%n", System.currentTimeMillis(), |
| new String(ingestInstanceId, UTF_8)); |
| |
| long count = 0; |
| final int flushInterval = 1000000; |
| final int maxDepth = 25; |
| |
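    // Each pass through the outer loop below writes maxDepth sets of flushInterval linked nodes,
    // then stitches the chains' first nodes together so every first insert also points to
    // something.
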
    // We always want to point back to flushed data, so the previous node should always exist in
    // Accumulo when verifying. To achieve this, make insert N point back to the row from insert
    // (N - flushInterval). The arrays below keep track of this.
    long[] prevRows = new long[flushInterval];
    long[] firstRows = new long[flushInterval];
    int[] firstColFams = new int[flushInterval];
    int[] firstColQuals = new int[flushInterval];
| |
| long lastFlushTime = System.currentTimeMillis(); |
| |
| out: while (true) { |
| // generate first set of nodes |
| ColumnVisibility cv = getVisibility(r); |
| |
| for (int index = 0; index < flushInterval; index++) { |
| long rowLong = genLong(opts.min, opts.max, r); |
| prevRows[index] = rowLong; |
| firstRows[index] = rowLong; |
| |
| int cf = r.nextInt(opts.maxColF); |
| int cq = r.nextInt(opts.maxColQ); |
| |
| firstColFams[index] = cf; |
| firstColQuals[index] = cq; |
| |
| Mutation m = |
| genMutation(rowLong, cf, cq, cv, ingestInstanceId, count, null, r, opts.checksum); |
| count++; |
| bw.addMutation(m); |
| } |
| |
| lastFlushTime = flush(bw, count, flushInterval, lastFlushTime); |
| if (count >= opts.num) |
| break out; |
| |
| // generate subsequent sets of nodes that link to previous set of nodes |
| for (int depth = 1; depth < maxDepth; depth++) { |
| for (int index = 0; index < flushInterval; index++) { |
| long rowLong = genLong(opts.min, opts.max, r); |
| byte[] prevRow = genRow(prevRows[index]); |
| prevRows[index] = rowLong; |
| Mutation m = genMutation(rowLong, r.nextInt(opts.maxColF), r.nextInt(opts.maxColQ), cv, |
| ingestInstanceId, count, prevRow, r, opts.checksum); |
| count++; |
| bw.addMutation(m); |
| } |
| |
| lastFlushTime = flush(bw, count, flushInterval, lastFlushTime); |
| if (count >= opts.num) |
| break out; |
| } |
| |
| // create one big linked list, this makes all of the first inserts |
| // point to something |
| for (int index = 0; index < flushInterval - 1; index++) { |
| Mutation m = genMutation(firstRows[index], firstColFams[index], firstColQuals[index], cv, |
| ingestInstanceId, count, genRow(prevRows[index + 1]), r, opts.checksum); |
| count++; |
| bw.addMutation(m); |
| } |
| lastFlushTime = flush(bw, count, flushInterval, lastFlushTime); |
| if (count >= opts.num) |
| break out; |
| } |
| |
| bw.close(); |
| clientOpts.stopTracing(); |
| } |
| |
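  /**
   * Flushes the batch writer and prints a FLUSH line containing the current time, the elapsed
   * time since the last flush, the duration of this flush, the total mutation count, and the
   * flush interval.
   */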
| private static long flush(BatchWriter bw, long count, final int flushInterval, long lastFlushTime) |
| throws MutationsRejectedException { |
| long t1 = System.currentTimeMillis(); |
| bw.flush(); |
| long t2 = System.currentTimeMillis(); |
| System.out.printf("FLUSH %d %d %d %d %d%n", t2, (t2 - lastFlushTime), (t2 - t1), count, |
| flushInterval); |
| lastFlushTime = t2; |
| return lastFlushTime; |
| } |
| |
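  /**
   * Generates one linked-list node: the row is the 16 digit zero-padded hex form of rowLong, the
   * column family and qualifier are 4 digit zero-padded hex, and the value encodes the ingest
   * instance id, the mutation count, the previous row (if any), and an optional CRC32 checksum
   * covering the key fields and the value prefix.
   */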
| public static Mutation genMutation(long rowLong, int cfInt, int cqInt, ColumnVisibility cv, |
| byte[] ingestInstanceId, long count, byte[] prevRow, Random r, boolean checksum) { |
    // Adler32 is supposed to be faster, but according to Wikipedia it is not well suited to
    // small amounts of data, so CRC32 is used instead.
| CRC32 cksum = null; |
| |
| byte[] rowString = genRow(rowLong); |
| |
| byte[] cfString = FastFormat.toZeroPaddedString(cfInt, 4, 16, EMPTY_BYTES); |
| byte[] cqString = FastFormat.toZeroPaddedString(cqInt, 4, 16, EMPTY_BYTES); |
| |
| if (checksum) { |
| cksum = new CRC32(); |
| cksum.update(rowString); |
| cksum.update(cfString); |
| cksum.update(cqString); |
| cksum.update(cv.getExpression()); |
| } |
| |
| Mutation m = new Mutation(new Text(rowString)); |
| |
| m.put(new Text(cfString), new Text(cqString), cv, |
| new Value(createValue(ingestInstanceId, count, prevRow, cksum))); |
| return m; |
| } |
| |
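  /** Formats a column family or qualifier as a 4 digit zero-padded hex string. */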
| public static byte[] genCol(int cfInt) { |
| return FastFormat.toZeroPaddedString(cfInt, 4, 16, EMPTY_BYTES); |
| } |
| |
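  /**
   * Returns a pseudorandom long in the range [min, max). The sign bit is masked off before taking
   * the modulus, so the result is never negative; the small modulo bias is acceptable for test
   * data.
   */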
| public static final long genLong(long min, long max, Random r) { |
    return ((r.nextLong() & 0x7fffffffffffffffL) % (max - min)) + min;
| } |
| |
| static final byte[] genRow(long min, long max, Random r) { |
| return genRow(genLong(min, max, r)); |
| } |
| |
| static final byte[] genRow(long rowLong) { |
| return FastFormat.toZeroPaddedString(rowLong, 16, 16, EMPTY_BYTES); |
| } |
| |
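  /**
   * Encodes a value of the form {@code ingestInstanceId:count:prevRow:checksum} where count is 16
   * hex digits, prevRow is empty for the first nodes in a chain, and the trailing 8 hex digit
   * CRC32 is present only when cksum is non-null (the checksum covers everything before it).
   */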
| static byte[] createValue(byte[] ingestInstanceId, long count, byte[] prevRow, Checksum cksum) { |
| int dataLen = ingestInstanceId.length + 16 + (prevRow == null ? 0 : prevRow.length) + 3; |
| if (cksum != null) |
| dataLen += 8; |
    byte[] val = new byte[dataLen];
| System.arraycopy(ingestInstanceId, 0, val, 0, ingestInstanceId.length); |
| int index = ingestInstanceId.length; |
| val[index++] = ':'; |
| int added = FastFormat.toZeroPaddedString(val, index, count, 16, 16, EMPTY_BYTES); |
| if (added != 16) |
| throw new RuntimeException(" " + added); |
| index += 16; |
| val[index++] = ':'; |
| if (prevRow != null) { |
| System.arraycopy(prevRow, 0, val, index, prevRow.length); |
| index += prevRow.length; |
| } |
| |
| val[index++] = ':'; |
| |
    if (cksum != null) {
      cksum.update(val, 0, index);
      FastFormat.toZeroPaddedString(val, index, cksum.getValue(), 8, 16, EMPTY_BYTES);
    }
| |
| // System.out.println("val "+new String(val)); |
| |
| return val; |
| } |
| } |