/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.indexer;

import com.google.common.collect.Lists;
import com.google.inject.Inject;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
import org.apache.druid.timeline.partition.HashPartitionFunction;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * Determines how input data should be partitioned before the main Hadoop indexing job runs. If the
 * partitions spec requires a determination pass, this runs the corresponding Hadoop job; otherwise it
 * builds hash-based shard specs directly from the configured shard count.
 */
public class HadoopDruidDetermineConfigurationJob implements Jobby
{
  private static final Logger log = new Logger(HadoopDruidDetermineConfigurationJob.class);

  private final HadoopDruidIndexerConfig config;
  private Jobby job;
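  // Path of the file to which the ID of the launched Hadoop job is written; set via setHadoopJobIdFile().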
  private String hadoopJobIdFile;

  @Inject
  public HadoopDruidDetermineConfigurationJob(HadoopDruidIndexerConfig config)
  {
    this.config = config;
  }

  @Override
  public boolean run()
  {
    JobHelper.ensurePaths(config);

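    // Run a partition-determination job if the spec needs one; otherwise construct the shard specs directly below.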
    if (config.isDeterminingPartitions()) {
      job = createPartitionJob(config);
      config.setHadoopJobIdFileName(hadoopJobIdFile);
      return JobHelper.runSingleJob(job, config);
    } else {
      final PartitionsSpec partitionsSpec = config.getPartitionsSpec();
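      // Hashed specs may carry an explicit shard count and hash partition function; any other spec gets one shard per interval.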
      final int shardsPerInterval;
      final HashPartitionFunction partitionFunction;
      if (partitionsSpec instanceof HashedPartitionsSpec) {
        final HashedPartitionsSpec hashedPartitionsSpec = (HashedPartitionsSpec) partitionsSpec;
        shardsPerInterval = PartitionsSpec.isEffectivelyNull(hashedPartitionsSpec.getNumShards())
                            ? 1
                            : hashedPartitionsSpec.getNumShards();
        partitionFunction = hashedPartitionsSpec.getPartitionFunction();
      } else {
        shardsPerInterval = 1;
        partitionFunction = null;
      }
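      // Build shardsPerInterval hash-based shard specs for every segment granularity interval, keyed by the interval's start millis.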
      Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
      int shardCount = 0;
      for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        DateTime bucket = segmentGranularity.getStart();
        // shardsPerInterval is always at least 1 here, so every interval gets at least one shard spec
        List<HadoopyShardSpec> specs = Lists.newArrayListWithCapacity(shardsPerInterval);
        for (int i = 0; i < shardsPerInterval; i++) {
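          // Each spec is partition i of shardsPerInterval partitions and, equally, hash bucket i of shardsPerInterval buckets.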
          specs.add(
              new HadoopyShardSpec(
                  new HashBasedNumberedShardSpec(
                      i,
                      shardsPerInterval,
                      i,
                      shardsPerInterval,
                      config.getPartitionsSpec().getPartitionDimensions(),
                      partitionFunction,
                      HadoopDruidIndexerConfig.JSON_MAPPER
                  ),
                  shardCount++
              )
          );
        }
        shardSpecs.put(bucket.getMillis(), specs);
        log.info("DateTime[%s], spec[%s]", bucket, specs);
      }
      config.setShardSpecs(shardSpecs);
      return true;
    }
  }

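  /**
   * Selects the partition-determination job matching the configured partitions spec: hashed specs run
   * {@link DetermineHashedPartitionsJob}, single-dimension specs run {@link DeterminePartitionsJob}.
   */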
  private static Jobby createPartitionJob(HadoopDruidIndexerConfig config)
  {
    final PartitionsSpec partitionsSpec = config.getPartitionsSpec();
    if (partitionsSpec instanceof HashedPartitionsSpec) {
      return new DetermineHashedPartitionsJob(config);
    } else if (partitionsSpec instanceof SingleDimensionPartitionsSpec) {
      return new DeterminePartitionsJob(config);
    } else {
      throw new ISE("Unknown partitionsSpec[%s]", partitionsSpec);
    }
  }
  @Override
  public Map<String, Object> getStats()
  {
    if (job == null) {
      return null;
    }
    return job.getStats();
  }

  @Override
  public String getErrorMessage()
  {
    if (job == null) {
      return null;
    }
    return job.getErrorMessage();
  }

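  /**
   * Sets the path of the file to which the ID of the launched Hadoop job is written.
   */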
  public void setHadoopJobIdFile(String hadoopJobIdFile)
  {
    this.hadoopJobIdFile = hadoopJobIdFile;
  }
}