SAMZA-847: update master branch to use samza 0.10.0
diff --git a/bin/grid b/bin/grid
index 3d0e66e..042cabe 100755
--- a/bin/grid
+++ b/bin/grid
@@ -36,7 +36,7 @@
SYSTEM=$2
DOWNLOAD_KAFKA=http://www.us.apache.org/dist/kafka/0.8.2.1/kafka_2.10-0.8.2.1.tgz
-DOWNLOAD_YARN=https://archive.apache.org/dist/hadoop/common/hadoop-2.4.0/hadoop-2.4.0.tar.gz
+DOWNLOAD_YARN=https://archive.apache.org/dist/hadoop/common/hadoop-2.6.1/hadoop-2.6.1.tar.gz
DOWNLOAD_ZOOKEEPER=http://archive.apache.org/dist/zookeeper/zookeeper-3.4.3/zookeeper-3.4.3.tar.gz
bootstrap() {
@@ -50,11 +50,28 @@
}
install_all() {
+ $DIR/grid install samza # runs first; builds Samza and publishes it to the local Maven repository
$DIR/grid install zookeeper
$DIR/grid install yarn
$DIR/grid install kafka
}
+install_samza() { # Clone (or refresh) Samza master in the download cache, then build and publish it to the local Maven repo.
+  mkdir -p "$DEPLOY_ROOT_DIR"
+  if [ -d "$DOWNLOAD_CACHE_DIR/samza/.git" ]; then
+    pushd "$DOWNLOAD_CACHE_DIR/samza" || return 1 # never run the hard reset below outside the cached clone
+    git fetch origin
+    git reset --hard origin/master
+  else
+    mkdir -p "$DOWNLOAD_CACHE_DIR"
+    pushd "$DOWNLOAD_CACHE_DIR" || return 1
+    git clone git://git.apache.org/samza.git # NOTE(review): git:// is unauthenticated and this code is executed below; consider an https:// remote
+    cd samza || { popd; return 1; } # clone failed: restore the directory stack instead of building in the wrong place
+  fi
+  ./gradlew -PscalaVersion=2.10 clean publishToMavenLocal
+  popd
+}
+
install_zookeeper() {
mkdir -p "$DEPLOY_ROOT_DIR"
install zookeeper $DOWNLOAD_ZOOKEEPER zookeeper-3.4.3
@@ -63,7 +80,7 @@
install_yarn() {
mkdir -p "$DEPLOY_ROOT_DIR"
- install yarn $DOWNLOAD_YARN hadoop-2.4.0
+ install yarn $DOWNLOAD_YARN hadoop-2.6.1
cp "$BASE_DIR/conf/yarn-site.xml" "$DEPLOY_ROOT_DIR/yarn/etc/hadoop/yarn-site.xml"
if [ ! -f "$HOME/.samza/conf/yarn-site.xml" ]; then
mkdir -p "$HOME/.samza/conf"
@@ -181,7 +198,7 @@
echo
echo " $ grid"
echo " $ grid bootstrap"
- echo " $ grid install [yarn|kafka|zookeeper|all]"
+ echo " $ grid install [yarn|kafka|zookeeper|samza|all]"
echo " $ grid start [yarn|kafka|zookeeper|all]"
echo " $ grid stop [yarn|kafka|zookeeper|all]"
echo
diff --git a/pom.xml b/pom.xml
index f9c4fa9..1c455ec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,7 +27,7 @@
<groupId>org.apache.samza</groupId>
<artifactId>hello-samza</artifactId>
- <version>0.9.1</version>
+ <version>0.10.0</version>
<packaging>jar</packaging>
<name>Samza Example</name>
<description>
@@ -106,14 +106,14 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
- <version>2.4.0</version>
+ <version>2.6.1</version>
</dependency>
</dependencies>
<properties>
<!-- maven specific properties -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <samza.version>0.9.1</samza.version>
+ <samza.version>0.10.0</samza.version>
</properties>
<developers>
@@ -204,8 +204,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
<plugin>
diff --git a/src/main/config/wikipedia-feed.properties b/src/main/config/wikipedia-feed.properties
index ce97357..180d749 100644
--- a/src/main/config/wikipedia-feed.properties
+++ b/src/main/config/wikipedia-feed.properties
@@ -39,3 +39,9 @@
systems.kafka.samza.msg.serde=json
systems.kafka.consumer.zookeeper.connect=localhost:2181/
systems.kafka.producer.bootstrap.servers=localhost:9092
+
+# Job Coordinator
+job.coordinator.system=kafka
+# TODO: add configuration to disable checkpointing for this job once it is available in the Coordinator Stream model; see https://issues.apache.org/jira/browse/SAMZA-465?focusedCommentId=14533346&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-14533346 for more details.
+# Normally, this would be 3, but we have only one broker.
+job.coordinator.replication.factor=1
diff --git a/src/main/config/wikipedia-parser.properties b/src/main/config/wikipedia-parser.properties
index d80d59b..6d1e3df 100644
--- a/src/main/config/wikipedia-parser.properties
+++ b/src/main/config/wikipedia-parser.properties
@@ -25,10 +25,6 @@
# Task
task.class=samza.examples.wikipedia.task.WikipediaParserStreamTask
task.inputs=kafka.wikipedia-raw
-task.checkpoint.factory=org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory
-task.checkpoint.system=kafka
-# Normally, this would be 3, but we have only one broker.
-task.checkpoint.replication.factor=1
# Metrics
metrics.reporters=snapshot,jmx
@@ -47,3 +43,8 @@
systems.kafka.consumer.zookeeper.connect=localhost:2181/
systems.kafka.consumer.auto.offset.reset=largest
systems.kafka.producer.bootstrap.servers=localhost:9092
+
+# Job Coordinator
+job.coordinator.system=kafka
+# Normally, this would be 3, but we have only one broker.
+job.coordinator.replication.factor=1
diff --git a/src/main/config/wikipedia-stats.properties b/src/main/config/wikipedia-stats.properties
index 1ff3552..13bab64 100644
--- a/src/main/config/wikipedia-stats.properties
+++ b/src/main/config/wikipedia-stats.properties
@@ -55,3 +55,8 @@
# Normally, we'd set this much higher, but we want things to look snappy in the demo.
stores.wikipedia-stats.write.batch.size=0
stores.wikipedia-stats.object.cache.size=0
+
+# Job Coordinator
+job.coordinator.system=kafka
+# Normally, this would be 3, but we have only one broker.
+job.coordinator.replication.factor=1