[GRIFFIN-295] Limit the memory used by test case
The container memory size is 3G in travis, but our test cases always use more than 3G of memory, so `Cannot allocate memory` will be thrown.
```
Java HotSpot(TM) 64-Bit Server VM warning: INFO: os::commit_memory(0x00000000fe980000, 23592960, 0) failed; error='Cannot allocate memory' (errno=12)
#
# There is insufficient memory for the Java Runtime Environment to continue.
# Native memory allocation (mmap) failed to map 23592960 bytes for committing reserved memory.
# An error report file with more information is saved as:
# /home/travis/build/apache/griffin/measure/hs_err_pid11948.log
# [ timer expired, abort... ]
```
There are two kinds of programs in our tests: the maven main program and the tests run by maven-surefire-plugin and scalatest-maven-plugin.
If the memory is unlimited, test cases will occupy as much memory as possible especially spark jobs.
Spark jobs will not free the memory until a full GC occurs, even if we have stopped the spark context, so we need to limit the memory used by test cases.
We can limit the maven memory used by setting export MAVEN_OPTS=" -Xmx1024m -XX:ReservedCodeCacheSize=128m", and we can limit the memory used by spark job tests by configuring the maven-surefire-plugin and scalatest-maven-plugin.
For example:
Before we limited the memory used, the maven program occupied 1.5G of memory and the spark job occupied 1.8G of memory.
<img width="1153" alt="1" src="https://user-images.githubusercontent.com/3626747/67956554-40108e00-fc2f-11e9-83de-d0840fb42cb7.png">
<img width="1150" alt="2" src="https://user-images.githubusercontent.com/3626747/67956567-46066f00-fc2f-11e9-8a73-6d141be28e70.png">
After we limited the memory used, the maven program occupies 1G of memory and the spark job occupies 1G of memory.
<img width="1142" alt="3" src="https://user-images.githubusercontent.com/3626747/67956579-4999f600-fc2f-11e9-9cd4-9032966ca923.png">
<img width="1139" alt="4" src="https://user-images.githubusercontent.com/3626747/67956586-4dc61380-fc2f-11e9-800b-1d26d637a479.png">
Author: wankunde <wankunde@163.com>
Closes #546 from wankunde/testcase_memory_limit.
diff --git a/.travis.yml b/.travis.yml
index 57405c6..68705ab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -43,6 +43,6 @@
# keep 30, need change according to ci logs.
script:
- export MAVEN_SKIP_RC=true
- - export MAVEN_OPTS=" -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+ - export MAVEN_OPTS=" -Xmx1g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
- mvn -B clean test -Dlogging.level.org.springframework=WARN
diff --git a/measure/pom.xml b/measure/pom.xml
index 7f9f1f1..8fa5f5d 100644
--- a/measure/pom.xml
+++ b/measure/pom.xml
@@ -215,25 +215,15 @@
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
- <!-- enable scalatest -->
- <plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
- <version>1.0</version>
- <executions>
- <execution>
- <id>test</id>
- <goals>
- <goal>test</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.0</version>
diff --git a/measure/src/test/resources/log4j.properties b/measure/src/test/resources/log4j.properties
index bd3f027..3b408db 100644
--- a/measure/src/test/resources/log4j.properties
+++ b/measure/src/test/resources/log4j.properties
@@ -24,4 +24,7 @@
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p [%c] - %m%n
log4j.logger.org.apache=WARN
-log4j.logger.org.spark_project=WARN
\ No newline at end of file
+log4j.logger.org.spark_project=WARN
+
+# for travis test log
+log4j.logger.org.apache.hadoop.hive.metastore=INFO
\ No newline at end of file
diff --git a/measure/src/test/scala/org/apache/griffin/measure/SparkSuiteBase.scala b/measure/src/test/scala/org/apache/griffin/measure/SparkSuiteBase.scala
index 4d6a06f..89e09d8 100644
--- a/measure/src/test/scala/org/apache/griffin/measure/SparkSuiteBase.scala
+++ b/measure/src/test/scala/org/apache/griffin/measure/SparkSuiteBase.scala
@@ -29,29 +29,35 @@
@transient var spark: SparkSession = _
@transient var sc: SparkContext = _
- @transient var checkpointDir: String = _
-
- var conf = new SparkConf(false)
+ @transient var conf: SparkConf = _
override def beforeAll() {
super.beforeAll()
cleanTestHiveData()
- this.spark = SparkSession.builder
+ conf = new SparkConf(false)
+ spark = SparkSession.builder
.master("local[4]")
.appName("Griffin Job Suite")
.config(conf)
.enableHiveSupport()
.getOrCreate()
- sc = this.spark.sparkContext
+ sc = spark.sparkContext
}
override def afterAll() {
try {
+ spark.sparkContext.stop()
SparkSession.clearActiveSession()
if (spark != null) {
spark.stop()
}
spark = null
+ if (sc != null) {
+ sc.stop()
+ }
+ sc = null
+ conf = null
+
cleanTestHiveData()
} finally {
super.afterAll()
diff --git a/measure/src/test/scala/org/apache/griffin/measure/job/BatchDQAppTest.scala b/measure/src/test/scala/org/apache/griffin/measure/job/BatchDQAppTest.scala
index c1981af..ae0bfb8 100644
--- a/measure/src/test/scala/org/apache/griffin/measure/job/BatchDQAppTest.scala
+++ b/measure/src/test/scala/org/apache/griffin/measure/job/BatchDQAppTest.scala
@@ -19,15 +19,17 @@
package org.apache.griffin.measure.job
import scala.util.{Failure, Success, Try}
+
import org.apache.griffin.measure.Application.readParamFile
-import org.apache.griffin.measure.SparkSuiteBase
import org.apache.griffin.measure.configuration.dqdefinition.EnvConfig
import org.apache.griffin.measure.launch.batch.BatchDQApp
import org.apache.griffin.measure.step.builder.udf.GriffinUDFAgent
-class BatchDQAppTest extends DQAppTest with SparkSuiteBase {
+class BatchDQAppTest extends DQAppTest {
override def beforeAll(): Unit = {
+ super.beforeAll()
+
envParam = readParamFile[EnvConfig](getConfigFilePath("/env-batch.json")) match {
case Success(p) => p
case Failure(ex) =>
@@ -38,18 +40,14 @@
sparkParam = envParam.getSparkParam
Try {
- // build spark 2.0+ application context
- conf.setAppName("BatchDQApp Test")
- conf.setAll(sparkParam.getConfig)
- conf.set("spark.sql.crossJoin.enabled", "true")
+ sparkParam.getConfig.foreach { case (k, v) => spark.conf.set(k, v) }
+ spark.conf.set("spark.app.name", "BatchDQApp Test")
+ spark.conf.set("spark.sql.crossJoin.enabled", "true")
- super.beforeAll()
-
- sparkSession = spark
val logLevel = getGriffinLogLevel()
- sparkSession.sparkContext.setLogLevel(sparkParam.getLogLevel)
+ sc.setLogLevel(sparkParam.getLogLevel)
griffinLogger.setLevel(logLevel)
- val sqlContext = sparkSession.sqlContext
+ val sqlContext = spark.sqlContext
// register udf
GriffinUDFAgent.register(sqlContext)
diff --git a/measure/src/test/scala/org/apache/griffin/measure/job/DQAppTest.scala b/measure/src/test/scala/org/apache/griffin/measure/job/DQAppTest.scala
index ce38408..fe47213 100644
--- a/measure/src/test/scala/org/apache/griffin/measure/job/DQAppTest.scala
+++ b/measure/src/test/scala/org/apache/griffin/measure/job/DQAppTest.scala
@@ -18,24 +18,26 @@
*/
package org.apache.griffin.measure.job
-import scala.util.{Failure, Success}
+import scala.util.Failure
+import scala.util.Success
-import org.apache.spark.sql.SparkSession
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.FlatSpec
+import org.scalatest.Matchers
import org.apache.griffin.measure.Application._
import org.apache.griffin.measure.Loggable
+import org.apache.griffin.measure.SparkSuiteBase
import org.apache.griffin.measure.configuration.dqdefinition._
import org.apache.griffin.measure.configuration.enums._
import org.apache.griffin.measure.launch.DQApp
import org.apache.griffin.measure.launch.batch.BatchDQApp
import org.apache.griffin.measure.launch.streaming.StreamingDQApp
-class DQAppTest extends FlatSpec with Matchers with Loggable {
+class DQAppTest extends FlatSpec with SparkSuiteBase with BeforeAndAfterAll with Matchers with Loggable {
var envParam: EnvConfig = _
var sparkParam: SparkParam = _
- var sparkSession: SparkSession = _
var dqApp: DQApp = _
@@ -56,15 +58,14 @@
// choose process
val procType = ProcessType(allParam.getDqConfig.getProcType)
dqApp = procType match {
- case BatchProcessType => new BatchDQApp(allParam)
+ case BatchProcessType => BatchDQApp(allParam)
case StreamingProcessType => StreamingDQApp(allParam)
case _ =>
error(s"${procType} is unsupported process type!")
sys.exit(-4)
}
- dqApp.sparkSession = sparkSession
+ dqApp.sparkSession = spark
dqApp
}
-
}
diff --git a/pom.xml b/pom.xml
index 310d1c5..e96e44d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,6 +115,43 @@
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
+ <!-- Surefire runs all Java tests -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.22.0</version>
+ <configuration>
+ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
+ <argLine>-ea -Xmx1g -Xss4m -XX:ReservedCodeCacheSize=128m</argLine>
+ </configuration>
+ <executions>
+ <execution>
+ <id>test</id>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <!-- Scalatest runs all Scala tests -->
+ <!-- enable scalatest -->
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ <version>1.0</version>
+ <configuration>
+ <argLine>-ea -Xmx1g -Xss4m -XX:ReservedCodeCacheSize=128m</argLine>
+ </configuration>
+ <executions>
+ <execution>
+ <id>test</id>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>