[LIVY-472][SERVER] Improve the logs for fail-to-create session
## What changes were proposed in this pull request?
Livy currently doesn't give a very clear log when session creation fails: it only says that the session-related app tag cannot be found in the RM, but doesn't tell the user where to look for the true root cause. So this patch changes the logs to make them clearer.
## How was this patch tested?
Local verification.
Author: jerryshao <sshao@hortonworks.com>
Closes #96 from jerryshao/LIVY-472.
diff --git a/server/src/main/scala/org/apache/livy/server/interactive/InteractiveSessionServlet.scala b/server/src/main/scala/org/apache/livy/server/interactive/InteractiveSessionServlet.scala
index 54046a1..4d614f4 100644
--- a/server/src/main/scala/org/apache/livy/server/interactive/InteractiveSessionServlet.scala
+++ b/server/src/main/scala/org/apache/livy/server/interactive/InteractiveSessionServlet.scala
@@ -70,7 +70,7 @@
Option(session.logLines())
.map { lines =>
val size = 10
- var from = math.max(0, lines.length - size)
+ val from = math.max(0, lines.length - size)
val until = from + size
lines.view(from, until)
diff --git a/server/src/main/scala/org/apache/livy/utils/SparkYarnApp.scala b/server/src/main/scala/org/apache/livy/utils/SparkYarnApp.scala
index 91c70ca..d255796 100644
--- a/server/src/main/scala/org/apache/livy/utils/SparkYarnApp.scala
+++ b/server/src/main/scala/org/apache/livy/utils/SparkYarnApp.scala
@@ -25,6 +25,7 @@
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.Try
+import scala.util.control.NonFatal
import org.apache.hadoop.yarn.api.records.{ApplicationId, ApplicationReport, FinalApplicationStatus, YarnApplicationState}
import org.apache.hadoop.yarn.client.api.YarnClient
@@ -70,7 +71,7 @@
override def run(): Unit = {
while (true) {
if (!leakedAppTags.isEmpty) {
- // kill the app if found it and remove it if exceeding a threashold
+ // kill the app if found it and remove it if exceeding a threshold
val iter = leakedAppTags.entrySet().iterator()
var isRemoved = false
val now = System.currentTimeMillis()
@@ -179,9 +180,11 @@
if (deadline.isOverdue) {
process.foreach(_.destroy())
leakedAppTags.put(appTag, System.currentTimeMillis())
- throw new Exception(s"No YARN application is found with tag $appTagLowerCase in " +
- livyConf.getTimeAsMs(LivyConf.YARN_APP_LOOKUP_TIMEOUT)/1000 + " seconds. " +
- "Please check your cluster status, it is may be very busy.")
+ throw new IllegalStateException(s"No YARN application is found with tag" +
+ s" $appTagLowerCase in ${livyConf.getTimeAsMs(LivyConf.YARN_APP_LOOKUP_TIMEOUT)/1000}" +
+ " seconds. This may be because 1) spark-submit fail to submit application to YARN; " +
+ "or 2) YARN cluster doesn't have enough resources to start the application in time. " +
+ "Please check Livy log and YARN log to know the details.")
} else {
Clock.sleep(pollInterval.toMillis)
getAppIdFromTag(appTagLowerCase, pollInterval, deadline)
@@ -290,12 +293,12 @@
debug(s"$appId $state ${yarnDiagnostics.mkString(" ")}")
} catch {
- case e: InterruptedException =>
+ case _: InterruptedException =>
yarnDiagnostics = ArrayBuffer("Session stopped by user.")
changeState(SparkApp.State.KILLED)
- case e: Throwable =>
- error(s"Error whiling refreshing YARN state: $e")
- yarnDiagnostics = ArrayBuffer(e.toString, e.getStackTrace().mkString(" "))
+ case NonFatal(e) =>
+ error(s"Error whiling refreshing YARN state", e)
+ yarnDiagnostics = ArrayBuffer(e.getMessage)
changeState(SparkApp.State.FAILED)
}
}