blob: 14af223ced62ef33ecc9441b3896f2ff03d3f52c [file] [log] [blame]
;; Licensed to the Apache Software Foundation (ASF) under one
;; or more contributor license agreements. See the NOTICE file
;; distributed with this work for additional information
;; regarding copyright ownership. The ASF licenses this file
;; to you under the Apache License, Version 2.0 (the
;; "License"); you may not use this file except in compliance
;; with the License. You may obtain a copy of the License at
;;
;; http://www.apache.org/licenses/LICENSE-2.0
;;
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
(ns backtype.storm.command.healthcheck
(:require [backtype.storm
[config :refer :all]
[log :refer :all]]
[clojure.java [io :as io]]
[clojure [string :refer [split]]])
(:gen-class))
(defn interrupter
"Interrupt a given thread after ms milliseconds."
[thread ms]
(let [interrupter (Thread.
(fn []
(try
(Thread/sleep ms)
(.interrupt thread)
(catch InterruptedException e))))]
(.start interrupter)
interrupter))
(defn check-output [lines]
(if (some #(.startsWith % "ERROR") lines)
:failed
:success))
(defn process-script [conf script]
(let [script-proc (. (Runtime/getRuntime) (exec script))
curthread (Thread/currentThread)
interrupter-thread (interrupter curthread
(conf STORM-HEALTH-CHECK-TIMEOUT-MS))]
(try
(.waitFor script-proc)
(.interrupt interrupter-thread)
(if (not (= (.exitValue script-proc) 0))
:failed_with_exit_code
(check-output (split
(slurp (.getInputStream script-proc))
#"\n+")))
(catch InterruptedException e
(println "Script" script "timed out.")
:timeout)
(catch Exception e
(println "Script failed with exception: " e)
:failed_with_exception)
(finally (.interrupt interrupter-thread)))))
(defn health-check [conf]
(let [health-dir (absolute-healthcheck-dir conf)
health-files (file-seq (io/file health-dir))
health-scripts (filter #(and (.canExecute %)
(not (.isDirectory %)))
health-files)
results (->> health-scripts
(map #(.getAbsolutePath %))
(map (partial process-script conf)))]
(log-message
(pr-str (map #'vector
(map #(.getAbsolutePath %) health-scripts)
results)))
; failed_with_exit_code is OK. We're mimicing Hadoop's health checks.
; We treat non-zero exit codes as indicators that the scripts failed
; to execute properly, not that the system is unhealthy, in which case
; we don't want to start killing things.
(if (every? #(or (= % :failed_with_exit_code)
(= % :success))
results)
0
1)))
(defn -main [& args]
(let [conf (read-storm-config)]
(System/exit
(health-check conf))))