cluster monitoring WIP
diff --git a/modules/cluster-monitoring/pom.xml b/modules/cluster-monitoring/pom.xml
new file mode 100644
index 0000000..0fb9e40
--- /dev/null
+++ b/modules/cluster-monitoring/pom.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>airavata</artifactId>
+ <groupId>org.apache.airavata</groupId>
+ <version>0.17-SNAPSHOT</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>cluster-monitoring</artifactId>
+ <!-- Dependencies without a <version> (slf4j-*, junit) presumably take it from the parent POM; TODO confirm. -->
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>1.3.2</version>
+ </dependency>
+ <dependency>
+ <groupId>com.jcraft</groupId>
+ <artifactId>jsch</artifactId>
+ <version>0.1.50</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>2.3.1</version>
+ </dependency>
+ </dependencies>
+
+
+</project>
\ No newline at end of file
diff --git a/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java b/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java
new file mode 100644
index 0000000..2f1dc1d
--- /dev/null
+++ b/modules/cluster-monitoring/src/main/java/org/apache/airavata/cluster/monitoring/ClusterHealthMonitor.java
@@ -0,0 +1,265 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+package org.apache.airavata.cluster.monitoring;
+
+
+import com.google.gson.Gson;
+import com.google.gson.reflect.TypeToken;
+import com.jcraft.jsch.*;
+import org.apache.commons.io.IOUtils;
+import org.apache.log4j.Logger;
+
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Command-line prototype that polls batch-scheduler queues on remote clusters over SSH and parses the
+ * output of {@code sinfo} (SLURM) or {@code qstat} (PBS) into {@link QueueStatus} records.
+ */
+public class ClusterHealthMonitor {
+    private static final Logger logger = Logger.getLogger(ClusterHealthMonitor.class);
+    /** Fixed charset so byte/String conversions do not depend on the platform default. */
+    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
+
+    /**
+     * Connects to every configured host, queries each of its queues and prints the number of statuses
+     * that could be collected. A failure on one host is logged and the remaining hosts are still polled.
+     *
+     * @param args ignored
+     * @throws IOException if a bundled classpath resource (key pair or cluster list) cannot be read
+     */
+    public static void main(String[] args) throws IOException {
+        byte[] publicKeyBytes = readResource("/id_rsa.pub");
+        byte[] privateKeyBytes = readResource("/id_rsa");
+        // NOTE(review): hard-coded passphrase for a key bundled as a resource; move both to secure config.
+        String passPhrase = "ultrascan";
+
+        // Parse the resource bytes directly: getResource(..).getFile() fails for jar-packaged resources.
+        String json = new String(readResource("/cluster-properties.json"), UTF_8);
+        List<ComputeResourceProfile> computeResourceProfiles =
+                new Gson().fromJson(json, new TypeToken<List<ComputeResourceProfile>>(){}.getType());
+        if (computeResourceProfiles == null) {
+            // Gson returns null for an empty document; treat that as "no clusters configured".
+            computeResourceProfiles = new ArrayList<>();
+        }
+
+        List<QueueStatus> queueStatuses = new ArrayList<>();
+        for (ComputeResourceProfile profile : computeResourceProfiles) {
+            String hostName = profile.getHostName();
+            String resourceManagerType = profile.getResourceManagerType();
+            Session session = null;
+            try {
+                JSch jsch = new JSch();
+                jsch.addIdentity(hostName, privateKeyBytes, publicKeyBytes, passPhrase.getBytes(UTF_8));
+                session = jsch.getSession(profile.getUserName(), hostName, profile.getPort());
+                java.util.Properties config = new java.util.Properties();
+                // NOTE(review): host key checking is disabled, so the remote host is never verified.
+                config.put("StrictHostKeyChecking", "no");
+                session.setConfig(config);
+                session.connect();
+                // Logged after connect() so the message only appears for an established session.
+                logger.debug("Connected to " + hostName);
+
+                for (String queue : profile.getQueueNames()) {
+                    String command = buildCommand(resourceManagerType, queue);
+                    if (command.isEmpty()) {
+                        logger.warn("No matching resource manager type found for " + resourceManagerType);
+                        continue;
+                    }
+                    String result = execute(session, command, hostName + " " + queue);
+                    QueueStatus queueStatus = parseStatus(resourceManagerType, hostName, queue, result);
+                    if (queueStatus != null) {
+                        queueStatuses.add(queueStatus);
+                    }
+                }
+            } catch (JSchException | IOException ex) {
+                // SSH or stream failure for this host: log it and continue with the next one.
+                logger.error(ex.getMessage(), ex);
+            } finally {
+                // Always release the connection, including when a command or read failed part-way.
+                if (session != null) {
+                    session.disconnect();
+                }
+            }
+        }
+
+        System.out.println(queueStatuses.size());
+    }
+
+    /** Reads a classpath resource fully into memory; a missing resource is reported as an IOException. */
+    private static byte[] readResource(String name) throws IOException {
+        try (InputStream in = ClusterHealthMonitor.class.getResourceAsStream(name)) {
+            if (in == null) {
+                throw new java.io.FileNotFoundException("Classpath resource not found: " + name);
+            }
+            return IOUtils.toByteArray(in);
+        }
+    }
+
+    /** Builds the remote status command for a queue; returns "" when the type is neither SLURM nor PBS. */
+    private static String buildCommand(String resourceManagerType, String queue) {
+        if ("SLURM".equals(resourceManagerType)) {
+            return "sinfo -s -p " + queue + " -o \"%a %F\" | tail -1";
+        }
+        if ("PBS".equals(resourceManagerType)) {
+            return "qstat -Q " + queue + "| tail -1";
+        }
+        return "";
+    }
+
+    /** Runs {@code command} on an exec channel of {@code session} and returns its standard output as text. */
+    private static String execute(Session session, String command, String label)
+            throws JSchException, IOException {
+        ChannelExec channel = (ChannelExec) session.openChannel("exec");
+        java.io.ByteArrayOutputStream output = new java.io.ByteArrayOutputStream();
+        try {
+            channel.setCommand(command);
+            channel.setInputStream(null);
+            channel.setErrStream(System.err);
+            InputStream in = channel.getInputStream();
+            channel.connect();
+            byte[] buffer = new byte[1024];
+            while (true) {
+                while (in.available() > 0) {
+                    int read = in.read(buffer, 0, buffer.length);
+                    if (read < 0) break;
+                    output.write(buffer, 0, read);
+                }
+                if (channel.isClosed()) {
+                    if (in.available() > 0) continue;
+                    logger.debug(label + " " + "exit-status: " + channel.getExitStatus());
+                    break;
+                }
+                try {
+                    Thread.sleep(1000);
+                } catch (InterruptedException ex) {
+                    // Keep the interrupt visible to the caller and stop waiting for the command.
+                    Thread.currentThread().interrupt();
+                    break;
+                }
+            }
+        } finally {
+            channel.disconnect();
+        }
+        // Decode once at the end so a multi-byte character split across two reads is not corrupted.
+        return new String(output.toByteArray(), UTF_8);
+    }
+
+    /**
+     * Turns the line printed by the scheduler command into a {@link QueueStatus}.
+     *
+     * @return the parsed status, or {@code null} when the output is empty, the resource manager type is
+     *         unknown, or the line lacks the expected fields
+     */
+    private static QueueStatus parseStatus(String resourceManagerType, String hostName, String queue,
+            String result) {
+        // Trim first so leading blanks or the trailing newline cannot shift the field positions.
+        String line = result.trim();
+        if (line.isEmpty()) {
+            return null;
+        }
+        String[] fields = line.split("\\s+");
+        try {
+            if ("SLURM".equals(resourceManagerType)) {
+                // Expected shape "<avail> <n>/<n>[/...]"; the first two counts are stored.
+                String[] counts = fields[1].split("/");
+                return new QueueStatus(hostName, queue, fields[0].equalsIgnoreCase("up"),
+                        Integer.parseInt(counts[0].trim()), Integer.parseInt(counts[1].trim()),
+                        System.currentTimeMillis());
+            }
+            if ("PBS".equals(resourceManagerType)) {
+                // Field 3 is matched against "yes"; fields 6 and 5 are stored as running and queued.
+                return new QueueStatus(hostName, queue, fields[3].equalsIgnoreCase("yes"),
+                        Integer.parseInt(fields[6].trim()), Integer.parseInt(fields[5].trim()),
+                        System.currentTimeMillis());
+            }
+        } catch (NumberFormatException | ArrayIndexOutOfBoundsException ex) {
+            // Malformed scheduler output must not abort the whole run; skip just this queue.
+            logger.warn("Unexpected output for " + hostName + " " + queue + ": " + line, ex);
+        }
+        return null;
+    }
+
+    /** Connection details and queue list of one cluster, as deserialized from the JSON cluster list. */
+    private static class ComputeResourceProfile {
+        private String hostName;
+        private String userName;
+        private int port;
+        private List<String> queueNames;
+        private String resourceManagerType;
+
+        public ComputeResourceProfile(String hostName, String userName, int port, List<String> queueNames,
+                String resourceManagerType) {
+            this.hostName = hostName;
+            this.userName = userName;
+            this.port = port;
+            this.queueNames = queueNames;
+            this.resourceManagerType = resourceManagerType;
+        }
+
+        public String getHostName() { return hostName; }
+        public void setHostName(String hostName) { this.hostName = hostName; }
+        public String getUserName() { return userName; }
+        public void setUserName(String userName) { this.userName = userName; }
+        public int getPort() { return port; }
+        public void setPort(int port) { this.port = port; }
+        public List<String> getQueueNames() { return queueNames; }
+        public void setQueueNames(List<String> queueNames) { this.queueNames = queueNames; }
+        public String getResourceManagerType() { return resourceManagerType; }
+        public void setResourceManagerType(String resourceManagerType) { this.resourceManagerType = resourceManagerType; }
+    }
+
+    /** State of one queue on one host, stamped with the time (epoch milliseconds) it was parsed. */
+    private static class QueueStatus {
+        private String hostName;
+        private String queueName;
+        private boolean queueUp;
+        private int runningJobs;
+        private int queuedJobs;
+        private long time;
+
+        public QueueStatus(String hostName, String queueName, boolean queueUp, int runningJobs, int queuedJobs,
+                long time) {
+            this.hostName = hostName;
+            this.queueName = queueName;
+            this.queueUp = queueUp;
+            this.runningJobs = runningJobs;
+            this.queuedJobs = queuedJobs;
+            this.time = time;
+        }
+
+        public String getHostName() { return hostName; }
+        public void setHostName(String hostName) { this.hostName = hostName; }
+        public String getQueueName() { return queueName; }
+        public void setQueueName(String queueName) { this.queueName = queueName; }
+        public boolean isQueueUp() { return queueUp; }
+        public void setQueueUp(boolean queueUp) { this.queueUp = queueUp; }
+        public int getRunningJobs() { return runningJobs; }
+        public void setRunningJobs(int runningJobs) { this.runningJobs = runningJobs; }
+        public int getQueuedJobs() { return queuedJobs; }
+        public void setQueuedJobs(int queuedJobs) { this.queuedJobs = queuedJobs; }
+        public long getTime() { return time; }
+        public void setTime(long time) { this.time = time; }
+    }
+}
\ No newline at end of file
diff --git a/modules/cluster-monitoring/src/main/resources/cluster-properties.json b/modules/cluster-monitoring/src/main/resources/cluster-properties.json
new file mode 100644
index 0000000..f016565
--- /dev/null
+++ b/modules/cluster-monitoring/src/main/resources/cluster-properties.json
@@ -0,0 +1,53 @@
+[
+ {
+ "hostName": "stampede.tacc.xsede.org",
+ "userName": "scigap",
+ "port": 22,
+ "queueNames": [
+ "normal",
+ "development",
+ "gpu"
+ ],
+ "resourceManagerType": "SLURM"
+ },
+ {
+ "hostName": "comet.sdsc.edu",
+ "userName": "scigap",
+ "port": 22,
+ "queueNames": [
+ "compute",
+ "shared",
+ "gpu"
+ ],
+ "resourceManagerType": "SLURM"
+ },
+ {
+ "hostName": "bigred2.uits.iu.edu",
+ "userName": "cgateway",
+ "port": 22,
+ "queueNames": [
+ "gpu",
+ "cpu",
+ "serial"
+ ],
+ "resourceManagerType": "PBS"
+ },
+ {
+ "hostName": "gordon.sdsc.edu",
+ "userName": "ogce",
+ "port": 22,
+ "queueNames": [
+ "normal"
+ ],
+ "resourceManagerType": "PBS"
+ },
+ {
+ "hostName": "karst.uits.iu.edu",
+ "userName": "seagrid",
+ "port": 22,
+ "queueNames": [
+ "batch"
+ ],
+ "resourceManagerType": "PBS"
+ }
+]
\ No newline at end of file
diff --git a/modules/cluster-monitoring/src/main/resources/id_rsa b/modules/cluster-monitoring/src/main/resources/id_rsa
new file mode 100644
index 0000000..b5d1099
--- /dev/null
+++ b/modules/cluster-monitoring/src/main/resources/id_rsa
@@ -0,0 +1,30 @@
+-----BEGIN RSA PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: DES-EDE3-CBC,AF720E62F3BF175C
+
+81OQtQCUQNZ9SmbdeU6zh0mDpjfmaBcDu0lPs9P/GrjE/DkepgIdYyc6f/zKzDOe
+V4SGtgO4Lazv1l/LSwjRLJ0FSheTFDAgCj5TN10Kza44dQ03rX8XIylqjpDoVbKb
+S+CvDN+1HCfcmFfMrw/x3MvWt7BcBeOYcKtemRClfkSFSLqvZrEkRtO6TqHwIumd
+mvSQTdez2MXECmlxMeLGAN88OBA3qXdG3q1k5eojm5/MM60rWNg5kW+aUIpjwCer
+l7Z01e2WkMMkw7E6roO+pRV5UO53cgqQcoYjlwGngLXPQi5QBdD9JLbaxWSAS5OZ
+H04BlEVFvbbSJIP2gKlKfFsclCtg5ldi5Sefhrqi2ewWAlL0ibVf0Ed83ygmm7DS
+0ljFv+U3q1O62ODiQyLpxqUZkROTYy6u18B11ck6n1F625jEK3Sh8Jhu4svxIu4x
+FTNJqZvG1vwPunePp6dzaBoSa6739mC1mSaDgaCvU2BxWfxN0ePKEumZg9NpskpK
+Pu9lHAWkuWs+nBuPzcloiizzd9eC9am0fKRMdGAKM0bE0eHFmsSESkmPjxVyHsK8
+dOA22IX2Ars6utlF4JqSK/vD3frYOSVJ55hIhQrKj0Kq6TyTWv4RVKRolOiOioOY
+7JSKCoYwR5eDA/UyLswBghydteyq1ljmOIE276xO4VnZAbK0gyhlKEReB8Gxbi80
+rtNe11wFKUu8OVUzm2bsDGYY6ZyvKnOXcL7/gwR7oObC0Y67tUkEXS15uiTH+mec
+/YVji54TT5GX7BH23sT3DxoPzTKngjAzBiRKlKlJEh2H3fLYNB5xhyrxO9rSIsBD
+y3nE1cSMUUVR/IPkJKKOmrZ8JfhwQpAiDLux1NBtveo342VY2twJs/U4Y6zYspcU
+d0UPFGWWC/SuxVhR191LnuT5f90V8HMW2S7vohSXx60qoB0OvYOrclYFX9eqcw2v
+e7ka/Rk922HcP1Yt5ak2pIIZMaTRWNBBE17EExmKkErzbJWosNDC/3HnmxkUb/v6
+3Y1wYxIjtcAjWjDQjstKGNty82zy8yeG9NSWTrMzljkosBLXrlVhn6VBqmqhUxUS
+N6ma5ORuziL5FMdSrBsqqIs8Sam7JjVGUZmeGrTiOgQSUM5GV9EyML+jBBqj9/RD
+CbFQCNBCVRNX2LLXHZGnODZ0i1TZ+P7Nap7TIaD1PJwoiFwl9gysf4WYJrWBqWU3
+ORPPWSGkWFdmXGOPyXQFSEMppdHagiFDgCC+5F9VfnrKkKrB+BMmLB8fh2BSmoQH
+LkrXksTTVRDP/8SaIZs+KdBIEnL6UGLiej4p27bj0B9lG0nacyj8E8u0UfxTy2YS
+dhdLeG55ZU+ori0HNBT4/bGY+0gMMMrmXhLYQSR/IoWU0hgzidM67ExYQ0tFpYx9
+g2AWMghQKWnSy9Cyi9sE2umXCISOV3jkPDClBKVxwrt/DTxZcUVzz5YqlZKMXIE3
+tTq4xHsRP/3KJOTun/2elmTFQ7Ml158df2dzTtKCRQ1la0YAMx4gdNWU8a/napN3
+g7l0KSygOWVPKuVFVbR6ZlLTzeJYpeR5ZOi5o4dRAfUB0qgblwn0Hg==
+-----END RSA PRIVATE KEY-----
\ No newline at end of file
diff --git a/modules/cluster-monitoring/src/main/resources/id_rsa.pub b/modules/cluster-monitoring/src/main/resources/id_rsa.pub
new file mode 100644
index 0000000..b8308db
--- /dev/null
+++ b/modules/cluster-monitoring/src/main/resources/id_rsa.pub
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAzESt/JtW7JxJ0JNSW6570OQMYtmZjWw5zMLOVffsOoTbSZ6ZM/udT7mwHrlprIi40rvBKIeUFiyB5CeTwOz5Fok/j4D0uXVLIBqRoJdYlKRquyH5EDaZmY/+5BAjJmuwsOqO6rwAFy6KXnIWXafzixUXKKUxKbx5aRLnzDw+JXb8N/6wJcZQ9UhRbnMQkyb9wZxfv1lruVCmK+OrVShIMaRb1df36khY1uj53ISVjSkY1FD3mECUP1u0nHQiE6aqccAa+9+rPD+6lgXD7eljLUiIc9lb+JqYmzDYM/BC8NZegw1hw1hU22Y3Uq3nFec798CyD1PtNkIFKjxg1VFVNw== airavata@gw111.iu.xsede.org
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 3988fd0..34ec83a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -18,6 +18,9 @@
<prerequisites>
<maven>3.0</maven>
</prerequisites>
+ <modules>
+ <module>modules/cluster-monitoring</module>
+ </modules>
<parent>
<groupId>org.apache</groupId>
@@ -564,6 +567,7 @@
<module>modules/workflow</module>
<module>modules/test-suite</module>
<module>modules/group-manager</module>
+ <module>modules/cluster-monitoring</module>
<!-- Deprecated Modules-->
<!--<module>modules/integration-tests</module>-->
<!--<module>modules/workflow-model</module>-->