blob: cc33c181324e32f16d1c7bb5d7207913ff74cd89 [file] [log] [blame]
/*
* Copyright 2019 WeBank
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.webank.wedatasphere.linkis.engine.pipeline.parser
import com.webank.wedatasphere.linkis.common.utils.Utils
import com.webank.wedatasphere.linkis.engine.pipeline.exception.PipeLineErrorException
import org.apache.commons.lang.StringUtils
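/**
 * Created by johnnwang on 2018/11/14.
 * Parser for import/export code: recognises the leading `@restart` directive and the
 * `%kind` / `@kind` identifier line, and strips them (together with setting and comment
 * lines) from the submitted code.
 */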
object IEParser {

  /** Directive that [[needToRestart]] looks for at the very beginning of the code. */
  val RESTART_CODE = "@restart"

  /**
   * Regular-expression sources for lines that are not real code: the restart directive,
   * a kind identifier (`%xxx` / `@xxx`), `@set spark.*` settings, `#`, `//` and `--`
   * comment lines, and blank lines. Not referenced by the methods of this object.
   *
   * `|` is a literal inside a character class, so the kind identifier class is written
   * `[@%]` rather than `[@|%]`, which would also accept lines such as `|sql`.
   */
  val APPLICATION_START_COMMAND = List("\\s*@restart\\s*",
    "\\s*[@%][a-zA-Z]{1,12}\\s*",
    "^\\s*@set\\s*spark\\..+\\s*",
    "^\\s*#.+\\s*",
    "^\\s*//.+\\s*",
    "^\\s*--.+\\s*",
    "\\s*")

  /** A real line-feed character. */
  private val LINE_FEED = "\n"

  /** The two characters backslash + 'n', i.e. a line feed that is still in escaped form. */
  private val ESCAPED_LINE_FEED = "\\n"

  /** A bare kind identifier: `%` or `@` followed by 1 to 12 letters. */
  private val KIND_PATTERN = "[%@][a-zA-Z]{1,12}"

  /** A qualified kind identifier: a bare identifier, a dot, then 1 to 12 more letters. */
  private val QUALIFIED_KIND_PATTERN = "[%@][a-zA-Z]{1,12}[\\.][a-zA-Z]{1,12}"

  /**
   * Patterns of whole lines that [[getFormatCode]] discards. Compiled once here instead of
   * on every call. Each pattern is applied to a complete line, so the presence or absence
   * of a leading `^` makes no difference.
   */
  private val IGNORABLE_LINE_REGEXES = List(
    "\\s*",
    "^\\s*@set\\s*.+\\s*",
    "^\\s*#.+\\s*",
    "^\\s*//.+\\s*",
    "\\s*--.+\\s*",
    "\\s*@restart\\s*",
    "\\s*[@%][a-zA-Z]{1,12}\\s*"
  ).map(_.r)

  /**
   * Tells whether the code begins with the restart directive.
   *
   * @param code the submitted code; it is not stripped first, so leading whitespace
   *             makes this return false
   * @return true if `code` starts with [[RESTART_CODE]]
   */
  def needToRestart(code: String): Boolean = code.startsWith(RESTART_CODE)

  /**
   * Finds the next line break at or after `start`. Both a real line feed and the escaped
   * two-character form (backslash + 'n') count as a line break.
   *
   * @return the index of the earliest of the two, or -1 if neither occurs
   */
  private def getIndex(_code: String, start: Int): Int = {
    val realBreak = _code.indexOf(LINE_FEED, start)
    val escapedBreak = _code.indexOf(ESCAPED_LINE_FEED, start)
    // Both present: take the earlier one. Otherwise max picks the one that is not -1
    // (or -1 when both are missing).
    if (realBreak > -1 && escapedBreak > -1) Math.min(realBreak, escapedBreak)
    else Math.max(realBreak, escapedBreak)
  }

  /** Number of characters occupied by the line break that [[getIndex]] found at `at`. */
  private def separatorLength(text: String, at: Int): Int =
    if (text.startsWith(ESCAPED_LINE_FEED, at)) ESCAPED_LINE_FEED.length else LINE_FEED.length

  /**
   * Returns the line that is expected to carry the kind identifier: the first line of the
   * stripped code, or the second line when the code starts with [[RESTART_CODE]] and has
   * a line break after it.
   *
   * @param code the submitted code
   * @return that line with surrounding whitespace stripped; no check is made that it
   *         actually looks like a kind identifier
   */
  def getKindString(code: String): String = {
    val stripped = StringUtils.strip(code)
    val start =
      if (stripped.startsWith(RESTART_CODE)) {
        val firstBreak = getIndex(stripped, 0)
        // Skip the whole separator. Skipping a single character is only right for a real
        // line feed; for the escaped form it would leave a stray 'n' in front of the kind.
        if (firstBreak == -1) 0 else firstBreak + separatorLength(stripped, firstBreak)
      } else 0
    val nextBreak = getIndex(stripped, start)
    val end = if (nextBreak == -1) stripped.length else nextBreak
    StringUtils.strip(stripped.substring(start, end))
  }

  /**
   * Extracts the qualified kind (for example `a.b` from `%a.b`) from the kind line.
   *
   * @param code the submitted code
   * @return the kind line without its leading `%` / `@`
   * @throws PipeLineErrorException (code 70004) if the kind line is not `%` or `@`
   *                                followed by two dot-separated words of 1 to 12 letters
   */
  def getKind(code: String): String = {
    val kindStr = getKindString(code)
    if (kindStr.matches(QUALIFIED_KIND_PATTERN)) kindStr.substring(1)
    else throw new PipeLineErrorException(70004, "unknown kind")
  }

  /**
   * This method just removes @restart and language identifiers (such as %sql, %scala, etc.),
   * that is, removes at most the first 2 rows.
   *
   * Only a bare identifier (`%` or `@` plus 1 to 12 letters) is removed here; a qualified
   * one such as `%a.b` does not match and is left in place.
   *
   * @param code the submitted code
   * @return the stripped text after the first occurrence of the kind identifier when the
   *         kind line is a bare identifier; otherwise the stripped text after
   *         [[RESTART_CODE]] when the code starts with it; otherwise the stripped code
   */
  def getRealCode(code: String): String = {
    val stripped = StringUtils.strip(code)
    val kindStr = getKindString(stripped)
    if (kindStr.matches(KIND_PATTERN))
      StringUtils.strip(stripped.substring(stripped.indexOf(kindStr) + kindStr.length))
    else if (stripped.startsWith(RESTART_CODE))
      StringUtils.strip(stripped.substring(RESTART_CODE.length))
    else stripped
  }

  /** True if the whole line matches one of [[IGNORABLE_LINE_REGEXES]]. */
  private def isIgnorableLine(line: String): Boolean =
    IGNORABLE_LINE_REGEXES.exists(_.pattern.matcher(line).matches())

  /**
   * This method removes all code-independent setting parameters and identifiers
   * (including comments).
   *
   * The code is split on real line feeds; every line that is entirely blank, an `@set`
   * setting, a `#`, `//` or `--` comment, the `@restart` directive or a bare kind
   * identifier is dropped.
   *
   * @param code the submitted code
   * @return the remaining lines joined with line feeds, with surrounding whitespace stripped
   */
  def getFormatCode(code: String): String = {
    val keptLines = code.split("\n").filterNot(isIgnorableLine)
    StringUtils.strip(keptLines.mkString("\n"))
  }
}