blob: e9b62c0d523edd6159dd349847eb6189af891a70 [file]
/*---------------------------------------------------------------------------------------------
* Copyright (c) 2020 DeltaXML Ltd. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { EOL } from 'os'
import * as vscode from 'vscode'
import { XPathLexer, ExitCondition, LexPosition, Token } from './xpLexer'
import { XslLexer } from './xslLexer'
import { XsltTokenDiagnostics } from './xsltTokenDiagnostics'
import { DocumentChangeHandler } from './documentChangeHandler'
const tokenModifiers = new Map<string, number>()
const startList: vscode.Position[] = []
const endList: vscode.Position[] = []
const legend = (function () {
const tokenTypesLegend = XslLexer.getTextmateTypeLegend()
const tokenModifiersLegend = [
'declaration',
'documentation',
'member',
'static',
'abstract',
'deprecated',
'modification',
'async',
]
tokenModifiersLegend.forEach((tokenModifier, index) =>
tokenModifiers.set(tokenModifier, index)
)
return new vscode.SemanticTokensLegend(tokenTypesLegend, tokenModifiersLegend)
})()
export class XPathSemanticTokensProvider
implements vscode.DocumentSemanticTokensProvider
{
private xpLexer = new XPathLexer()
private collection: vscode.DiagnosticCollection
private diagnosticList = new Array<vscode.Diagnostic>()
public constructor(collection: vscode.DiagnosticCollection) {
this.collection = collection
}
async provideDocumentSemanticTokens(
document: vscode.TextDocument,
token: vscode.CancellationToken
): Promise<vscode.SemanticTokens> {
this.xpLexer.documentTokens = []
let [variables, namespaces]: [string[], string[]] =
this.findAllVariablesandNamespaces(document.getText())
let tokens: Token[] = []
const tokenPositions = this.findAllXPath(document.getText())
for (let i = 0; i < tokenPositions.length; i++) {
const line = tokenPositions[i][0]
const startCharacter = tokenPositions[i][1]
const documentOffset = tokenPositions[i][2]
const lexPositions: LexPosition = {
line: line,
startCharacter: startCharacter,
documentOffset: documentOffset,
}
let start = new vscode.Position(line, startCharacter)
let tmpTokens = this.xpLexer.analyse(
document.getText(),
ExitCondition.CurlyBrace,
lexPositions
)
let end = new vscode.Position(
lexPositions.line,
lexPositions.startCharacter
)
//Add start and end positions of XPath so we don't have to recalculate every time Intellisense is triggered
startList.push(start)
endList.push(end)
tokens = tokens.concat(tmpTokens)
// This was moved to inside the loop. If it isn't, the sections of XPath will be treated
// as a single XPath section instead of multiples
setTimeout(
() => this.reportProblems(tmpTokens, document, variables, namespaces),
0
)
// Reset the xpLexer. If this is not done, existing tokens will not be flushed
// and will be re-added to the tokens list. This might not affect the operation, but it does
// increase the memory required by the tokenizer, potentially running out of memory.
this.xpLexer.reset()
}
const builder = new vscode.SemanticTokensBuilder()
tokens.forEach((token) => {
builder.push(
token.line,
token.startCharacter,
token.length,
token.tokenType,
0
)
})
this.diagnosticList = new Array<vscode.Diagnostic>()
return builder.build()
}
// Identify all sections in the full document that should be treated as XPath
private findAllXPath(document: String): [number, number, number][] {
let tokensFound: [number, number, number][] = []
let charCount = 0
const lines = document.split('\n')
// Regex should match up to the character before we need to start detecting XPath
// In these cases, there is a left curly brace right after the regex match, so
// we may need to adjust the exact points if there are schemas with spaces between
// the open quote and the left curly brace.
// Also note that the start location that we return for processing should NOT include the
// left curly brace. The way that the tokenizer determines when to stop processing
// is to find an extra closing character (curly brace, single quote, or double quote)
// If it doesn't terminate, it will tokenize the remainder of the file.
const xPathRegex = /(\w+)=("|')(?=\{)/
let isComment: Boolean = false
for (let i = 0; i < lines.length; i++) {
let xPathMatch = lines[i].match(xPathRegex)
if (!isComment && lines[i].includes('<!--')) {
isComment = true
}
if (isComment) {
let closeIndex = lines[i].search('-->')
if (closeIndex !== -1) {
isComment = false
if (xPathMatch) {
if (closeIndex > lines[i].search(xPathMatch[0])) {
xPathMatch = null
}
}
} else {
xPathMatch = null
}
}
// The items in the tuple are used to determine the start point for the tokenizer. They are
// the line number, position offset in the line, and document offset.
// The +1 on the position offset accounts for the opening curly brace.
if (xPathMatch) {
const lineOffset =
lines[i].search(xPathMatch[0]) + xPathMatch[0].length + 1
tokensFound.push([
i,
(xPathMatch.index || 0) + xPathMatch[0].length + 1,
charCount + lineOffset,
])
}
// Used to keep track of the document offset. The +1 accounts for newlines.
charCount += lines[i].length + 1
}
return tokensFound
}
// Find the names of all variables in the file
private findAllVariablesandNamespaces(
document: String | undefined
): [string[], string[]] {
if (document === undefined) {
return [[], []]
}
const lines = document.split(EOL)
const variableRegex = /(dfdl:defineVariable.*name=\")(.*?)\"/
// These are built-in predefined variables. Double check if we still need to hardcode these
// once we have the ability to create a DFDL model outside of a Parse operation.
const variables: string[] = [
'dfdl:encoding',
'dfdl:byteOrder',
'dfdl:binaryFloatRep',
'dfdl:outputNewLine',
]
const namespaceRegex = /xmlns:(.*?)=\"/g
const namespaces: string[] = []
// Capture and return a list of variable names
for (let i = 0; i < lines.length; i++) {
const variableMatch = lines[i].match(variableRegex)
if (variableMatch) {
variables.push(variableMatch[2])
}
// If a DFDL schema has multiple namespaces defined on the same line, we need to make sure we are globally
// matching for the namespace pattern so that we don't miss any
let namespaceMatch: RegExpExecArray | null = null
// Continue matching until we don't find any more namespaces on the same line
do {
namespaceMatch = namespaceRegex.exec(lines[i])
if (namespaceMatch) {
namespaces.push(namespaceMatch[1])
}
} while (namespaceMatch)
}
return [variables, namespaces]
}
// This function will produce the error/warning list for vscode
private reportProblems(
allTokens: Token[],
document: vscode.TextDocument,
variables: string[],
namespaces: string[]
) {
let diagnostics = XsltTokenDiagnostics.calculateDiagnostics(
document,
allTokens,
DocumentChangeHandler.lastXMLDocumentGlobalData,
[],
[],
variables,
namespaces
)
diagnostics.forEach((diag) => {
this.diagnosticList.push(diag)
})
if (this.diagnosticList.length > 0) {
this.collection.set(document.uri, this.diagnosticList)
} else {
this.collection.clear()
}
}
}
export function isXPath(position: vscode.Position): boolean {
for (let i = 0; i < startList.length; i++) {
if (
position.isBeforeOrEqual(endList[i]) &&
position.isAfterOrEqual(startList[i])
)
return true
}
return false
}
export function activate(context: vscode.ExtensionContext) {
const docChangeHandler = new DocumentChangeHandler()
let activeEditor = vscode.window.activeTextEditor
if (activeEditor) {
docChangeHandler.registerXMLEditor(activeEditor)
}
const xpathDiagnosticsCollection =
vscode.languages.createDiagnosticCollection('dfdl')
context.subscriptions.push(
vscode.languages.registerDocumentSemanticTokensProvider(
{ language: 'dfdl' },
new XPathSemanticTokensProvider(xpathDiagnosticsCollection),
legend
)
)
}