blob: 270d5259483dcee2a6a9409fc41b2ea0c9603553 [file] [log] [blame]
#!/usr/bin/env node
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import fs from 'fs-extra';
import snarkdown from 'snarkdown';
const INPUT_FILELIST_FILE = 'script/sql-doc-files.txt';
const OUTPUT_FILE = 'lib/sql-docs.ts';
const MINIMUM_EXPECTED_NUMBER_OF_FUNCTIONS = 198;
const MINIMUM_EXPECTED_NUMBER_OF_DATA_TYPES = 15;
const initialFunctionDocs = {
TABLE: ['external', convertMarkdownToHtml('Defines a logical table from an external.')],
EXTERN: ['inputSource, inputFormat, rowSignature?', convertMarkdownToHtml('Reads external data.')],
TYPE: [
'nativeType',
convertMarkdownToHtml(
'A purely type system modification function what wraps a Druid native type to make it into a SQL type.',
),
],
UNNEST: [
'arrayExpression',
convertMarkdownToHtml("Unnests ARRAY typed values. The source for UNNEST can be an array type column, or an input that's been transformed into an array, such as with helper functions like `MV_TO_ARRAY` or `ARRAY`.")
]
};
function hasHtmlTags(str) {
return /<(a|br|span|div|p|code)\/?>/.test(str);
}
function sanitizeArguments(str) {
str = str.replace(/`<code>&#124;<\/code>`/g, '|'); // convert the hack to get | in a table to a normal pipe
// Ensure there are no more html tags other than the <code> we just removed
if (hasHtmlTags(str)) {
throw new Error(`Arguments contain HTML: ${str}`);
}
return str;
}
function convertMarkdownToHtml(markdown) {
markdown = markdown.replace(/<br\/?>/g, '\n'); // Convert inline <br> to newlines
// Ensure there are no more html tags other than the <br> we just removed
if (hasHtmlTags(markdown)) {
throw new Error(`Markdown contains HTML: ${markdown}`);
}
// Concert to markdown
markdown = snarkdown(markdown);
return markdown.replace(/<a[^>]*>(.*?)<\/a>/g, '$1'); // Remove links
}
const readDoc = async () => {
// Read the list of files from sql-doc-files.txt
const fileList = await fs.readFile(INPUT_FILELIST_FILE, 'utf-8');
const filePaths = fileList
.split('\n')
.map(line => line.trim())
.filter(line => line && !line.startsWith('#')); // Skip empty lines and comments
// Read all files in parallel
const fileContents = await Promise.all(
filePaths.map(filePath => fs.readFile(filePath, 'utf-8'))
);
const data = fileContents.join('\n');
const lines = data.split('\n');
const functionDocs = initialFunctionDocs;
const dataTypeDocs = {};
for (const line of lines) {
const functionMatch = line.match(/^\|\s*`(\w+)\(([^|]*)\)`\s*\|([^|]+)\|(?:([^|]+)\|)?$/);
if (functionMatch) {
const functionName = functionMatch[1];
const args = sanitizeArguments(functionMatch[2]);
const description = convertMarkdownToHtml(functionMatch[3].trim());
functionDocs[functionName] = [args, description];
}
const dataTypeMatch = line.match(/^\|([A-Z]+)\|([A-Z]+)\|([^|]*)\|([^|]*)\|$/);
if (dataTypeMatch) {
dataTypeDocs[dataTypeMatch[1]] = [dataTypeMatch[2], convertMarkdownToHtml(dataTypeMatch[4])];
}
}
// Make sure there are enough functions found
const numFunction = Object.keys(functionDocs).length;
if (!(MINIMUM_EXPECTED_NUMBER_OF_FUNCTIONS <= numFunction)) {
throw new Error(
`Did not find enough function entries did the file structure change? (found ${numFunction} but expected at least ${MINIMUM_EXPECTED_NUMBER_OF_FUNCTIONS})`,
);
}
// Make sure there are at least 10 data types for sanity
const numDataTypes = Object.keys(dataTypeDocs).length;
if (!(MINIMUM_EXPECTED_NUMBER_OF_DATA_TYPES <= numDataTypes)) {
throw new Error(
`Did not find enough data type entries did the file structure change? (found ${numDataTypes} but expected at least ${MINIMUM_EXPECTED_NUMBER_OF_DATA_TYPES})`,
);
}
const content = `/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This file is auto generated and should not be modified
// prettier-ignore
export const SQL_DATA_TYPES = new Map<string, [runtime: string, description: string]>(Object.entries(${JSON.stringify(
dataTypeDocs,
null,
2,
)}));
// prettier-ignore
export const SQL_FUNCTIONS = new Map<string, [args: string, description: string]>(Object.entries(${JSON.stringify(
functionDocs,
null,
2,
)}));
`;
// eslint-disable-next-line no-undef
console.log(`Found ${numDataTypes} data types and ${numFunction} functions`);
await fs.writeFile(OUTPUT_FILE, content, 'utf-8');
};
readDoc();