Web console: adding format notice for CSV and TSV (#14783)
* adding format notice for CSV and TSV
* Update web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
Co-authored-by: 317brian <53799971+317brian@users.noreply.github.com>
* Update web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
Co-authored-by: 317brian <53799971+317brian@users.noreply.github.com>
* Update web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
Co-authored-by: 317brian <53799971+317brian@users.noreply.github.com>
* fix tests
---------
Co-authored-by: 317brian <53799971+317brian@users.noreply.github.com>
diff --git a/licenses.yaml b/licenses.yaml
index 7fc8851..df4952f 100644
--- a/licenses.yaml
+++ b/licenses.yaml
@@ -5319,6 +5319,16 @@
 
 ---
 
+name: "commander"
+license_category: binary
+module: web-console
+license_name: MIT License
+copyright: TJ Holowaychuk
+version: 2.20.0
+license_file_path: licenses/bin/commander.MIT
+
+---
+
 name: "constant-case"
 license_category: binary
 module: web-console
@@ -5419,6 +5429,16 @@
 
 ---
 
+name: "d3-dsv"
+license_category: binary
+module: web-console
+license_name: BSD-3-Clause License
+copyright: Mike Bostock
+version: 2.0.0
+license_file_path: licenses/bin/d3-dsv.BSD3
+
+---
+
 name: "d3-format"
 license_category: binary
 module: web-console
@@ -5787,6 +5807,16 @@
 
 ---
 
+name: "iconv-lite"
+license_category: binary
+module: web-console
+license_name: MIT License
+copyright: Alexander Shtuchkin
+version: 0.4.24
+license_file_path: licenses/bin/iconv-lite.MIT
+
+---
+
 name: "import-fresh"
 license_category: binary
 module: web-console
@@ -6336,6 +6366,16 @@
 
 ---
 
+name: "rw"
+license_category: binary
+module: web-console
+license_name: BSD-3-Clause License
+copyright: Mike Bostock
+version: 1.3.3
+license_file_path: licenses/bin/rw.BSD3
+
+---
+
 name: "safe-buffer"
 license_category: binary
 module: web-console
@@ -6346,6 +6386,16 @@
 
 ---
 
+name: "safer-buffer"
+license_category: binary
+module: web-console
+license_name: MIT License
+copyright: Nikita Skovoroda
+version: 2.1.2
+license_file_path: licenses/bin/safer-buffer.MIT
+
+---
+
 name: "scheduler"
 license_category: binary
 module: web-console
diff --git a/web-console/package-lock.json b/web-console/package-lock.json
index 19e0fe5..f8fb8af 100644
--- a/web-console/package-lock.json
+++ b/web-console/package-lock.json
@@ -24,6 +24,7 @@
         "core-js": "^3.10.1",
         "d3-array": "^2.12.1",
         "d3-axis": "^2.1.0",
+        "d3-dsv": "^2.0.0",
         "d3-scale": "^3.3.0",
         "d3-selection": "^2.0.0",
         "echarts": "^5.4.1",
@@ -60,6 +61,7 @@
         "@types/classnames": "^2.2.9",
         "@types/d3-array": "^2.12.3",
         "@types/d3-axis": "^2.1.3",
+        "@types/d3-dsv": "^2.0.0",
         "@types/d3-scale": "^3.3.2",
         "@types/d3-selection": "^2.0.1",
         "@types/enzyme": "^3.10.3",
@@ -4601,6 +4603,12 @@
         "@types/d3-selection": "^2"
       }
     },
+    "node_modules/@types/d3-dsv": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/@types/d3-dsv/-/d3-dsv-2.0.3.tgz",
+      "integrity": "sha512-15sp4Z+ZVWuZuV0QEDu4cu/0C5vlD+JYXaUMDs8JTWpTJjcrAtjyR1vVwEfbgmU5kLNOOMRTlDCYyWWFx7eh/w==",
+      "dev": true
+    },
     "node_modules/@types/d3-scale": {
       "version": "3.3.2",
       "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-3.3.2.tgz",
@@ -7111,8 +7119,7 @@
     "node_modules/commander": {
       "version": "2.20.0",
       "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz",
-      "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==",
-      "dev": true
+      "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ=="
     },
     "node_modules/commondir": {
       "version": "1.0.1",
@@ -7668,6 +7675,27 @@
       "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-2.0.0.tgz",
       "integrity": "sha512-SPXi0TSKPD4g9tw0NMZFnR95XVgUZiBH+uUTqQuDu1OsE2zomHU7ho0FISciaPvosimixwHFl3WHLGabv6dDgQ=="
     },
+    "node_modules/d3-dsv": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/d3-dsv/-/d3-dsv-2.0.0.tgz",
+      "integrity": "sha512-E+Pn8UJYx9mViuIUkoc93gJGGYut6mSDKy2+XaPwccwkRGlR+LO97L2VCCRjQivTwLHkSnAJG7yo00BWY6QM+w==",
+      "dependencies": {
+        "commander": "2",
+        "iconv-lite": "0.4",
+        "rw": "1"
+      },
+      "bin": {
+        "csv2json": "bin/dsv2json",
+        "csv2tsv": "bin/dsv2dsv",
+        "dsv2dsv": "bin/dsv2dsv",
+        "dsv2json": "bin/dsv2json",
+        "json2csv": "bin/json2dsv",
+        "json2dsv": "bin/json2dsv",
+        "json2tsv": "bin/json2dsv",
+        "tsv2csv": "bin/dsv2dsv",
+        "tsv2json": "bin/dsv2json"
+      }
+    },
     "node_modules/d3-format": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-1.4.1.tgz",
@@ -11154,7 +11182,6 @@
       "version": "0.4.24",
       "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
       "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
-      "dev": true,
       "dependencies": {
         "safer-buffer": ">= 2.1.2 < 3"
       },
@@ -20223,6 +20250,11 @@
         "queue-microtask": "^1.2.2"
       }
     },
+    "node_modules/rw": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/rw/-/rw-1.3.3.tgz",
+      "integrity": "sha512-PdhdWy89SiZogBLaw42zdeqtRJ//zFd2PgQavcICDUgJT5oW10QCRKbJ6bg4r0/UY2M6BWd5tkxuGFRvCkgfHQ=="
+    },
     "node_modules/safe-buffer": {
       "version": "5.1.2",
       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
@@ -20254,8 +20286,7 @@
     "node_modules/safer-buffer": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
-      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
-      "dev": true
+      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
     },
     "node_modules/sass": {
       "version": "1.59.3",
@@ -28288,6 +28319,12 @@
         "@types/d3-selection": "^2"
       }
     },
+    "@types/d3-dsv": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/@types/d3-dsv/-/d3-dsv-2.0.3.tgz",
+      "integrity": "sha512-15sp4Z+ZVWuZuV0QEDu4cu/0C5vlD+JYXaUMDs8JTWpTJjcrAtjyR1vVwEfbgmU5kLNOOMRTlDCYyWWFx7eh/w==",
+      "dev": true
+    },
     "@types/d3-scale": {
       "version": "3.3.2",
       "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-3.3.2.tgz",
@@ -30287,8 +30324,7 @@
     "commander": {
       "version": "2.20.0",
       "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.0.tgz",
-      "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ==",
-      "dev": true
+      "integrity": "sha512-7j2y+40w61zy6YC2iRNpUe/NwhNyoXrYpHMrSunaMG64nRnaf96zO/KMQR4OyN/UnE5KLyEBnKHd4aG3rskjpQ=="
     },
     "commondir": {
       "version": "1.0.1",
@@ -30713,6 +30749,16 @@
       "resolved": "https://registry.npmjs.org/d3-color/-/d3-color-2.0.0.tgz",
       "integrity": "sha512-SPXi0TSKPD4g9tw0NMZFnR95XVgUZiBH+uUTqQuDu1OsE2zomHU7ho0FISciaPvosimixwHFl3WHLGabv6dDgQ=="
     },
+    "d3-dsv": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/d3-dsv/-/d3-dsv-2.0.0.tgz",
+      "integrity": "sha512-E+Pn8UJYx9mViuIUkoc93gJGGYut6mSDKy2+XaPwccwkRGlR+LO97L2VCCRjQivTwLHkSnAJG7yo00BWY6QM+w==",
+      "requires": {
+        "commander": "2",
+        "iconv-lite": "0.4",
+        "rw": "1"
+      }
+    },
     "d3-format": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-1.4.1.tgz",
@@ -33435,7 +33481,6 @@
       "version": "0.4.24",
       "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
       "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
-      "dev": true,
       "requires": {
         "safer-buffer": ">= 2.1.2 < 3"
       }
@@ -40293,6 +40338,11 @@
         "queue-microtask": "^1.2.2"
       }
     },
+    "rw": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/rw/-/rw-1.3.3.tgz",
+      "integrity": "sha512-PdhdWy89SiZogBLaw42zdeqtRJ//zFd2PgQavcICDUgJT5oW10QCRKbJ6bg4r0/UY2M6BWd5tkxuGFRvCkgfHQ=="
+    },
     "safe-buffer": {
       "version": "5.1.2",
       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
@@ -40321,8 +40371,7 @@
     "safer-buffer": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
-      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
-      "dev": true
+      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
     },
     "sass": {
       "version": "1.59.3",
diff --git a/web-console/package.json b/web-console/package.json
index 3c39ae1..75ef5b1 100644
--- a/web-console/package.json
+++ b/web-console/package.json
@@ -78,6 +78,7 @@
     "core-js": "^3.10.1",
     "d3-array": "^2.12.1",
     "d3-axis": "^2.1.0",
+    "d3-dsv": "^2.0.0",
     "d3-scale": "^3.3.0",
     "d3-selection": "^2.0.0",
     "echarts": "^5.4.1",
@@ -114,6 +115,7 @@
     "@types/classnames": "^2.2.9",
     "@types/d3-array": "^2.12.3",
     "@types/d3-axis": "^2.1.3",
+    "@types/d3-dsv": "^2.0.0",
     "@types/d3-scale": "^3.3.2",
     "@types/d3-selection": "^2.0.1",
     "@types/enzyme": "^3.10.3",
diff --git a/web-console/src/components/auto-form/auto-form.tsx b/web-console/src/components/auto-form/auto-form.tsx
index 63a591e..3570468 100644
--- a/web-console/src/components/auto-form/auto-form.tsx
+++ b/web-console/src/components/auto-form/auto-form.tsx
@@ -366,6 +366,7 @@
         disabled={AutoForm.evaluateFunctor(field.disabled, model, false)}
         intent={required && modelValue == null ? AutoForm.REQUIRED_INTENT : undefined}
         multiline={AutoForm.evaluateFunctor(field.multiline, model, false)}
+        height={field.height}
       />
     );
   }
diff --git a/web-console/src/components/formatted-input/formatted-input.tsx b/web-console/src/components/formatted-input/formatted-input.tsx
index a8620e1..c93f3f2 100644
--- a/web-console/src/components/formatted-input/formatted-input.tsx
+++ b/web-console/src/components/formatted-input/formatted-input.tsx
@@ -48,6 +48,7 @@
     intent,
     placeholder,
     multiline,
+    height,
     ...rest
   } = props;
 
@@ -105,6 +106,7 @@
           onBlur={myOnBlur}
           intent={myIntent}
           placeholder={placeholder}
+          style={height ? { height } : undefined}
         />
       ) : (
         <InputGroup
diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
index 42612b1..db3587b 100644
--- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
@@ -18,6 +18,7 @@
 
 import { Code } from '@blueprintjs/core';
 import { range } from 'd3-array';
+import { csvParseRows, tsvParseRows } from 'd3-dsv';
 import type { JSX } from 'react';
 import React from 'react';
 
@@ -2135,33 +2136,53 @@
   return newSpec;
 }
 
+function findValueWithNewline(rows: string[][]): string | undefined {
+  return findMap(rows, cells => cells.find(cell => cell.includes('\n')));
+}
+
 export function issueWithSampleData(
-  sampleData: SampleResponse,
-  spec: Partial<IngestionSpec>,
+  sampleLines: string[],
+  isStreaming: boolean,
 ): JSX.Element | undefined {
-  if (isStreamingSpec(spec)) return;
+  if (!sampleLines.length) return;
 
-  const firstData: string = findMap(sampleData.data, l => l.input?.raw);
-  if (firstData) return;
+  const firstLine = sampleLines[0];
+  if (!isStreaming) {
+    if (firstLine === '{') {
+      return (
+        <>
+          This data looks like a multi-line formatted JSON object. For Druid to parse a text file,
+          it must have one row per event. Consider reformatting your data as{' '}
+          <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+        </>
+      );
+    }
 
-  if (firstData === '{') {
-    return (
-      <>
-        This data looks like multi-line formatted JSON object. For Druid to parse a text file it
-        must have one row per event. Consider reformatting your data as{' '}
-        <ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
-      </>
-    );
+    if (oneOf(firstLine, '[', '[]')) {
+      return (
+        <>
+          This data looks like a multi-line JSON array. For Druid to parse a text file, it must have
+          one row per event. Consider reformatting your data as{' '}
+          <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
+        </>
+      );
+    }
   }
 
-  if (oneOf(firstData, '[', '[]')) {
-    return (
-      <>
-        This data looks like a multi-line JSON array. For Druid to parse a text file it must have
-        one row per event. Consider reformatting your data as{' '}
-        <ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
-      </>
+  const format = guessSimpleInputFormat(sampleLines, isStreaming);
+  const text = sampleLines.join('\n');
+  if (oneOf(format.type, 'csv', 'tsv')) {
+    const valueWithNewline = findValueWithNewline(
+      format.type === 'csv' ? csvParseRows(text) : tsvParseRows(text),
     );
+    if (valueWithNewline) {
+      const formatLabel = format.type.toUpperCase();
+      return (
+        <>
+          {`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line, so ${formatLabel} values can not contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}
+        </>
+      );
+    }
   }
 
   return;
diff --git a/web-console/src/druid-models/input-source/input-source.tsx b/web-console/src/druid-models/input-source/input-source.tsx
index 0a35cea..4e06473 100644
--- a/web-console/src/druid-models/input-source/input-source.tsx
+++ b/web-console/src/druid-models/input-source/input-source.tsx
@@ -25,6 +25,8 @@
 
 export const FILTER_SUGGESTIONS: string[] = [
   '*',
+  '*.jsonl',
+  '*.jsonl.gz',
   '*.json',
   '*.json.gz',
   '*.csv',
@@ -179,6 +181,7 @@
     required: true,
     placeholder: 'Paste your data here',
     multiline: true,
+    height: '400px',
     info: <p>Put you inline data here</p>,
   },
 
diff --git a/web-console/src/utils/general.tsx b/web-console/src/utils/general.tsx
index 4d821ae..a9264a9 100644
--- a/web-console/src/utils/general.tsx
+++ b/web-console/src/utils/general.tsx
@@ -322,15 +322,6 @@
 
 // ----------------------------
 
-export function validJson(json: string): boolean {
-  try {
-    JSONBig.parse(json);
-    return true;
-  } catch (e) {
-    return false;
-  }
-}
-
 export function filterMap<T, Q>(xs: readonly T[], f: (x: T, i: number) => Q | undefined): Q[] {
   return xs.map(f).filter((x: Q | undefined) => typeof x !== 'undefined') as Q[];
 }
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 5ed3ab9..d383556 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -1366,13 +1366,16 @@
 
               this.updateSpec(fillDataSourceNameIfNeeded(newSpec));
             } else {
-              const issue = issueWithSampleData(inputData, spec);
+              const issue = issueWithSampleData(
+                filterMap(inputData.data, l => l.input?.raw),
+                isStreamingSpec(spec),
+              );
               if (issue) {
                 AppToaster.show({
                   icon: IconNames.WARNING_SIGN,
                   intent: Intent.WARNING,
                   message: issue,
-                  timeout: 10000,
+                  timeout: 30000,
                 });
                 return false;
               }
diff --git a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
index bb36e77..b8ed5a7 100644
--- a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
+++ b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
@@ -42,6 +42,7 @@
   getIngestionTitle,
   guessSimpleInputFormat,
   INPUT_SOURCE_FIELDS,
+  issueWithSampleData,
   PLACEHOLDER_TIMESTAMP_SPEC,
 } from '../../../druid-models';
 import {
@@ -50,7 +51,7 @@
   submitTaskQuery,
 } from '../../../helpers';
 import { useQueryManager } from '../../../hooks';
-import { UrlBaser } from '../../../singletons';
+import { AppToaster, UrlBaser } from '../../../singletons';
 import { filterMap, IntermediateQueryState } from '../../../utils';
 import type { SampleSpec } from '../../../utils/sampler';
 import { postToSampler } from '../../../utils/sampler';
@@ -93,7 +94,7 @@
     Execution
   >({
     processQuery: async ({ inputSource, suggestedInputFormat }, cancelToken) => {
-      let guessedInputFormat: InputFormat | undefined;
+      let sampleLines: string[];
       if (mode === 'sampler') {
         const sampleSpec: SampleSpec = {
           type: 'index_parallel',
@@ -125,12 +126,7 @@
 
         const sampleResponse = await postToSampler(sampleSpec, 'input-source-step');
 
-        const sampleLines: string[] = filterMap(sampleResponse.data, l =>
-          l.input ? l.input.raw : undefined,
-        );
-
-        if (!sampleLines.length) throw new Error('No data returned from sampler');
-        guessedInputFormat = guessSimpleInputFormat(sampleLines);
+        sampleLines = filterMap(sampleResponse.data, l => (l.input ? l.input.raw : undefined));
       } else {
         const tableExpression = externalConfigToTableExpression({
           inputSource,
@@ -154,9 +150,24 @@
         );
 
         if (result instanceof IntermediateQueryState) return result;
-        guessedInputFormat = resultToInputFormat(result);
+        sampleLines = result.rows.map((r: string[]) => r[0]);
       }
 
+      if (!sampleLines.length) throw new Error('No data returned from sampler');
+
+      const issue = issueWithSampleData(sampleLines, false);
+      if (issue) {
+        AppToaster.show({
+          icon: IconNames.WARNING_SIGN,
+          intent: Intent.WARNING,
+          message: issue,
+          timeout: 30000,
+        });
+        throw new Error(`Issue detected in sample data.`);
+      }
+
+      const guessedInputFormat = guessSimpleInputFormat(sampleLines);
+
       if (suggestedInputFormat?.type === guessedInputFormat.type) {
         return suggestedInputFormat;
       }
@@ -265,18 +276,18 @@
                 <p>Your raw data can be in any of the following formats:</p>
                 <ul>
                   <li>
-                    <ExternalLink href="http://ndjson.org/">JSON (new line delimited)</ExternalLink>
+                    <ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>
                   </li>
                   <li>CSV</li>
                   <li>TSV</li>
                   <li>
-                    <ExternalLink href="https://parquet.apache.org/">Parquet</ExternalLink>
+                    <ExternalLink href="https://parquet.apache.org">Parquet</ExternalLink>
                   </li>
                   <li>
-                    <ExternalLink href="https://orc.apache.org/">ORC</ExternalLink>
+                    <ExternalLink href="https://orc.apache.org">ORC</ExternalLink>
                   </li>
                   <li>
-                    <ExternalLink href="https://avro.apache.org/">Avro</ExternalLink>
+                    <ExternalLink href="https://avro.apache.org">Avro</ExternalLink>
                   </li>
                   <li>
                     Any line format that can be parsed with a custom regular expression (regex)