blob: 5512c636f10af13d6558abbed57d785f645ac337 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.metron.parsers;
import org.apache.commons.io.FileUtils;
import org.apache.metron.parsers.interfaces.MessageParser;
import org.apache.metron.parsers.interfaces.MessageParserResult;
import org.json.simple.JSONObject;
import org.junit.Rule;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.migrationsupport.rules.EnableRuleMigrationSupport;
import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.*;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
/**
 * Tests the read-charset handling that parser implementations get from {@link BasicParser}.
 *
 * <p>Two minimal parser stubs are used: one that passes its configuration to
 * {@code setReadCharset} and one that never does, so that decoding can be checked both with an
 * explicitly configured charset and with whatever {@code getReadCharset()} returns otherwise.
 */
@EnableRuleMigrationSupport
public class BasicParserTest {

  /** Key under which the stub parsers store the decoded message text. */
  private static final String KEY1 = "key1";

  private static final String SAMPLE_DATA = "Here is some sample data";

  /**
   * Stub parser that hands its configuration to {@code setReadCharset} and emits exactly one
   * message whose {@link #KEY1} entry is the raw input decoded with {@code getReadCharset()}.
   */
  private static class SomeParserWithCharset extends BasicParser {

    @Override
    public void init() {
      // nothing to initialise for this stub
    }

    @Override
    public void configure(Map<String, Object> config) {
      setReadCharset(config);
    }

    @Override
    public Optional<MessageParserResult<JSONObject>> parseOptionalResult(byte[] parseMessage) {
      String message = new String(parseMessage, getReadCharset());
      Map<String, Object> out = new HashMap<>();
      out.put(KEY1, message);
      MessageParserResult<JSONObject> result = new DefaultMessageParserResult<>(
          Collections.singletonList(new JSONObject(out)));
      return Optional.of(result);
    }
  }

  /** Stub parser whose {@code configure} deliberately never calls {@code setReadCharset}. */
  private static class SomeParserNoCharset extends SomeParserWithCharset {

    @Override
    public void configure(Map<String, Object> config) {
      // don't set the charset
    }
  }

  private SomeParserWithCharset parserWithCharset;
  private SomeParserNoCharset parserNoCharset;
  private Map<String, Object> parserConfig;
  private File fileUTF_16;
  private File fileUTF_8;

  @Rule
  public TemporaryFolder tempFolder = new TemporaryFolder();

  /**
   * Creates fresh parsers, an empty configuration and the sample files encoded as UTF-16 and
   * UTF-8.
   *
   * @throws IOException if a sample file cannot be written
   */
  @BeforeEach
  public void setup() throws IOException {
    // The TemporaryFolder rule is applied through @EnableRuleMigrationSupport, so the folder
    // already exists here. Calling tempFolder.create() again would swap in a second directory
    // and leave the first one behind undeleted.
    parserWithCharset = new SomeParserWithCharset();
    parserNoCharset = new SomeParserNoCharset();
    parserConfig = new HashMap<>();
    fileUTF_16 = new File(tempFolder.getRoot(), "fileUTF-16");
    fileUTF_8 = new File(tempFolder.getRoot(), "fileUTF-8");
    writeDataEncodedAs(fileUTF_16, SAMPLE_DATA, StandardCharsets.UTF_16);
    writeDataEncodedAs(fileUTF_8, SAMPLE_DATA, StandardCharsets.UTF_8);
  }

  /**
   * Writes {@code data} to {@code file} as bytes encoded with {@code charset}.
   *
   * @throws IOException if the file cannot be written
   */
  private void writeDataEncodedAs(File file, String data, Charset charset) throws IOException {
    byte[] bytes = data.getBytes(charset);
    FileUtils.writeByteArrayToFile(file, bytes);
  }

  /**
   * Reads the whole of {@code file} into a String, decoding its bytes with {@code charset}.
   *
   * @throws IOException if the file cannot be read
   */
  private String readDataEncodedAs(File file, Charset charset) throws IOException {
    return FileUtils.readFileToString(file, charset);
  }

  /**
   * Runs {@code parser} on {@code rawMessage}, checks that exactly one message was produced and
   * returns that message's {@link #KEY1} value.
   *
   * @throws AssertionError if the parser returns an empty {@link Optional} or the number of
   *     messages is not one
   */
  private static Object parseSingleValue(SomeParserWithCharset parser, byte[] rawMessage) {
    MessageParserResult<JSONObject> result = parser.parseOptionalResult(rawMessage)
        .orElseThrow(() -> new AssertionError("parser returned an empty Optional"));
    assertThat(result.getMessages().size(), equalTo(1));
    return result.getMessages().get(0).get(KEY1);
  }

  /**
   * Sanity-checks the encoding assumptions the other tests rely on: UTF-16 bytes decoded as
   * UTF-16 survive a round trip through UTF-8, while the same bytes decoded directly as UTF-8
   * give a different string.
   */
  @Test
  public void verify_encoding_translation_assumptions() throws IOException {
    // Decode the UTF-16 file once with the wrong charset (UTF-8) and once with the right one.
    // The UTF-8 decoding should be garbled because UTF-16 bytes need a translation step
    // before they are valid UTF-8.
    String utf16_8 = readDataEncodedAs(fileUTF_16, StandardCharsets.UTF_8);
    String utf16_16 = readDataEncodedAs(fileUTF_16, StandardCharsets.UTF_16);

    // Translate: re-encode the correctly decoded text as UTF-8 and read it back.
    File utf16_16_8 = new File(tempFolder.getRoot(), "outUTF-8");
    writeDataEncodedAs(utf16_16_8, utf16_16, StandardCharsets.UTF_8);
    String utf8_8 = readDataEncodedAs(utf16_16_8, StandardCharsets.UTF_8);

    assertThat(utf8_8, equalTo(utf16_16));
    assertThat(utf8_8, not(equalTo(utf16_8)));
  }

  /** UTF-16 input is decoded correctly when UTF-16 is configured as the read charset. */
  @Test
  public void parses_with_specified_encoding() {
    parserConfig.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString());
    parserWithCharset.configure(parserConfig);

    Object parsed =
        parseSingleValue(parserWithCharset, SAMPLE_DATA.getBytes(StandardCharsets.UTF_16));

    assertThat(parsed, equalTo(SAMPLE_DATA));
  }

  /** UTF-16 input does not decode to the original text when UTF-8 is configured. */
  @Test
  public void values_will_not_match_when_specified_encoding_is_wrong() {
    parserConfig.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_8.toString());
    parserWithCharset.configure(parserConfig);

    Object parsed =
        parseSingleValue(parserWithCharset, SAMPLE_DATA.getBytes(StandardCharsets.UTF_16));

    assertThat(parsed, not(equalTo(SAMPLE_DATA)));
  }

  /** UTF-8 input is decoded correctly when the configuration names no read charset. */
  @Test
  public void parses_with_default_encoding_when_not_configured() {
    parserWithCharset.configure(parserConfig);

    Object parsed =
        parseSingleValue(parserWithCharset, SAMPLE_DATA.getBytes(StandardCharsets.UTF_8));

    assertThat(parsed, equalTo(SAMPLE_DATA));
  }

  /** UTF-8 input is decoded correctly when the parser never calls {@code setReadCharset}. */
  @Test
  public void parses_with_default_encoding_from_basic_parser() {
    parserNoCharset.configure(parserConfig);

    Object parsed =
        parseSingleValue(parserNoCharset, SAMPLE_DATA.getBytes(StandardCharsets.UTF_8));

    assertThat(parsed, equalTo(SAMPLE_DATA));
  }
}