blob: cda97e0e7668d64146521e77fcd77403771d2ba0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.storage.regex;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.storage.text.TextLineDeserializer;
import org.apache.tajo.storage.text.TextLineSerDe;
import org.apache.tajo.storage.text.TextLineSerializer;
/**
* This is an implementation copied from hive RegexSerDe
*
* RegexSerDe uses regular expression (regex) to serialize/deserialize.
*
* It can deserialize the data using regex and extracts groups as columns. It
* can also serialize the tuple using a format string.
*
* In deserialization stage, if a row does not match the regex, then all columns
* in the row will be NULL. If a row matches the regex but has less than
* expected groups, the missing groups will be NULL. If a row matches the regex
* but has more than expected groups, the additional groups are just ignored.
*
* In serialization stage, it uses java string formatter to format the columns
* into a row. If the output type of the column in a query is not a string, it
* will be automatically converted to String by tajo.
*
* For the format of the format String, please refer to
* {@link http://java.sun.com/j2se/1.5.0/docs/api/java/util/Formatter.html#syntax}
*/
public class RegexLineSerDe extends TextLineSerDe {
@Override
public TextLineDeserializer createDeserializer(Schema schema, TableMeta meta, Column [] projected) {
return new RegexLineDeserializer(schema, meta, projected);
}
@Override
public TextLineSerializer createSerializer(Schema schema, TableMeta meta) {
return new RegexLineSerializer(schema, meta);
}
}