blob: aaa9d62a598e0e6c4fe88e48fb5a96aa88c1b7ef [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.scripting.jruby;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Arrays;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.data.DataType;
import org.apache.pig.parser.ParserException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.jruby.Ruby;
import org.jruby.RubyHash;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyFixnum;
import org.jruby.RubyModule;
import org.jruby.RubyObject;
import org.jruby.RubyRange;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Block;
import org.jruby.runtime.builtin.IRubyObject;
//TODO implement all of the merge functions
/**
* This class encapsulates a native Schema object, and provides a more convenient
* interface for manipulating Schemas. It hides the Schema/FieldSchema distinction
* from the user, and tries to present a cleaner, more Ruby-esque API to the user.
* For general information on JRuby's API definition annotations,
* see {@link RubyDataBag}.
*/
@JRubyClass(name = "Schema")
public class RubySchema extends RubyObject {
// Version identifier used by Java serialization.
private static final long serialVersionUID = 1L;
/**
 * Pattern used in the conversion from ruby arguments to a valid Schema string. It detects
 * a "bag", "map", or "tuple" keyword (optionally preceded by an "alias:" prefix) that is
 * directly followed by a comma or the end of the string, i.e. one that is missing its
 * trailing {}, [], or () respectively. It is used for convenience.
 * <p>
 * The word boundary in front of the keyword is required so that an alias which merely ends
 * in one of the keywords (for example "bitmap" or "mybag") is not mistaken for a type.
 */
private static final Pattern bmtPattern = Pattern.compile("(?:\\S+:)?\\b(bag|map|tuple)\\s*(?:,|$)", Pattern.CASE_INSENSITIVE);
/**
 * This is the encapsulated Schema object. It is assigned by every constructor and is
 * replaced with a fresh Schema each time {@link #initialize} runs.
 */
private Schema internalSchema;
// Allocator passed to the runtime when the class is defined; it creates an empty RubySchema.
private static final ObjectAllocator ALLOCATOR = new ObjectAllocator() {
    public IRubyObject allocate(Ruby rubyRuntime, RubyClass schemaClass) {
        RubySchema instance = new RubySchema(rubyRuntime, schemaClass);
        return instance;
    }
};
/**
 * Registers the "Schema" class with the given runtime: the class is defined under Object,
 * mixes in Enumerable, and exposes every annotated method of RubySchema.
 *
 * @param runtime an instance of the Ruby runtime
 * @return a RubyClass object with metadata about the registered class
 */
public static RubyClass define(Ruby runtime) {
    final RubyClass schemaClass = runtime.defineClass("Schema", runtime.getObject(), ALLOCATOR);
    // Every Java RubySchema instance counts as a kind of this Ruby class.
    RubyModule.KindOf kindOfCheck = new RubyModule.KindOf() {
        public boolean isKindOf(IRubyObject candidate, RubyModule type) {
            return candidate instanceof RubySchema;
        }
    };
    schemaClass.kindOf = kindOfCheck;
    schemaClass.includeModule(runtime.getEnumerable());
    schemaClass.defineAnnotatedMethods(RubySchema.class);
    return schemaClass;
}
/**
 * Creates a RubySchema that encapsulates a new, empty Schema.
 *
 * @param ruby an instance of the ruby runtime
 * @param rc   an instance of the class object with metadata
 */
protected RubySchema(final Ruby ruby, RubyClass rc) {
    super(ruby, rc);
    this.internalSchema = new Schema();
}
/**
 * Creates a RubySchema around the given Schema, either sharing it or copying it.
 *
 * @param ruby an instance of the ruby runtime
 * @param rc   an instance of the class object with metadata
 * @param s    a Schema to encapsulate
 * @param copy if true, a copy of s is encapsulated; if false, s itself is
 *             encapsulated directly
 */
protected RubySchema(final Ruby ruby, RubyClass rc, Schema s, boolean copy) {
    super(ruby, rc);
    this.internalSchema = copy ? new Schema(s) : s;
}
/**
 * Creates a RubySchema that encapsulates a copy of the given Schema.
 *
 * @param ruby an instance of the ruby runtime
 * @param rc   an instance of the class object with metadata
 * @param s    a Schema whose copy will be encapsulated
 */
protected RubySchema(final Ruby ruby, RubyClass rc, Schema s) {
    super(ruby, rc);
    // Always copy, so the caller's Schema object itself is not encapsulated.
    this.internalSchema = new Schema(s);
}
/**
* This constructor is provided for convenience and sets the
* internal Schema equal to the result of a call to
* {@link Utils#getSchemaFromString}.
*
* @param ruby an instance of the ruby runtime
* @param rc an instance of the class object with meatadata
* @param s a String which will be passed to
* {@link Utils#getSchemaFromString}
*/
protected RubySchema(final Ruby ruby, RubyClass rc, String s) {
super(ruby, rc);
try {
internalSchema = Utils.getSchemaFromString(s);
} catch (ParserException e) {
throw new RuntimeException("Error converting String to Schema: " + s, e);
}
}
/**
 * The ruby initializer. It resets the encapsulated Schema to an empty one, converts each
 * argument with {@link #rubyArgToSchema}, appends the resulting fields in order, and then
 * resolves collisions between generated names. With no arguments the Schema stays empty.
 *
 * @param args any number of valid arguments to {@link #rubyArgToSchema}
 * @return the initialized RubySchema (this object)
 */
@JRubyMethod(rest = true)
public RubySchema initialize(IRubyObject[] args) {
    internalSchema = new Schema();
    for (int i = 0; i < args.length; i++) {
        List<Schema.FieldSchema> converted = rubyArgToSchema(args[i]).getFields();
        for (Schema.FieldSchema field : converted) {
            internalSchema.add(field);
        }
    }
    RubySchema.fixSchemaNames(internalSchema);
    return this;
}
/**
 * Static helper (ruby names "by" and "bytearray") that creates a null aliased bytearray
 * Schema. This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased bytearray schema
 */
@JRubyMethod(meta = true, name = {"by", "bytearray"})
public static RubySchema nullBytearray(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.BYTEARRAY;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "bool" and "boolean") that creates a null aliased boolean
 * Schema. This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased boolean schema
 */
@JRubyMethod(meta = true, name = {"bool", "boolean"})
public static RubySchema nullBoolean(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.BOOLEAN;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "c" and "chararray") that creates a null aliased chararray
 * Schema. This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased chararray schema
 */
@JRubyMethod(meta = true, name = {"c", "chararray"})
public static RubySchema nullChararray(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.CHARARRAY;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "l" and "long") that creates a null aliased long Schema.
 * This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased long schema
 */
@JRubyMethod(meta = true, name = {"l", "long"})
public static RubySchema nullLong(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.LONG;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "i" and "int") that creates a null aliased int Schema.
 * This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased int schema
 */
@JRubyMethod(meta = true, name = {"i", "int"})
public static RubySchema nullInt(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.INTEGER;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "d" and "double") that creates a null aliased double Schema.
 * This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased double schema
 */
@JRubyMethod(meta = true, name = {"d", "double"})
public static RubySchema nullDouble(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.DOUBLE;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "f" and "float") that creates a null aliased float Schema.
 * This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased float schema
 */
@JRubyMethod(meta = true, name = {"f", "float"})
public static RubySchema nullFloate(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.FLOAT;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "dt" and "datetime") that creates a null aliased datetime
 * Schema. This is useful when the output should not carry an explicit name, which
 * {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased datetime schema
 */
@JRubyMethod(meta = true, name = {"dt", "datetime"})
public static RubySchema nullDateTime(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.DATETIME;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "t" and "tuple", called without arguments) that creates a
 * null aliased tuple Schema. This is useful when the output should not carry an explicit
 * name, which {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased tuple schema
 */
@JRubyMethod(meta = true, name = {"t", "tuple"})
public static RubySchema nullTuple(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.TUPLE;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "b" and "bag", called without arguments) that creates a
 * null aliased bag Schema. This is useful when the output should not carry an explicit
 * name, which {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased bag schema
 */
@JRubyMethod(meta = true, name = {"b", "bag"})
public static RubySchema nullBag(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.BAG;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Static helper (ruby names "m" and "map", called without arguments) that creates a
 * null aliased map Schema. This is useful when the output should not carry an explicit
 * name, which {@link Utils#getSchemaFromString} would assign.
 *
 * @param context the context the method is being executed in
 * @param self    the Ruby class object this method is being statically invoked against
 * @return a null-aliased map schema
 */
@JRubyMethod(meta = true, name = {"m", "map"})
public static RubySchema nullMap(ThreadContext context, IRubyObject self) {
    final byte pigType = DataType.MAP;
    return makeNullAliasRubySchema(context, pigType);
}
/**
 * Helper that builds a RubySchema holding a single field of the given type whose alias
 * is null.
 *
 * @param context the context the method is being executed in
 * @param type    the DataType constant to make the Schema from
 * @return a RubySchema object encapsulating a Schema of the specified type
 */
private static RubySchema makeNullAliasRubySchema(ThreadContext context, byte type) {
    Ruby runtime = context.getRuntime();
    RubyClass schemaClass = runtime.getClass("Schema");
    Schema.FieldSchema unnamedField = new Schema.FieldSchema(null, type);
    Schema wrapper = new Schema(unnamedField);
    return new RubySchema(runtime, schemaClass, wrapper);
}
/**
 * This is a helper function which converts objects into Schema objects. The valid
 * options are as follows:
 * <p>
 * A String or RubyString, which will have {@link Utils#getSchemaFromString} called on it
 * (after any bare "bag", "map", or "tuple" keyword has been given its missing brackets).
 * <p>
 * A RubySchema, whose encapsulated Schema is returned directly (not copied). IMPORTANT
 * NOTE: since this API abstracts away from the distinction between Schema/FieldSchema,
 * where in Pig a Schema.FieldSchema might be passed around, internally to this class a
 * Schema encapsulating it is generally passed around instead.
 * <p>
 * A RubyArray, which creates the Schema for a Tuple (named "tuple_0") whose elements are
 * the fields obtained by applying these same rules to each element of the array.
 * <p>
 * A RubyHash in the form of:<br>
 * <code>{"name:tuple"=>["x:int","y:int,z:int"], "name2:bag"=>["a:chararray"]}</code><br>
 * Each key must convert to a single tuple, bag, or map field, and each value must be an
 * array.
 * <p>
 * Anything else (including null) is rejected with a RuntimeException. An IOException
 * raised during conversion is rethrown as a RuntimeException with the cause attached.
 *
 * @param arg an object (generally an IRubyObject or String) to convert. See above for
 *            the rules on valid arguments
 * @return the Schema constructed for the given argument
 */
public static Schema rubyArgToSchema(Object arg) {
    try {
        if (arg instanceof String || arg instanceof RubyString) {
            return stringToSchema(arg.toString());
        } else if (arg instanceof RubySchema) {
            // In the case of a RubySchema, can just return the encapsulated Schema
            return ((RubySchema)arg).getInternalSchema();
        } else if (arg instanceof RubyArray) {
            return arrayToTupleSchema((RubyArray)arg);
        } else if (arg instanceof RubyHash) {
            return hashToSchema((RubyHash)arg);
        } else {
            throw new RuntimeException("Bad argument given to rubyToSchema: " + arg + (arg != null ? " class type " + arg.getClass().toString() : ""));
        }
    } catch (IOException e) {
        throw new RuntimeException("Error converting ruby to Schema: " + arg, e);
    }
}

// Replaces each bare "bag"/"map"/"tuple" keyword found by bmtPattern with the bracket pair
// that Utils.getSchemaFromString expects, then parses the resulting String.
private static Schema stringToSchema(String s) throws IOException {
    Matcher m = bmtPattern.matcher(s);
    while (m.find()) {
        String type = m.group(1);
        String brackets;
        if (type.equalsIgnoreCase("bag")) {
            brackets = "{}";
        } else if (type.equalsIgnoreCase("map")) {
            brackets = "[]";
        } else if (type.equalsIgnoreCase("tuple")) {
            brackets = "()";
        } else {
            throw new RuntimeException("Arriving here should be impossible");
        }
        s = s.substring(0, m.start(1)) + brackets + s.substring(m.end(1));
        // The String changed, so matching has to restart on the new value.
        m = bmtPattern.matcher(s);
    }
    return Utils.getSchemaFromString(s);
}

// Converts every element of the array and gathers the resulting fields into one tuple
// field named "tuple_0".
private static Schema arrayToTupleSchema(RubyArray ary) throws IOException {
    Schema elements = new Schema();
    for (Object o : ary) {
        for (Schema.FieldSchema fs : rubyArgToSchema(o).getFields()) {
            elements.add(fs);
        }
    }
    return new Schema(new Schema.FieldSchema("tuple_0", elements, DataType.TUPLE));
}

// Converts each key into a single bag/tuple/map field and fills that field's nested schema
// from the key's array value; the fields are collected into one Schema.
private static Schema hashToSchema(RubyHash hash) throws IOException {
    Schema hashSchema = new Schema();
    for (Object key : hash.keySet()) {
        Schema keySchema = rubyArgToSchema(key);
        if (keySchema.size() != 1) {
            throw new RuntimeException("Hash key must be singular");
        }
        Schema.FieldSchema fs = keySchema.getField(0);
        Object value = hash.get(key);
        if (!(value instanceof RubyArray)) {
            throw new RuntimeException("Hash value must be an Array");
        }
        byte type = fs.type;
        if (type == DataType.BAG) {
            // A bag keeps the tuple wrapper produced for the array
            fs.schema = rubyArgToSchema(value);
        } else if (type == DataType.TUPLE || type == DataType.MAP) {
            // A tuple or map takes the fields inside the tuple wrapper
            fs.schema = rubyArgToSchema(value).getField(0).schema;
        } else {
            throw new RuntimeException("Hash key must be tuple map or bag");
        }
        hashSchema.add(fs);
    }
    return hashSchema;
}
/**
 * Ruby class method which takes a name and an array of arguments and constructs a named
 * Tuple schema from them.
 *
 * @param context the context the method is being executed in
 * @param self    the RubyClass for the Class object this was invoked on
 * @param arg1    the name for the RubySchema
 * @param arg2    a list of arguments to instantiate the new RubySchema
 * @return the new Tuple RubySchema
 */
@JRubyMethod(meta = true, name = {"t", "tuple"})
public static RubySchema tuple(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) {
    RubySchema named = tuple(context, self, arg2);
    named.setNameIf(arg1);
    return named;
}
/**
 * Ruby class method which takes an array of arguments and constructs a Tuple schema from
 * them via {@link #rubyArgToSchema}. The name is set automatically. Any argument that is
 * not a RubyArray is rejected.
 *
 * @param context the context the method is being executed in
 * @param self    the RubyClass for the Class object this was invoked on
 * @param arg     a list of arguments to instantiate the new RubySchema
 * @return the new RubySchema
 */
@JRubyMethod(meta = true, name = {"t", "tuple"})
public static RubySchema tuple(ThreadContext context, IRubyObject self, IRubyObject arg) {
    if (!(arg instanceof RubyArray)) {
        throw new RuntimeException("Bad argument given to Schema.tuple");
    }
    Schema tupleSchema = rubyArgToSchema(arg);
    Ruby runtime = context.getRuntime();
    return new RubySchema(runtime, runtime.getClass("Schema"), tupleSchema);
}
/**
 * Ruby class method which takes a name and an array of arguments and constructs a named
 * Map schema from them.
 *
 * @param context the context the method is being executed in
 * @param self    the RubyClass for the Class object this was invoked on
 * @param arg1    the name for the RubySchema
 * @param arg2    a list of arguments to instantiate the new RubySchema
 * @return the new RubySchema
 */
@JRubyMethod(meta = true, name = {"m", "map"})
public static RubySchema map(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) {
    RubySchema named = map(context, self, arg2);
    named.setNameIf(arg1);
    return named;
}
/**
 * Ruby class method which takes an array of arguments and constructs a Map schema from
 * them. The arguments are first turned into a tuple schema; the fields nested in that
 * tuple then become the nested schema of a map field named "map_0".
 *
 * @param context the context the method is being executed in
 * @param self    the RubyClass for the Class object this was invoked on
 * @param arg     a list of arguments to instantiate the new RubySchema
 * @return the new RubySchema
 */
@JRubyMethod(meta = true, name = {"m", "map"})
public static RubySchema map(ThreadContext context, IRubyObject self, IRubyObject arg) {
    Schema tupleSchema = tuple(context, self, arg).getInternalSchema();
    Ruby runtime = context.getRuntime();
    RubyClass schemaClass = runtime.getClass("Schema");
    try {
        Schema elements = tupleSchema.getField(0).schema;
        Schema.FieldSchema mapField = new Schema.FieldSchema("map_0", elements, DataType.MAP);
        return new RubySchema(runtime, schemaClass, new Schema(mapField));
    } catch (FrontendException e) {
        throw new RuntimeException("Error making map", e);
    }
}
/**
 * Ruby class method which takes a name and an array of arguments and constructs a named
 * Bag schema from them.
 *
 * @param context the context the method is being executed in
 * @param self    the RubyClass for the Class object this was invoked on
 * @param arg1    the name for the RubySchema
 * @param arg2    a list of arguments to instantiate the new RubySchema
 * @return the new RubySchema
 */
@JRubyMethod(meta = true, name={"b", "bag"})
public static RubySchema bag(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) {
    RubySchema named = bag(context, self, arg2);
    named.setNameIf(arg1);
    return named;
}
/**
 * Ruby class method which takes an array of arguments and constructs a Bag schema from
 * them. The arguments are first turned into a tuple schema, which then becomes the nested
 * schema of a bag field named "bag_0".
 *
 * @param context the context the method is being executed in
 * @param self    the RubyClass for the Class object this was invoked on
 * @param arg     a list of arguments to instantiate the new RubySchema
 * @return the new RubySchema
 */
@JRubyMethod(meta = true, name = {"b", "bag"})
public static RubySchema bag(ThreadContext context, IRubyObject self, IRubyObject arg) {
    Schema s = tuple(context, self, arg).getInternalSchema();
    Ruby runtime = context.getRuntime();
    try {
        return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema("bag_0", s, DataType.BAG)));
    } catch (FrontendException e) {
        // The message names the bag; it previously said "map".
        throw new RuntimeException("Error making bag", e);
    }
}
/**
 * This regex detects names that could possibly collide and that may therefore be renamed:
 * bag_#, tuple_#, map_#, and val_#. It is compiled once, because fixSchemaNames recurses.
 */
private static final Pattern generatedNamePattern = Pattern.compile("(bag_|tuple_|map_|val_)(\\d+)", Pattern.CASE_INSENSITIVE);

/**
 * This method will fix any name conflicts in a schema. It's important to note that
 * this will change the Schema object itself. It will deal with any collisions in things
 * named tuple_#, bag_#, map_#, or val_#, as these are generally names generated by
 * Utils.getSchemaFromString. In the case of another name conflict, it will not be
 * changed, as that name conflict was created by the user. Fields with a null alias are
 * skipped entirely. Nested schemas are fixed recursively; for a bag, the schema nested in
 * the bag's first field is the one that is fixed.
 *
 * @param s a Schema object to fix in place; null is ignored
 */
private static void fixSchemaNames(Schema s) {
    if (s == null) {
        return;
    }
    Set<String> names = new HashSet<String>(s.size(), 1.0f);
    for (Schema.FieldSchema fs : s.getFields()) {
        if (fs.alias == null) {
            continue;
        }
        Matcher m = generatedNamePattern.matcher(fs.alias);
        if (m.matches() && names.contains(fs.alias)) {
            // Keep the prefix and bump the number until the name is unused at this level
            String prefix = m.group(1);
            int suffix = Integer.parseInt(m.group(2));
            while (names.contains(prefix + suffix)) {
                suffix++;
            }
            fs.alias = prefix + suffix;
        }
        names.add(fs.alias);
        if (fs.schema != null) {
            if (fs.type == DataType.BAG) {
                try {
                    fixSchemaNames(fs.schema.getField(0).schema);
                } catch (FrontendException e) {
                    throw new RuntimeException("Error recursively fixing schema: " + s, e);
                }
            } else {
                fixSchemaNames(fs.schema);
            }
        }
    }
}
/**
 * Convenience method which sets the name of the internalSchema to the given argument,
 * provided the argument is a RubyString; anything else is rejected.
 *
 * @param arg a RubyString to set the name of the encapsulated Schema object
 */
private void setNameIf(IRubyObject arg) {
    if (!(arg instanceof RubyString)) {
        throw new RuntimeException("Bad name given");
    }
    setName(arg.toString());
}
/**
 * Sets the alias of the first field of the encapsulated Schema to the given name. If that
 * field is anything other than a tuple, bag, or map, an error is thrown.
 *
 * @param name a String to set the name of the encapsulated Schema object
 */
private void setName(String name) {
    Schema.FieldSchema first;
    try {
        first = internalSchema.getField(0);
    } catch (FrontendException e) {
        throw new RuntimeException("Error getting field from schema: " + internalSchema, e);
    }
    boolean nameable = first.type == DataType.TUPLE
        || first.type == DataType.BAG
        || first.type == DataType.MAP;
    if (!nameable) {
        throw new RuntimeException("setName cannot be set on Schema: " + internalSchema);
    }
    first.alias = name;
}
/**
 * Ruby "to_s" / "inspect": delegates to the printing of the encapsulated Schema.
 *
 * @param context the context the method is being executed in
 * @return a String representation of the encapsulated Schema object
 */
@JRubyMethod(name = {"to_s", "inspect"})
public RubyString toString(ThreadContext context) {
    Ruby runtime = context.getRuntime();
    String rendered = internalSchema.toString();
    return RubyString.newString(runtime, rendered);
}
/**
 * Ruby "[]" / "slice" with one argument: gives access to elements of the RubySchema.
 * The argument may be a single numeric index, a Range of indices (both ends taken from the
 * range's min and max), or a String alias. The returned object wraps the FieldSchema
 * objects stored internally, so if the user wants to make changes without affecting this
 * object, the result must be cloned.
 *
 * @param context the context the method is being executed in
 * @param arg     a Fixnum index, Range object to specify a range of values to return, or
 *                a String to look up by alias name
 * @return the RubySchema object encapsulating the found Schema
 */
@JRubyMethod(name = {"[]", "slice"})
public RubySchema get(ThreadContext context, IRubyObject arg) {
    Ruby runtime = context.getRuntime();
    if (arg instanceof RubyFixnum) {
        int index = (int)((RubyFixnum)arg).getLongValue();
        Schema single;
        try {
            single = new Schema(internalSchema.getField(index));
        } catch (FrontendException e) {
            throw new RuntimeException("Invalid index given to get function: " + index, e);
        }
        // Wrapped without copying: the field is the one held by this schema
        return new RubySchema(runtime, runtime.getClass("Schema"), single, false);
    }
    if (arg instanceof RubyRange) {
        RubyRange range = (RubyRange)arg;
        int first = (int)((RubyFixnum)range.min(context, Block.NULL_BLOCK)).getLongValue();
        int last = (int)((RubyFixnum)range.max(context, Block.NULL_BLOCK)).getLongValue();
        RubyClass schemaClass = runtime.getClass("Schema");
        List<Schema.FieldSchema> window = internalSchema.getFields().subList(first, last + 1);
        return new RubySchema(runtime, schemaClass, new Schema(window), false);
    }
    if (arg instanceof RubyString) {
        RubyClass schemaClass = runtime.getClass("Schema");
        try {
            Schema byAlias = new Schema(internalSchema.getField(arg.toString()));
            return new RubySchema(runtime, schemaClass, byAlias, false);
        } catch (FrontendException e) {
            throw new RuntimeException("Unable to find field " + arg.toString() + " in schema " + internalSchema, e);
        }
    }
    throw new RuntimeException("Invalid argument given to get function: " + arg.toString());
}
/**
 * This is a version of [] which allows the range to be specified as such: [1,2]. The
 * fields from the start index (inclusive) up to the end index (exclusive) are returned.
 *
 * @param context the context the method is being executed in
 * @param arg1    a Fixnum start index
 * @param arg2    a Fixnum end index
 * @return the RubySchema object encapsulating the found Schema
 */
@JRubyMethod(name = {"[]", "slice"})
public RubySchema get(ThreadContext context, IRubyObject arg1, IRubyObject arg2) {
    if (!(arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum)) {
        throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString()+ " )");
    }
    Ruby runtime = context.getRuntime();
    int from = (int)((RubyFixnum)arg1).getLongValue();
    int to = (int)((RubyFixnum)arg2).getLongValue();
    RubyClass schemaClass = runtime.getClass("Schema");
    List<Schema.FieldSchema> window = internalSchema.getFields().subList(from, to);
    return new RubySchema(runtime, schemaClass, new Schema(window), false);
}
/**
 * This allows the users to set an index or a range of values to
 * a specified RubySchema. The first argument must be a Fixnum or Range,
 * and the second argument may optionally be a Fixnum. Two leading Fixnums are
 * treated as a start index (inclusive) and an end index (exclusive); a single
 * Fixnum selects that one index; a Range selects from its min to its max. The
 * selected fields are replaced by the fields of a RubySchema instantiated
 * from the remaining arguments.
 * <p>
 * All arguments are validated and converted before the encapsulated Schema is
 * touched, so a bad call leaves this object unchanged.
 *
 * @param context the context the method is being executed in
 * @param args    a varargs which has to be at least length two.
 * @return the RubySchema that was added
 * @throws IndexOutOfBoundsException if the selected indices lie outside the Schema
 */
@JRubyMethod(name = {"[]=", "set"}, required = 2, rest = true)
public RubySchema set(ThreadContext context, IRubyObject[] args) {
    IRubyObject arg1 = args[0];
    IRubyObject arg2 = args[1];
    int min, max;
    // Index of the first argument that describes the replacement schema
    int schemaArgsStart = 1;
    if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = (int)((RubyFixnum)arg2).getLongValue();
        schemaArgsStart = 2;
    } else if (arg1 instanceof RubyFixnum) {
        min = (int)((RubyFixnum)arg1).getLongValue();
        max = min + 1;
    } else if (arg1 instanceof RubyRange) {
        min = (int)((RubyFixnum)((RubyRange)arg1).min(context, Block.NULL_BLOCK)).getLongValue();
        max = (int)((RubyFixnum)((RubyRange)arg1).max(context, Block.NULL_BLOCK)).getLongValue() + 1;
    } else {
        throw new RuntimeException("Bad arguments given to set function: ( " + arg1.toString() + " , " + arg2.toString()+ " )");
    }
    IRubyObject[] schemaArgs = Arrays.copyOfRange(args, schemaArgsStart, args.length);
    if (schemaArgs.length == 0) {
        throw new RuntimeException("Must have schema argument for []=");
    }
    List<Schema.FieldSchema> lfs = internalSchema.getFields();
    if (min < 0 || min > lfs.size() || max > lfs.size()) {
        throw new IndexOutOfBoundsException("Invalid range given to set function: [" + min + ", " + max + ") for schema of size " + lfs.size());
    }
    // Build the replacement first; if the arguments are invalid nothing has been removed yet
    Ruby runtime = context.getRuntime();
    RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(schemaArgs);
    if (max > min) {
        lfs.subList(min, max).clear();
    }
    int insertAt = min;
    for (Schema.FieldSchema fs : rs.getInternalSchema().getFields()) {
        lfs.add(insertAt++, fs);
    }
    RubySchema.fixSchemaNames(internalSchema);
    return rs;
}
/**
 * Ruby "add" / "+": addition semantics that leave the original Schema unmodified. A clone
 * of this object is created and the arguments are added to the clone in place. Any number
 * of arguments may be given, much as with the constructor.
 *
 * @param context the context the method is being executed in
 * @param args    a varargs which can be any valid set of arguments that
 *                can initialize a RubySchema
 * @return the result of the addition
 */
@JRubyMethod(name = {"add", "+"}, rest = true)
public RubySchema add(ThreadContext context, IRubyObject[] args) {
    RubySchema sum = clone(context);
    sum.addInPlace(context, args);
    return sum;
}
/**
 * This method provides addition semantics, modifying the original Schema in place.
 * This method can be given any number of arguments, much as with the constructor.
 * The arguments are converted into a temporary RubySchema whose fields are then appended
 * to the encapsulated Schema, after which colliding generated names are fixed.
 *
 * @param context the context the method is being executed in
 * @param args    a varargs which can be any valid set of arguments that
 *                can initialize a RubySchema
 */
@JRubyMethod(name = "add!", rest = true)
public void addInPlace(ThreadContext context, IRubyObject[] args) {
    Ruby runtime = context.getRuntime();
    RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(args);
    for (Schema.FieldSchema fs : rs.getInternalSchema().getFields()) {
        // Append through Schema.add, as initialize does, rather than through the raw field
        // list, so the Schema's own bookkeeping sees the new field.
        internalSchema.add(fs);
    }
    RubySchema.fixSchemaNames(internalSchema);
}
/**
 * Creates a new RubySchema that encapsulates a copy of this object's Schema.
 *
 * @param context the context the method is being executed in
 * @return a RubySchema copy of the Schema
 */
@JRubyMethod
public RubySchema clone(ThreadContext context) {
    Ruby runtime = context.getRuntime();
    RubyClass schemaClass = runtime.getClass("Schema");
    return new RubySchema(runtime, schemaClass, internalSchema, true);
}
/**
 * Given a field name, this method searches the RubySchema for a FieldSchema with that
 * alias and returns it encapsulated in a RubySchema. Only a RubyString is accepted.
 *
 * @param context the context the method is being executed in
 * @param arg     a RubyString serving as an alias to look for in the Schema
 * @return the found RubySchema
 */
@JRubyMethod
public RubySchema find(ThreadContext context, IRubyObject arg) {
    if (!(arg instanceof RubyString)) {
        throw new RuntimeException("Invalid arguement passed to find: " + arg);
    }
    Ruby runtime = context.getRuntime();
    RubyClass schemaClass = runtime.getClass("Schema");
    Schema match = RubySchema.find(internalSchema, arg.toString());
    return new RubySchema(runtime, schemaClass, match, false);
}
/**
 * This is a helper method which recursively searches for an alias in the given Schema.
 * This is necessary because findFieldSchema uses canonicalName, not name. The fields of
 * the Schema itself are checked first; only if none matches are the nested schemas
 * searched, in field order.
 *
 * @param s     the Schema to search through
 * @param alias the alias to look for
 * @return a Schema wrapping the first matching FieldSchema, or an empty Schema if the
 *         alias was not found
 */
private static Schema find(Schema s, String alias) {
    for (Schema.FieldSchema fs : s.getFields()) {
        if (alias.equals(fs.alias)) {
            return new Schema(fs);
        }
    }
    for (Schema.FieldSchema fs : s.getFields()) {
        if (fs.schema != null) {
            Schema r = RubySchema.find(fs.schema, alias);
            // "Not found" is reported as an empty Schema, never null, so only a non-empty
            // result ends the search; otherwise the remaining siblings are still examined.
            if (r != null && r.size() > 0) {
                return r;
            }
        }
    }
    return new Schema();
}
/**
 * Given a field name, this will return the index of it in the schema. Only a RubyString
 * is accepted. A failure reported by the Schema is rethrown as a RuntimeException that
 * carries the original exception as its cause.
 *
 * @param context the context the method is being executed in
 * @param arg     a field name to look for
 * @return the index for that field name
 */
@JRubyMethod
public RubyFixnum index(ThreadContext context, IRubyObject arg) {
    if (arg instanceof RubyString) {
        try {
            return new RubyFixnum(context.getRuntime(), internalSchema.getPosition(arg.toString()));
        } catch (FrontendException e) {
            // Keep the cause so the underlying lookup failure is not lost
            throw new RuntimeException("Unable to find position for argument: " + arg, e);
        }
    } else {
        throw new RuntimeException("Invalid arguement passed to index: " + arg);
    }
}
/**
 * Ruby "size" / "length": reports the size of the encapsulated Schema.
 *
 * @param context the context the method is being executed in
 * @return the size of the encapsulated Schema
 */
@JRubyMethod(name = {"size", "length"})
public RubyFixnum size(ThreadContext context) {
    Ruby runtime = context.getRuntime();
    int fieldCount = internalSchema.size();
    return new RubyFixnum(runtime, fieldCount);
}
/**
 * Helper to pull the native Java Schema out of the ruby object. The Schema is returned
 * as-is, not copied.
 *
 * @return the encapsulated Schema
 */
public Schema getInternalSchema() {
    return this.internalSchema;
}
/**
 * This method allows access into the Schema nested in the encapsulated Schema. For example,
 * if the encapsulated Schema is a bag Schema, this allows the user to access the schema of
 * the interior Tuple. It only works if the encapsulated Schema has exactly one field and
 * that field has a nested Schema. The nested Schema is wrapped directly, not copied.
 *
 * @param context the context the method is being executed in
 * @return a RubySchema encapsulating the nested Schema
 */
@JRubyMethod(name = {"get", "inner", "in"})
public RubySchema get(ThreadContext context) {
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only return nested schema if there is one schema to get");
    }
    Schema nested;
    try {
        nested = internalSchema.getField(0).schema;
    } catch (FrontendException e) {
        throw new RuntimeException("Schema does not have a nested FieldScema", e);
    }
    if (nested == null) {
        // Wrapping null would only defer the failure to the first use of the result
        throw new RuntimeException("Schema does not have a nested Schema: " + internalSchema);
    }
    Ruby runtime = context.getRuntime();
    return new RubySchema(runtime, runtime.getClass("Schema"), nested, false);
}
/**
 * Ruby "name": returns the alias of the single FieldSchema of the encapsulated Schema.
 * This method only works if the Schema has exactly one FieldSchema.
 *
 * @param context the context the method is being executed in
 * @return the name of the Schema
 */
@JRubyMethod(name = "name")
public RubyString getName(ThreadContext context) {
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only get name if there is one schema present");
    }
    try {
        Ruby runtime = context.getRuntime();
        String alias = internalSchema.getField(0).alias;
        return RubyString.newString(runtime, alias);
    } catch (FrontendException e) {
        throw new RuntimeException("Unable to get field from Schema", e);
    }
}
/**
 * Ruby "name=": sets the alias of the single FieldSchema of the encapsulated Schema.
 * The argument must be a RubyString, and the Schema must have exactly one FieldSchema.
 *
 * @param arg a RubyString to set the name to
 * @return the new name
 */
@JRubyMethod(name = "name=")
public RubyString setName(IRubyObject arg) {
    if (!(arg instanceof RubyString)) {
        throw new RuntimeException("Improper argument passed to 'name=':" + arg);
    }
    if (internalSchema.size() != 1) {
        throw new RuntimeException("Can only set name if there is one schema present");
    }
    try {
        Schema.FieldSchema only = internalSchema.getField(0);
        only.alias = arg.toString();
        return (RubyString)arg;
    } catch (FrontendException e) {
        throw new RuntimeException("Unable to get field from Schema", e);
    }
}
}