# src/main/jruby/pigudf.rb - pig - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #  http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 # Fail fast with a readable error when loaded outside JRuby. `raise` is used rather
 # than `throw`: `throw` is the non-local-exit half of catch/throw, and with no matching
 # `catch` it only surfaces as an "uncaught throw" error that obscures the message.
 raise "pigudf.rb only works under JRuby!" unless RUBY_PLATFORM == "java"
 require 'jruby'
 # Loads Pig's JRuby library into the current runtime.
 # NOTE(review): presumably this is what defines the Schema class used below - confirm.
 org.apache.pig.scripting.jruby.PigJrubyLibrary.new.load(JRuby.runtime, false)

 #TODO output_schema should accept a Schema object as well, and use Schema objects
 #TODO AccumulatorEvalFunc output_schema should not allow you to give a block and defined it

 # This is the base class for run-of-the-mill EvalFuncs. A class just serves to
 # contain similar jobs, as well as allow for method reuse. In the case of simple
 # EvalFuncs, each method will be turned into a UDF (though they do not have to be called).
 #
 # TODO: EXPLAIN SYNTAX

 class PigUdf

   # Class-level registry state. These are class variables (@@), so PigUdf and all of
   # its subclasses read and write the same three values:
   #
   # * @@functions_to_register - Hash of UDF name (String) => EvalFunc. Because every
   #   subclass writes into this one Hash, the Java side only needs
   #   get_functions_to_register; descendant classes do not have to be tracked.
   # * @@class_object_to_name_and_add - the EvalFunc built by the most recent
   #   evalfunc/filterfunc call that has not yet been filed under its class name
   #   (see evalfunc for why that step is deferred).
   # * @@schema - the schema given by the last one-argument output_schema /
   #   output_schema_function call, waiting for the next method definition.

   @@functions_to_register = {}
   @@class_object_to_name_and_add = nil
   @@schema = nil

   # Files the pending evalfunc/filterfunc registration (if any) under the name of its
   # class, sets its method name to "eval", and clears the pending slot. This is
   # deferred because the class built by evalfunc only reports a meaningful to_s once
   # evalfunc has returned and the class has been assigned to a constant.

   def self.name_and_add_class_object
     pending = @@class_object_to_name_and_add
     if pending
       pending.method_name = "eval"
       @@functions_to_register[pending.class_object.to_s] = pending
     end
     @@class_object_to_name_and_add = nil
   end

   # Core registration routine: records a method as a UDF.
   #
   # pig_func_name - name the UDF is stored under (converted to a String); in most
   #                 cases the method name.
   # class_object  - class whose instances the method will be called on.
   # arity         - either a number of arguments or the name of the method whose
   #                 parameter list should be inspected (see Function#required_args).
   # output_schema - schema String, Schema object, schema-function name (Symbol) or nil.
   #
   # Returns the EvalFunc that was stored.

   def self.register_function(pig_func_name, class_object, arity, output_schema)
     name_and_add_class_object

     key = pig_func_name.to_s
     @@functions_to_register[key] = EvalFunc.new(class_object, key, arity, output_schema)
   end

   # Parks func as the pending evalfunc/filterfunc registration, after first filing
   # whatever registration was pending before it.

   def self.set_class_object_to_name_and_add(func)
     name_and_add_class_object
     @@class_object_to_name_and_add = func
   end

   # The most succinct way to define a UDF:
   #
   # UdfName = PigUdf.evalfunc('int') do |arg1|
   #   arg1.length
   # end
   #
   # Takes the schema to be returned and a block that becomes the body of the UDF.
   # Returns a new class with a single method, "eval", that forwards to the block.
   # The UDF is registered under the name of that class, so the result must be assigned
   # to a constant (a name beginning with a capital letter): Ruby names a class created
   # by Class.new after the constant it is assigned to.
   #
   # Because that name is not available until after this method returns, the EvalFunc
   # is created with the placeholder method name "GETFROMCLASSOBJECT" and parked; the
   # next registration or get_functions_to_register call files it under the real name.
   #
   # Raises ArgumentError if no block is given.

   def self.evalfunc(output_schema, &blk)
     raise ArgumentError, "evalfunc requires a block" unless blk
     define_block_udf(output_schema, blk)
   end

   # Identical to evalfunc, except that no schema is given: the schema is always
   # Schema.boolean.
   #
   # Raises ArgumentError if no block is given.

   def self.filterfunc(&blk)
     raise ArgumentError, "filterfunc requires a block" unless blk
     define_block_udf(Schema.boolean, blk)
   end

   # Shared implementation of evalfunc and filterfunc: builds the anonymous class whose
   # "eval" method forwards to blk, parks its EvalFunc for deferred naming, and returns
   # the class.

   def self.define_block_udf(output_schema, blk)
     udf_class = Class.new do
       define_method(:eval) { |*args| blk.call(*args) }
     end
     set_class_object_to_name_and_add EvalFunc.new(udf_class, "GETFROMCLASSOBJECT", blk.arity, output_schema)
     udf_class
   end
   private_class_method :define_block_udf

   # Registers the schema associated with a function. It can be invoked with one
   # argument or two (hence the vague argument names).
   #
   # case 1: one argument
   # The argument is the schema for the NEXT method declared. For example:
   #
   # output_schema "long"
   # def sum x, y
   #   return x + y
   # end
   #
   # The schema is stored in @@schema and picked up by self.method_added, the Ruby hook
   # that fires when the following method is defined.
   #
   # case 2: two arguments
   # arg1 is the name of the function and arg2 its schema (converted with to_s):
   #
   # def sum x, y
   #   return x + y
   # end
   # output_schema :sum, "long"
   #
   # The function is registered immediately via register_function, with this class as
   # the class to instantiate and the function name standing in for the arity.

   def self.output_schema(arg1, arg2 = nil)
     if arg2
       function_name = arg1.to_s
       register_function function_name, self, function_name, arg2.to_s
     else
       @@schema = arg1
     end
   end

   # Same calling conventions as output_schema, but without a schema argument: the
   # schema is always the marker string "FILTERFUNC".
   # NOTE(review): how "FILTERFUNC" is turned into a boolean schema is not visible in
   # this file - confirm against the Schema implementation.

   def self.filter_func(arg1 = nil)
     schema = "FILTERFUNC"
     if arg1
       output_schema arg1.to_s, schema
     else
       output_schema schema
     end
   end

   # output_schema only helps when the output schema is fixed. When it depends on the
   # input schema (e.g. a concat that works for chararrays and for bytearrays), name a
   # schema function here instead. The calling conventions match output_schema:
   #
   # one argument:  name of the schema function to use for the next method declared.
   # two arguments: arg1 is the UDF's function name, arg2 the schema function's name.
   #
   # The schema function name is stored as a Symbol, which is how EvalFunc#schema tells
   # a schema function apart from a schema String. The schema function does not have
   # to be defined yet when this is called.

   def self.output_schema_function(arg1, arg2 = nil) #TODO allow it to also accept a block, as in ComplexPigUdf
     schema_func = (arg2 || arg1).to_sym
     if arg2
       function_name = arg1.to_s
       register_function function_name, self, function_name, schema_func
     else
       @@schema = schema_func
     end
   end

   # Javaists love their camelCase
   class << self
     alias :outputSchema :output_schema
     alias :filterFunc :filter_func
     alias :outputSchemaFunction :output_schema_function
   end

   # Ruby hook, called whenever an instance method is defined on a subclass. Every
   # declared method is registered as a UDF: with the pending @@schema if one was set,
   # otherwise with no schema (EvalFunc#schema then reports bytearray).
   #
   # A method that is already registered is left alone when no schema is pending, so a
   # schema attached with the two-argument output_schema form is not wiped out by a
   # later (re)definition. The registry is keyed by String while Ruby passes the method
   # name as a Symbol, so the lookup must convert the name first.

   def self.method_added(function_name)
     if @@schema
       register_function function_name, self, function_name, @@schema
     elsif !@@functions_to_register.key?(function_name.to_s)
       register_function function_name, self, function_name, nil
     end
     @@schema = nil
   end

   # Returns the Hash of UDF name => EvalFunc, after filing any pending
   # evalfunc/filterfunc registration.

   def self.get_functions_to_register
     name_and_add_class_object

     @@functions_to_register
   end

   # Function provides a convenient wrapper for the information about a registered
   # method that is needed on the frontend.

   class Function
     attr_accessor :method_name
     attr_reader :arity, :class_object

     # class_object - class that owns the method.
     # method_name  - name of the method to call.
     # arity        - a Numeric argument count, or the name of the method whose
     #                parameter list should be inspected.
     def initialize(class_object, method_name, arity)
       @class_object = class_object
       @method_name = method_name
       @arity = arity
     end

     # Number of required arguments: the arity itself when it is Numeric, otherwise
     # the count of required parameters of the named method.
     def required_args
       return @arity if @arity.is_a?(Numeric)
       method_parameters.count { |kind, _| kind == :req }
     end

     # Number of optional arguments: 0 for a Numeric arity, -1 when the named method
     # takes a splat (*rest), otherwise the count of its optional parameters.
     def optional_args
       return 0 if @arity.is_a?(Numeric)
       params = method_parameters
       return -1 if params.any? { |kind, _| kind == :rest }
       params.count { |kind, _| kind == :opt }
     end

     # Gives a new instance of the class this Function wraps, so that on the Java end
     # it is trivial to get the object against which method calls can be made.

     def get_receiver
       @class_object.new
     end

     # Class name of this wrapper; useful for identifying the subclass Java is dealing
     # with (EvalFunc, FilterFunc, etc).

     def name
       self.class.to_s
     end

     private

     # Parameter list of the method named by @arity (only valid for non-Numeric arity).
     def method_parameters
       @class_object.instance_method(@arity.to_sym).parameters
     end
   end

   class EvalFunc < Function
     # schema_or_func - nil, a schema String, a Schema object, or the name (Symbol) of
     #                  a schema function defined on class_object.
     def initialize(class_object, method_name, arity, schema_or_func)
       super(class_object, method_name, arity)
       @schema_or_func = schema_or_func
     end

     # Used from Java to get the schema of the output.
     #
     # * nothing registered -> Schema.bytearray
     # * String             -> Schema.new of that String
     # * Schema             -> returned as is
     # * otherwise          -> treated as a schema function: a Symbol is resolved to
     #   the instance method of that name, bound to class_instance (generally the
     #   result of get_receiver) and called with input_schema.

     def schema(input_schema, class_instance)
       if !@schema_or_func
         Schema.bytearray
       elsif @schema_or_func.is_a?(String)
         Schema.new(@schema_or_func)
       elsif @schema_or_func.is_a?(Schema)
         @schema_or_func
       else
         func = @schema_or_func
         func = @class_object.instance_method(func) if func.is_a?(Symbol)
         func.bind(class_instance).call(input_schema)
       end
     end
   end
 end

 # This is the base class used for Algebraic and Accumulator functions. The reason for the different
 # implementation is because there is more structure in these cases. In the case of general EvalFuncs,
 # a method is equivalent to a UDF. In the case of Algebraic and Accumulator UDFs, however, a class is
 # equivalent to a UDF. Thus, instead of keeping track of methods added, we keep track of classes
 # that extend our Algebraic and Accumulator UDF base classes.

 class ComplexUdfBase
   # Sets the output schema for the class. The value is kept in a class-level instance
   # variable (@schema), so it belongs to the class it was called on. It can be called
   # anywhere in the class body, since here one class is one UDF. If it is never
   # called, get_output_schema falls back to bytearray.

   def self.output_schema(schema)
     @schema = schema
   end

   class << self
     alias :outputSchema :output_schema
   end

   # Returns a Schema built from the registered schema, or from Schema.bytearray when
   # none was supplied.

   def self.get_output_schema
     Schema.new(@schema || Schema.bytearray)
   end

   # Since a class = a UDF, the registered UDFs are simply the descendant classes of
   # the receiver. Returns a Hash of class name (to_s) => class for every strict
   # descendant of the receiver found in ObjectSpace; the receiver itself is excluded.

   def self.classes_to_register
     classes = {}
     ObjectSpace.each_object(Class) do |c|
       # Module#< is true only for strict descendants (nil for unrelated classes).
       classes[c.to_s] = c if c < self
     end
     classes
   end

   # Lets Pig verify that all required methods are present, so that a broken UDF fails
   # at parse time rather than at execution. This shell implementation only exists to
   # force a subclass to call necessary_methods, which replaces it with a real check.
   #
   # Raises RuntimeError until necessary_methods has been called. (`raise`, not
   # `throw`: an unmatched `throw` only produces an "uncaught throw" error.)

   def self.check_if_necessary_methods_present
     raise "Need to declare the methods that should be present"
   end

   # Called at the class level with the names of the instance methods every child
   # class (ie UDF) must define. Installs a check_if_necessary_methods_present on the
   # receiver - inherited by its subclasses - that returns true only when the class it
   # is called on defines all of them.

   def self.necessary_methods(*m)
     required = m.flatten
     define_singleton_method(:check_if_necessary_methods_present) do
       # self is the class the check is invoked on, so subclasses are checked
       # against their own method tables.
       required.all? { |method_name| method_defined?(method_name) }
     end
   end
 end

 # This is the class that any Accumulator UDF must extend. The necessary_methods call ensures that all
 # child classes have the necessary methods implemented. AccumulatorPigUdfs support dynamic output_schema.
 # To do so, register a block with the schema function, as so:
 # output_schema do |input|
 #  input
 # end
 #
 # In the case of a non-dynamic output schema, it's possible to still just set output_schema "long".
 #
 # an example of an accumulator UDF is:
 #
 # class SUM < AccumulatorPigUdf
 #   output_schema "long"
 #
 #   def exec input
 #     @res ||= 0
 #     input.flatten.inject(:+)
 #   end
 #   def get
 #     @res
 #   end
 # end

 class AccumulatorPigUdf < ComplexUdfBase
   # Sets the output schema either statically (schema argument) or dynamically (a
   # block that takes the input schema as its one argument).
   #
   # Raises ArgumentError when both a schema and a block are given, or when the block
   # does not take exactly one argument. (`raise`, not `throw`: an unmatched `throw`
   # only produces an "uncaught throw" error.)

   def self.output_schema(schema = nil, &blk)
     if block_given?
       raise ArgumentError, "Can specify block or schema but not both!" if schema
       raise ArgumentError, "Block must accept one argument!" if blk.arity != 1
       @schema = blk
     else
       @schema = schema
     end
   end

   class << self
     alias :outputSchema :output_schema
   end

   # Returns the output schema. When a block was registered and an input_schema is
   # supplied, the result is whatever the block returns for that input; otherwise a
   # Schema is built from the registered value, falling back to Schema.bytearray.
   # NOTE(review): if a block was registered but input_schema is nil, the Proc itself
   # is handed to Schema.new - confirm that callers always pass input_schema.

   def self.get_output_schema(input_schema = nil)
     if input_schema && @schema.is_a?(Proc)
       @schema.call(input_schema)
     else
       Schema.new(@schema || Schema.bytearray)
     end
   end

   # Every Accumulator UDF must define these instance methods.
   necessary_methods :exec, :get
 end

 # This is the class that any Algebraic UDF must extend. The necessary_methods call ensures that all
 # child classes have the necessary methods implemented.
 #
 # an example of an Algebraic UDF is:
 #
 # class Count < AlgebraicPigUdf
 #   output_schema "long"
 #
 #   def initial t
 #     t.nil? ? 0 : 1
 #   end
 #
 #   def intermed t
 #     return 0 if t.nil?
 #     return t.flatten.inject(:+)
 #   end
 #
 #   def final t
 #     return intermed(t)
 #   end
 # end


 class AlgebraicPigUdf < ComplexUdfBase
   # Declares the three instance methods every Algebraic UDF has to define.
   necessary_methods(:initial, :intermed, :final)
 end
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	# Fail fast with a readable error when loaded outside JRuby. `raise` is used rather
	# than `throw`: `throw` is the non-local-exit half of catch/throw, and with no matching
	# `catch` it only surfaces as an "uncaught throw" error that obscures the message.
	raise "pigudf.rb only works under JRuby!" unless RUBY_PLATFORM == "java"
	require 'jruby'
	# Loads Pig's JRuby library into the current runtime.
	# NOTE(review): presumably this is what defines the Schema class used below - confirm.
	org.apache.pig.scripting.jruby.PigJrubyLibrary.new.load(JRuby.runtime, false)

	#TODO output_schema should accept a Schema object as well, and use Schema objects
	#TODO AccumulatorEvalFunc output_schema should not allow you to give a block and defined it

	# This is the base class for run-of-the-mill EvalFuncs. A class just serves to
	# contain similar jobs, as well as allow for method reuse. In the case of simple
	# EvalFuncs, each method will be turned into a UDF (though they do not have to be called).
	#
	# TODO: EXPLAIN SYNTAX

	class PigUdf

	  # Class-level registry state. These are class variables (@@), so PigUdf and all of
	  # its subclasses read and write the same three values:
	  #
	  # * @@functions_to_register - Hash of UDF name (String) => EvalFunc. Because every
	  #   subclass writes into this one Hash, the Java side only needs
	  #   get_functions_to_register; descendant classes do not have to be tracked.
	  # * @@class_object_to_name_and_add - the EvalFunc built by the most recent
	  #   evalfunc/filterfunc call that has not yet been filed under its class name
	  #   (see evalfunc for why that step is deferred).
	  # * @@schema - the schema given by the last one-argument output_schema /
	  #   output_schema_function call, waiting for the next method definition.

	  @@functions_to_register = {}
	  @@class_object_to_name_and_add = nil
	  @@schema = nil

	  # Files the pending evalfunc/filterfunc registration (if any) under the name of its
	  # class, sets its method name to "eval", and clears the pending slot. This is
	  # deferred because the class built by evalfunc only reports a meaningful to_s once
	  # evalfunc has returned and the class has been assigned to a constant.

	  def self.name_and_add_class_object
	    pending = @@class_object_to_name_and_add
	    if pending
	      pending.method_name = "eval"
	      @@functions_to_register[pending.class_object.to_s] = pending
	    end
	    @@class_object_to_name_and_add = nil
	  end

	  # Core registration routine: records a method as a UDF.
	  #
	  # pig_func_name - name the UDF is stored under (converted to a String); in most
	  #                 cases the method name.
	  # class_object  - class whose instances the method will be called on.
	  # arity         - either a number of arguments or the name of the method whose
	  #                 parameter list should be inspected (see Function#required_args).
	  # output_schema - schema String, Schema object, schema-function name (Symbol) or nil.
	  #
	  # Returns the EvalFunc that was stored.

	  def self.register_function(pig_func_name, class_object, arity, output_schema)
	    name_and_add_class_object

	    key = pig_func_name.to_s
	    @@functions_to_register[key] = EvalFunc.new(class_object, key, arity, output_schema)
	  end

	  # Parks func as the pending evalfunc/filterfunc registration, after first filing
	  # whatever registration was pending before it.

	  def self.set_class_object_to_name_and_add(func)
	    name_and_add_class_object
	    @@class_object_to_name_and_add = func
	  end

	  # The most succinct way to define a UDF:
	  #
	  # UdfName = PigUdf.evalfunc('int') do |arg1|
	  #   arg1.length
	  # end
	  #
	  # Takes the schema to be returned and a block that becomes the body of the UDF.
	  # Returns a new class with a single method, "eval", that forwards to the block.
	  # The UDF is registered under the name of that class, so the result must be assigned
	  # to a constant (a name beginning with a capital letter): Ruby names a class created
	  # by Class.new after the constant it is assigned to.
	  #
	  # Because that name is not available until after this method returns, the EvalFunc
	  # is created with the placeholder method name "GETFROMCLASSOBJECT" and parked; the
	  # next registration or get_functions_to_register call files it under the real name.
	  #
	  # Raises ArgumentError if no block is given.

	  def self.evalfunc(output_schema, &blk)
	    raise ArgumentError, "evalfunc requires a block" unless blk
	    define_block_udf(output_schema, blk)
	  end

	  # Identical to evalfunc, except that no schema is given: the schema is always
	  # Schema.boolean.
	  #
	  # Raises ArgumentError if no block is given.

	  def self.filterfunc(&blk)
	    raise ArgumentError, "filterfunc requires a block" unless blk
	    define_block_udf(Schema.boolean, blk)
	  end

	  # Shared implementation of evalfunc and filterfunc: builds the anonymous class whose
	  # "eval" method forwards to blk, parks its EvalFunc for deferred naming, and returns
	  # the class.

	  def self.define_block_udf(output_schema, blk)
	    udf_class = Class.new do
	      define_method(:eval) { |*args| blk.call(*args) }
	    end
	    set_class_object_to_name_and_add EvalFunc.new(udf_class, "GETFROMCLASSOBJECT", blk.arity, output_schema)
	    udf_class
	  end
	  private_class_method :define_block_udf

	  # Registers the schema associated with a function. It can be invoked with one
	  # argument or two (hence the vague argument names).
	  #
	  # case 1: one argument
	  # The argument is the schema for the NEXT method declared. For example:
	  #
	  # output_schema "long"
	  # def sum x, y
	  #   return x + y
	  # end
	  #
	  # The schema is stored in @@schema and picked up by self.method_added, the Ruby hook
	  # that fires when the following method is defined.
	  #
	  # case 2: two arguments
	  # arg1 is the name of the function and arg2 its schema (converted with to_s):
	  #
	  # def sum x, y
	  #   return x + y
	  # end
	  # output_schema :sum, "long"
	  #
	  # The function is registered immediately via register_function, with this class as
	  # the class to instantiate and the function name standing in for the arity.

	  def self.output_schema(arg1, arg2 = nil)
	    if arg2
	      function_name = arg1.to_s
	      register_function function_name, self, function_name, arg2.to_s
	    else
	      @@schema = arg1
	    end
	  end

	  # Same calling conventions as output_schema, but without a schema argument: the
	  # schema is always the marker string "FILTERFUNC".
	  # NOTE(review): how "FILTERFUNC" is turned into a boolean schema is not visible in
	  # this file - confirm against the Schema implementation.

	  def self.filter_func(arg1 = nil)
	    schema = "FILTERFUNC"
	    if arg1
	      output_schema arg1.to_s, schema
	    else
	      output_schema schema
	    end
	  end

	  # output_schema only helps when the output schema is fixed. When it depends on the
	  # input schema (e.g. a concat that works for chararrays and for bytearrays), name a
	  # schema function here instead. The calling conventions match output_schema:
	  #
	  # one argument:  name of the schema function to use for the next method declared.
	  # two arguments: arg1 is the UDF's function name, arg2 the schema function's name.
	  #
	  # The schema function name is stored as a Symbol, which is how EvalFunc#schema tells
	  # a schema function apart from a schema String. The schema function does not have
	  # to be defined yet when this is called.

	  def self.output_schema_function(arg1, arg2 = nil) #TODO allow it to also accept a block, as in ComplexPigUdf
	    schema_func = (arg2 || arg1).to_sym
	    if arg2
	      function_name = arg1.to_s
	      register_function function_name, self, function_name, schema_func
	    else
	      @@schema = schema_func
	    end
	  end

	  # Javaists love their camelCase
	  class << self
	    alias :outputSchema :output_schema
	    alias :filterFunc :filter_func
	    alias :outputSchemaFunction :output_schema_function
	  end

	  # Ruby hook, called whenever an instance method is defined on a subclass. Every
	  # declared method is registered as a UDF: with the pending @@schema if one was set,
	  # otherwise with no schema (EvalFunc#schema then reports bytearray).
	  #
	  # A method that is already registered is left alone when no schema is pending, so a
	  # schema attached with the two-argument output_schema form is not wiped out by a
	  # later (re)definition. The registry is keyed by String while Ruby passes the method
	  # name as a Symbol, so the lookup must convert the name first.

	  def self.method_added(function_name)
	    if @@schema
	      register_function function_name, self, function_name, @@schema
	    elsif !@@functions_to_register.key?(function_name.to_s)
	      register_function function_name, self, function_name, nil
	    end
	    @@schema = nil
	  end

	  # Returns the Hash of UDF name => EvalFunc, after filing any pending
	  # evalfunc/filterfunc registration.

	  def self.get_functions_to_register
	    name_and_add_class_object

	    @@functions_to_register
	  end

	  # Function provides a convenient wrapper for the information about a registered
	  # method that is needed on the frontend.

	  class Function
	    attr_accessor :method_name
	    attr_reader :arity, :class_object

	    # class_object - class that owns the method.
	    # method_name  - name of the method to call.
	    # arity        - a Numeric argument count, or the name of the method whose
	    #                parameter list should be inspected.
	    def initialize(class_object, method_name, arity)
	      @class_object = class_object
	      @method_name = method_name
	      @arity = arity
	    end

	    # Number of required arguments: the arity itself when it is Numeric, otherwise
	    # the count of required parameters of the named method.
	    def required_args
	      return @arity if @arity.is_a?(Numeric)
	      method_parameters.count { |kind, _| kind == :req }
	    end

	    # Number of optional arguments: 0 for a Numeric arity, -1 when the named method
	    # takes a splat (*rest), otherwise the count of its optional parameters.
	    def optional_args
	      return 0 if @arity.is_a?(Numeric)
	      params = method_parameters
	      return -1 if params.any? { |kind, _| kind == :rest }
	      params.count { |kind, _| kind == :opt }
	    end

	    # Gives a new instance of the class this Function wraps, so that on the Java end
	    # it is trivial to get the object against which method calls can be made.

	    def get_receiver
	      @class_object.new
	    end

	    # Class name of this wrapper; useful for identifying the subclass Java is dealing
	    # with (EvalFunc, FilterFunc, etc).

	    def name
	      self.class.to_s
	    end

	    private

	    # Parameter list of the method named by @arity (only valid for non-Numeric arity).
	    def method_parameters
	      @class_object.instance_method(@arity.to_sym).parameters
	    end
	  end

	  class EvalFunc < Function
	    # schema_or_func - nil, a schema String, a Schema object, or the name (Symbol) of
	    #                  a schema function defined on class_object.
	    def initialize(class_object, method_name, arity, schema_or_func)
	      super(class_object, method_name, arity)
	      @schema_or_func = schema_or_func
	    end

	    # Used from Java to get the schema of the output.
	    #
	    # * nothing registered -> Schema.bytearray
	    # * String             -> Schema.new of that String
	    # * Schema             -> returned as is
	    # * otherwise          -> treated as a schema function: a Symbol is resolved to
	    #   the instance method of that name, bound to class_instance (generally the
	    #   result of get_receiver) and called with input_schema.

	    def schema(input_schema, class_instance)
	      if !@schema_or_func
	        Schema.bytearray
	      elsif @schema_or_func.is_a?(String)
	        Schema.new(@schema_or_func)
	      elsif @schema_or_func.is_a?(Schema)
	        @schema_or_func
	      else
	        func = @schema_or_func
	        func = @class_object.instance_method(func) if func.is_a?(Symbol)
	        func.bind(class_instance).call(input_schema)
	      end
	    end
	  end
	end

	# This is the base class used for Algebraic and Accumulator functions. The reason for the different
	# implementation is because there is more structure in these cases. In the case of general EvalFuncs,
	# a method is equivalent to a UDF. In the case of Algebraic and Accumulator UDFs, however, a class is
	# equivalent to a UDF. Thus, instead of keeping track of methods added, we keep track of classes
	# that extend our Algebraic and Accumulator UDF base classes.

	class ComplexUdfBase
	  # Sets the output schema for the class. The value is kept in a class-level instance
	  # variable (@schema), so it belongs to the class it was called on. It can be called
	  # anywhere in the class body, since here one class is one UDF. If it is never
	  # called, get_output_schema falls back to bytearray.

	  def self.output_schema(schema)
	    @schema = schema
	  end

	  class << self
	    alias :outputSchema :output_schema
	  end

	  # Returns a Schema built from the registered schema, or from Schema.bytearray when
	  # none was supplied.

	  def self.get_output_schema
	    Schema.new(@schema || Schema.bytearray)
	  end

	  # Since a class = a UDF, the registered UDFs are simply the descendant classes of
	  # the receiver. Returns a Hash of class name (to_s) => class for every strict
	  # descendant of the receiver found in ObjectSpace; the receiver itself is excluded.

	  def self.classes_to_register
	    classes = {}
	    ObjectSpace.each_object(Class) do |c|
	      # Module#< is true only for strict descendants (nil for unrelated classes).
	      classes[c.to_s] = c if c < self
	    end
	    classes
	  end

	  # Lets Pig verify that all required methods are present, so that a broken UDF fails
	  # at parse time rather than at execution. This shell implementation only exists to
	  # force a subclass to call necessary_methods, which replaces it with a real check.
	  #
	  # Raises RuntimeError until necessary_methods has been called. (`raise`, not
	  # `throw`: an unmatched `throw` only produces an "uncaught throw" error.)

	  def self.check_if_necessary_methods_present
	    raise "Need to declare the methods that should be present"
	  end

	  # Called at the class level with the names of the instance methods every child
	  # class (ie UDF) must define. Installs a check_if_necessary_methods_present on the
	  # receiver - inherited by its subclasses - that returns true only when the class it
	  # is called on defines all of them.

	  def self.necessary_methods(*m)
	    required = m.flatten
	    define_singleton_method(:check_if_necessary_methods_present) do
	      # self is the class the check is invoked on, so subclasses are checked
	      # against their own method tables.
	      required.all? { |method_name| method_defined?(method_name) }
	    end
	  end
	end

	# This is the class that any Accumulator UDF must extend. The necessary_methods call ensures that all
	# child classes have the necessary methods implemented. AccumulatorPigUdfs support dynamic output_schema.
	# To do so, register a block with the schema function, as so:
	# output_schema do |input|
	#  input
	# end
	#
	# In the case of a non-dynamic output schema, it's possible to still just set output_schema "long".
	#
	# an example of an accumulator UDF is:
	#
	# class SUM < AccumulatorPigUdf
	# output_schema "long"
	#
	# def exec input
	# @res ||= 0
	# input.flatten.inject(:+)
	# end
	# def get
	# @res
	# end
	# end

	class AccumulatorPigUdf < ComplexUdfBase
	  # Sets the output schema either statically (schema argument) or dynamically (a
	  # block that takes the input schema as its one argument).
	  #
	  # Raises ArgumentError when both a schema and a block are given, or when the block
	  # does not take exactly one argument. (`raise`, not `throw`: an unmatched `throw`
	  # only produces an "uncaught throw" error.)

	  def self.output_schema(schema = nil, &blk)
	    if block_given?
	      raise ArgumentError, "Can specify block or schema but not both!" if schema
	      raise ArgumentError, "Block must accept one argument!" if blk.arity != 1
	      @schema = blk
	    else
	      @schema = schema
	    end
	  end

	  class << self
	    alias :outputSchema :output_schema
	  end

	  # Returns the output schema. When a block was registered and an input_schema is
	  # supplied, the result is whatever the block returns for that input; otherwise a
	  # Schema is built from the registered value, falling back to Schema.bytearray.
	  # NOTE(review): if a block was registered but input_schema is nil, the Proc itself
	  # is handed to Schema.new - confirm that callers always pass input_schema.

	  def self.get_output_schema(input_schema = nil)
	    if input_schema && @schema.is_a?(Proc)
	      @schema.call(input_schema)
	    else
	      Schema.new(@schema || Schema.bytearray)
	    end
	  end

	  # Every Accumulator UDF must define these instance methods.
	  necessary_methods :exec, :get
	end

	# This is the class that any Algebraic UDF must extend. The necessary_methods call ensures that all
	# child classes have the necessary methods implemented.
	#
	# an example of an Algebraic UDF is:
	#
	# class Count < AlgebraicPigUdf
	# output_schema "long"
	#
	# def initial t
	# t.nil? ? 0 : 1
	# end
	#
	# def intermed t
	# return 0 if t.nil?
	# return t.flatten.inject(:+)
	# end
	#
	# def final t
	# return intermed(t)
	# end
	# end


	class AlgebraicPigUdf < ComplexUdfBase
	  # Declares the three instance methods every Algebraic UDF has to define.
	  necessary_methods(:initial, :intermed, :final)
	end