# perl/lib/Lucy/Analysis/RegexTokenizer.pm - lucy - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # Perl-side stub for the RegexTokenizer class.  This file defines no
 # subroutines of its own; at run time it only declares the package and
 # loads Lucy.
 # NOTE(review): the class's methods are presumably supplied by the Lucy
 # distribution loaded below -- confirm against Lucy.pm / the compiled code.
 package Lucy::Analysis::RegexTokenizer;
 use Lucy;

 # A module file must finish by evaluating to a true value for use/require.
 1;

 __END__

 __BINDING__

 # Usage example for the class documentation.  It is handed to the
 # register() call further down as the `synopsis` entry of `make_pod`.
 # The heredoc is single-quoted, so nothing inside it is interpolated: the
 # sample's variables and the backslashes in '\S+' and '\w+' stay literal.
 # NOTE(review): $case_folder and $stemmer are not defined within the sample
 # itself; presumably the reader is expected to supply them -- confirm.
 my $synopsis = <<'END_SYNOPSIS';
     my $whitespace_tokenizer
         = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );

     # or...
     my $word_char_tokenizer
         = Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );

     # or...
     my $apostrophising_tokenizer = Lucy::Analysis::RegexTokenizer->new;

     # Then... once you have a tokenizer, put it into a PolyAnalyzer:
     my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
         analyzers => [ $case_folder, $word_char_tokenizer, $stemmer ], );
 END_SYNOPSIS

 # Sample constructor call for the class documentation.  It is handed to the
 # register() call further down as make_pod => { constructor => { sample } }.
 # Single-quoted heredoc: the sample text is kept literally, backslashes
 # included.
 # The `pattern` argument is labelled optional rather than required because
 # the synopsis in this same file builds a tokenizer with a bare ->new, i.e.
 # without passing any pattern at all.
 my $constructor = <<'END_CONSTRUCTOR';
     my $word_char_tokenizer = Lucy::Analysis::RegexTokenizer->new(
         pattern => '\w+',    # optional
     );
 END_CONSTRUCTOR

 # Hand the binding options for this class to Clownfish's Perl binding
 # registry: the owning parcel, the class name, the constructor(s) to bind,
 # and the documentation samples defined above ($synopsis, $constructor).
 # NOTE(review): only `_new` is bound here; a public `new` is presumably
 # provided elsewhere in the distribution -- confirm.
 Clownfish::Binding::Perl::Class->register(
     parcel            => 'Lucy',
     class_name        => 'Lucy::Analysis::RegexTokenizer',
     bind_constructors => [qw( _new )],
     make_pod          => {
         synopsis    => $synopsis,
         constructor => { sample => $constructor },
     },
 );
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# Perl-side stub for the RegexTokenizer class.  This file defines no
	# subroutines of its own; at run time it only declares the package and
	# loads Lucy.
	# NOTE(review): the class's methods are presumably supplied by the Lucy
	# distribution loaded below -- confirm against Lucy.pm / the compiled code.
	package Lucy::Analysis::RegexTokenizer;
	use Lucy;

	# A module file must finish by evaluating to a true value for use/require.
	1;

	__END__

	__BINDING__

	# Usage example for the class documentation.  It is handed to the
	# register() call further down as the `synopsis` entry of `make_pod`.
	# The heredoc is single-quoted, so nothing inside it is interpolated: the
	# sample's variables and the backslashes in '\S+' and '\w+' stay literal.
	# NOTE(review): $case_folder and $stemmer are not defined within the sample
	# itself; presumably the reader is expected to supply them -- confirm.
	my $synopsis = <<'END_SYNOPSIS';
	my $whitespace_tokenizer
	= Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );

	# or...
	my $word_char_tokenizer
	= Lucy::Analysis::RegexTokenizer->new( pattern => '\w+' );

	# or...
	my $apostrophising_tokenizer = Lucy::Analysis::RegexTokenizer->new;

	# Then... once you have a tokenizer, put it into a PolyAnalyzer:
	my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
	analyzers => [ $case_folder, $word_char_tokenizer, $stemmer ], );
	END_SYNOPSIS

	# Sample constructor call for the class documentation.  It is handed to the
	# register() call further down as make_pod => { constructor => { sample } }.
	# Single-quoted heredoc: the sample text is kept literally, backslashes
	# included.
	# The `pattern` argument is labelled optional rather than required because
	# the synopsis in this same file builds a tokenizer with a bare ->new, i.e.
	# without passing any pattern at all.
	my $constructor = <<'END_CONSTRUCTOR';
	my $word_char_tokenizer = Lucy::Analysis::RegexTokenizer->new(
	pattern => '\w+', # optional
	);
	END_CONSTRUCTOR

	# Hand the binding options for this class to Clownfish's Perl binding
	# registry: the owning parcel, the class name, the constructor(s) to bind,
	# and the documentation samples defined above ($synopsis, $constructor).
	# NOTE(review): only `_new` is bound here; a public `new` is presumably
	# provided elsewhere in the distribution -- confirm.
	Clownfish::Binding::Perl::Class->register(
	    parcel            => 'Lucy',
	    class_name        => 'Lucy::Analysis::RegexTokenizer',
	    bind_constructors => [qw( _new )],
	    make_pod          => {
	        synopsis    => $synopsis,
	        constructor => { sample => $constructor },
	    },
	);