Merge pull request #56 from apache/LicensePackageSwap
License package swap
diff --git a/.travis.yml b/.travis.yml
index 30939c9..e5c7d8a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,15 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# http://docs.travis-ci.com/user/customizing-the-build/
language: java
+sudo: false # faster builds
+
+dist: trusty
+
jdk:
- oraclejdk8
install:
- - mvn install -Dmaven.javadoc.skip=true -Dsource.skip=true -DskipTests=true -Dgpg.skip=true
+ - mvn clean install -q -Dmaven.javadoc.skip=true -Dsource.skip=true -DskipTests=true -Dgpg.skip=true
+
+before_script:
+ - _JAVA_OPTIONS="-Xmx4g -Xms1g"
script:
- - mvn clean compile test -Dgpg.skip=true
+ - mvn clean compile test -q -Dgpg.skip=true
after_success:
- mvn clean test jacoco:report coveralls:report -DrepoToken=$coveralls_token
diff --git a/DISCLAIMER-WIP b/DISCLAIMER-WIP
new file mode 100644
index 0000000..c90f3f7
--- /dev/null
+++ b/DISCLAIMER-WIP
@@ -0,0 +1,26 @@
+Apache DataSketches (incubating) is an effort undergoing incubation
+at The Apache Software Foundation (ASF), sponsored by the Apache Incubator.
+
+Incubation is required of all newly accepted projects until a further review
+indicates that the infrastructure, communications, and decision making process
+have stabilized in a manner consistent with other successful ASF projects.
+
+While incubation status is not necessarily a reflection of the
+completeness or stability of the code, it does indicate that the
+project has yet to be fully endorsed by the ASF.
+
+Some of the incubating project's releases may not be fully compliant
+with ASF policy. For example, releases may have incomplete or
+un-reviewed licensing conditions. What follows is a list of known
+issues the project is currently aware of (note that this list, by
+definition, is likely to be incomplete):
+
+ * The LICENSE and NOTICE files may not be complete and will be fixed with the next release.
+
+If you are planning to incorporate this work into your
+product or project, please be aware that you will need to conduct a
+thorough licensing review to determine the overall implications of
+including this work. For the current status of this project through the Apache
+Incubator visit:
+
+http://incubator.apache.org/projects/datasketches.html
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..70ed0d3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,210 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+
+
+APPENDIX A: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+ -------------------------------------------------------------
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -------------------------------------------------------------
+
+
+
+APPENDIX B: Additional licenses relevant to Apache DataSketches-pig (INCUBATING):
+ (none)
diff --git a/LICENSE.md b/LICENSE.md
deleted file mode 100644
index 4d2d52c..0000000
--- a/LICENSE.md
+++ /dev/null
@@ -1,54 +0,0 @@
-Apache License
-
-Version 2.0, January 2004
-
-https://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
-"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
-
-"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
-
-"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
-
-"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
-
-"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
-
-"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
-
-"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
-
-"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
-
-"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
-
-"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
-
- You must give any other recipients of the Work or Derivative Works a copy of this License; and
- You must cause any modified files to carry prominent notices stating that You changed the files; and
- You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
- If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
-
- You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
-
-5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 0000000..fc14b29
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,11 @@
+Apache DataSketches Pig
+Copyright 2019 - The Apache Software Foundation
+
+Copyright 2015-2018 Yahoo
+Copyright 2018 - Verizon Media
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+Prior to moving to ASF, the software for this project was developed at
+Yahoo (now Verizon Media) (https://developer.yahoo.com).
diff --git a/README.md b/README.md
index 2944e4c..a760e9a 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,107 @@
-[![][travis img]][travis] [![][coveralls img]][coveralls] [![][mavenbadge img]][mavenbadge] [![][versioneye img]][versioneye]
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+[![Build Status](https://travis-ci.org/apache/incubator-datasketches-pig.svg?branch=master)](https://travis-ci.org/apache/incubator-datasketches-pig)
+[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-pig/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-pig)
+[![Language grade: Java](https://img.shields.io/lgtm/grade/java/g/apache/incubator-datasketches-pig.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/incubator-datasketches-pig/context:java)
+[![Total alerts](https://img.shields.io/lgtm/alerts/g/apache/incubator-datasketches-pig.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/incubator-datasketches-pig/alerts/)
+[![Coverage Status](https://coveralls.io/repos/github/apache/incubator-datasketches-pig/badge.svg?branch=master&service=github)](https://coveralls.io/github/apache/incubator-datasketches-pig?branch=master)
=================
-# Sketch UDFs for Pig
-Depends on sketches-core. The released jars include a jar with just the Pig UDFs and a shaded jar
-that also includes a shaded sketches-core.
+# DataSketches UDF/UDAF Adaptors for Apache Pig
+Please refer to our [website](https://datasketches.github.io) for more comprehensive information about the various sketching algorithms and how to use them.
-## [Documentation](https://datasketches.github.io)
+## Documentation
-## [Latest Release](https://github.com/DataSketches/sketches-pig/releases)
+### [DataSketches Library Website](https://datasketches.github.io/)
-## [Comments](https://groups.google.com/forum/#!forum/sketches-user)
+### [Java Core Overview](https://datasketches.github.io/docs/TheChallenge.html)
-[travis]:https://travis-ci.org//DataSketches/sketches-pig/builds?branch=master
-[travis img]:https://secure.travis-ci.org/DataSketches/sketches-pig.svg?branch=master
+### [Java Core Javadocs](https://datasketches.github.io/api/core/snapshot/apidocs/index.html)
-[coveralls]:https://coveralls.io/github/DataSketches/sketches-pig?branch=master
-[coveralls img]:https://coveralls.io/repos/github/DataSketches/sketches-pig/badge.svg?branch=master
+### Hadoop Pig UDFs
+See relevant sections under the different sketch types in Java Core Overview.
-[mavenbadge]:https://search.maven.org/#search|gav|1|g%3A%22com.yahoo.datasketches%22%20AND%20a%3A%22sketches-pig%22
-[mavenbadge img]:https://maven-badges.herokuapp.com/maven-central/com.yahoo.datasketches/sketches-pig/badge.svg
-[versioneye]:https://www.versioneye.com/user/projects/587fd6ebb194d4003d5289ae
-[versioneye img]:https://www.versioneye.com/user/projects/587fd6ebb194d4003d5289ae/badge.svg?style=flat
+## Downloading Latest Release
+__NOTE:__ This component accesses resource files for testing. As a result, the directory elements of the full absolute path of the target installation directory
+ must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements.
+
+This is required by the Oracle Java Specification in order to ensure location-independent
+ access to resources: [See Oracle Location-Independent Access to Resources](https://docs.oracle.com/javase/8/docs/technotes/guides/lang/resources.html)
+
+### [Zip File from Apache Archive](http://archive.apache.org/dist/incubator/datasketches/pig/)
+
+### [Jar Files from Maven Central](https://repository.apache.org/content/repositories/releases/org/apache/datasketches/datasketches-pig/)
+
+### [GitHub](https://github.com/apache/incubator-datasketches-pig/releases)
+
+## Build Instructions
+
+### JDK8 is the Required Compiler
+This DataSketches component is pure Java and you must compile using JDK 8.
+
+### Recommended Build Tool
+This DataSketches component is structured as a Maven project and Maven is the recommended Build Tool.
+
+There are two types of tests: normal unit tests and tests run by the strict profile.
+
+To run normal unit tests:
+
+ $ mvn clean test
+
+To run the strict profile tests:
+
+ $ mvn clean test -P strict
+
+To install jars built from the downloaded source:
+
+ $ mvn clean install -DskipTests=true
+
+This will create the following jars:
+
+* datasketches-pig-X.Y.Z-incubating.jar The compiled main class files.
+* datasketches-pig-X.Y.Z-incubating-tests.jar The compiled test class files.
+* datasketches-pig-X.Y.Z-incubating-sources.jar The main source files.
+* datasketches-pig-X.Y.Z-incubating-test-sources.jar The test source files.
+* datasketches-pig-X.Y.Z-incubating-javadoc.jar The compressed Javadocs.
+
+### Dependencies
+
+#### Run-time
+This has the following top-level dependencies:
+
+* org.apache.datasketches : datasketches-java
+* org.apache.pig : pig
+* org.apache.hadoop : hadoop-common
+* org.apache.hadoop : hadoop-mapreduce-client-core
+* org.apache.hadoop : hadoop-mapreduce-client-common
+* org.apache.commons : commons-math3
+
+#### Testing
+See the pom.xml file for test dependencies.
+
+## Resources
+
+### [Issues for datasketches-pig](https://github.com/apache/incubator-datasketches-pig/issues)
+
+### [Forum](https://groups.google.com/forum/#!forum/sketches-user)
+
+### [Dev mailing list](mailto:dev@datasketches.apache.org)
diff --git a/pom.xml b/pom.xml
index 5e36763..f49a68c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,78 +1,76 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!-- Copyright 2015, Yahoo! Inc.
- Licensed under the terms of the Apache License 2.0.
- See LICENSE file at the project root for terms. -->
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
-
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>21</version>
+ </parent>
<groupId>org.apache.datasketches</groupId>
<artifactId>datasketches-pig</artifactId>
- <version>0.13.1-SNAPSHOT</version>
-
- <!-- Project Information -->
- <name>${project.groupId}:${project.artifactId}</name>
- <description>Data Sketches Pig</description>
- <url>https://datasketches.github.io/</url>
+ <version>1.0.0-incubating-SNAPSHOT</version>
+ <name>${project.artifactId}</name>
+ <description>Apache Pig adaptors for the DataSketches library.</description>
+ <url>https://datasketches.apache.org/</url>
<inceptionYear>2015</inceptionYear>
-
+ <packaging>jar</packaging>
+ <!-- jar is the default -->
+ <!-- With the 3.6.1 version of Maven this causes a warning to obsolete this in
+ favor of using the enforcer plugin (which we do). But removing this
+ causes the command "mvn versions:display-plugin-updates" to issue an error
+ and not work properly. So this will stay for the time being.
+ -->
+ <prerequisites>
+ <maven>3.2.0</maven>
+ </prerequisites>
<licenses>
<license>
<name>Apache License, Version 2.0</name>
<url>https://www.apache.org/licenses/LICENSE-2.0</url>
+ <distribution>repo</distribution>
</license>
</licenses>
-
- <organization>
- <name>Yahoo! Inc.</name>
- <url>https://www.yahoo.com</url>
- </organization>
-
- <developers>
- <developer>
- <name>Lee Rhodes</name>
- <roles>
- <role>founder, project admin, developer</role>
- </roles>
- <url>https://github.com/leerho</url>
- </developer>
- <developer>
- <name>Alexander Saydakov</name>
- <roles>
- <role>lead developer</role>
- </roles>
- <url>https://github.com/AlexanderSaydakov</url>
- </developer>
- </developers>
-
- <contributors>
- <contributor>
- <url>https://github.com/DataSketches/sketches-pig/graphs/contributors</url>
- </contributor>
- </contributors>
- <!-- End Project Information -->
-
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <project.build.resourceEncoding>UTF-8</project.build.resourceEncoding>
- <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
- <maven.compiler.source>1.8</maven.compiler.source>
- <maven.compiler.target>1.8</maven.compiler.target>
- </properties>
-
- <!-- Environment Settings -->
+ <scm>
+ <connection>scm:git:ssh://git@github.com/apache/incubator-${project.artifactId}.git</connection>
+ <developerConnection>scm:git:ssh://git@github.com/apache/incubator-${project.artifactId}.git</developerConnection>
+ <url>https://github.com/apache/incubator-${project.artifactId}</url>
+ <tag>HEAD</tag>
+ </scm>
<issueManagement>
- <system>GitHub Issues</system>
- <url>https://github.com/DataSketches/sketches-pig/issues</url>
+ <!-- <system>jira</system>
+ <url>https://issues.apache.org/jira/browse/DATASKETCHES</url> -->
+ <system>GitHub</system>
+ <url>https://github.com/apache/incubator-${project.artifactId}/issues</url>
</issueManagement>
-
- <ciManagement>
- <system>travis</system>
- <url>https://travis-ci.org/DataSketches/sketches-pig</url>
- </ciManagement>
-
<mailingLists>
<mailingList>
+ <name>DataSketches Developers</name>
+ <subscribe>dev-subscribe@datasketches.apache.org</subscribe>
+ <unsubscribe>dev-unsubscribe@datasketches.apache.org</unsubscribe>
+ <post>dev@datasketches.apache.org</post>
+ <archive>https://mail-archives.apache.org/mod_mbox/datasketches-dev</archive>
+ </mailingList>
+ <mailingList>
<name>sketches-user</name>
<archive>https://groups.google.com/forum/#!forum/sketches-user</archive>
<subscribe>mailto:sketches-user%2Bsubscribe@googlegroups.com</subscribe>
@@ -80,87 +78,101 @@
<post>mailto:sketches-user@googlegroups.com</post>
</mailingList>
</mailingLists>
-
- <scm>
- <connection>scm:git:ssh://git@github.com/DataSketches/sketches-pig.git</connection>
- <developerConnection>scm:git:ssh://git@github.com/DataSketches/sketches-pig.git</developerConnection>
- <url>https://github.com/DataSketches/sketches-pig</url>
- <tag>HEAD</tag>
- </scm>
-
- <prerequisites>
- <maven>3.0.4</maven>
- </prerequisites>
-
+ <developers>
+ <developer>
+ <name>The Apache DataSketches Team</name>
+ <email>dev@datasketches.apache.org</email>
+ <url>https://datasketches.apache.org</url>
+ <organization>Apache Software Foundation</organization>
+ <organizationUrl>http://www.apache.org</organizationUrl>
+ </developer>
+ </developers>
+ <properties>
+ <!-- System-wide properties -->
+ <argLine>-Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8</argLine>
+ <charset.encoding>UTF-8</charset.encoding>
+ <project.build.sourceEncoding>${charset.encoding}</project.build.sourceEncoding>
+ <project.build.resourceEncoding>${charset.encoding}</project.build.resourceEncoding>
+ <project.reporting.outputEncoding>${charset.encoding}</project.reporting.outputEncoding>
+ <java.version>1.8</java.version>
+ <maven.compiler.source>${java.version}</maven.compiler.source>
+ <maven.compiler.target>${java.version}</maven.compiler.target>
+ <!-- Dependencies -->
+ <datasketches-java.version>1.1.0-incubating</datasketches-java.version>
+ <pig.version>0.17.0</pig.version>
+ <hadoop-common.version>2.8.5</hadoop-common.version>
+ <hadoop-mapreduce-client-core.version>2.8.5</hadoop-mapreduce-client-core.version>
+ <hadoop-mapreduce-client-common.version>2.8.5</hadoop-mapreduce-client-common.version>
+ <commons-math3.version>3.6.1</commons-math3.version>
+ <testng.version>6.14.3</testng.version>
+ <slf4j-simple.version>1.7.27</slf4j-simple.version>
+ <!-- org.codehaus.plexus used for strict profile testing-->
+ <plexus-compiler-javac-errorprone.version>2.8.5</plexus-compiler-javac-errorprone.version>
+ <!-- Maven Plugins -->
+ <maven-assembly-plugin.version>3.1.1</maven-assembly-plugin.version>
+ <maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
+ <maven-deploy-plugin.version>3.0.0-M1</maven-deploy-plugin.version>
+ <maven-enforcer-plugin.version>3.0.0-M2</maven-enforcer-plugin.version>
+ <maven-gpg-plugin.version>1.6</maven-gpg-plugin.version>
+ <maven-jar-plugin.version>3.1.2</maven-jar-plugin.version>
+ <maven-javadoc-plugin.version>3.1.1</maven-javadoc-plugin.version>
+ <maven-release-plugin.version>2.5.3</maven-release-plugin.version>
+ <maven-remote-resources-plugin.version>[1.0,)</maven-remote-resources-plugin.version>
+ <maven-source-plugin.version>3.1.0</maven-source-plugin.version>
+ <maven-surefire-plugin.version>3.0.0-M3</maven-surefire-plugin.version>
+ <!-- Apache Plugins -->
+ <apache-rat-plugin.version>0.13</apache-rat-plugin.version>
+ <!-- org.jacoco Maven Plugins -->
+ <jacoco-maven-plugin.version>0.8.4</jacoco-maven-plugin.version>
+ <!-- org.eluder Maven Plugins -->
+ <coveralls-maven-plugin.version>4.3.0</coveralls-maven-plugin.version>
+ <!-- other -->
+ <lifecycle-mapping.version>1.0.0</lifecycle-mapping.version>
+ <git-commit-id-plugin.version>3.0.0</git-commit-id-plugin.version>
+ </properties>
<repositories>
<repository>
- <id>jcenter</id>
- <name>bintray</name>
- <url>https://jcenter.bintray.com</url>
+ <id>apache.snapshots</id>
+ <name>Apache Snapshot Repository</name>
+ <url>https://repository.apache.org/content/groups/snapshots/org/apache/datasketches/</url>
+ <releases>
+ <enabled>false</enabled>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>apache</id>
+ <name>Apache Releases Repository</name>
+ <url>https://repository.apache.org/content/repositories/releases/org/apache/datasketches/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
</repository>
</repositories>
-
- <distributionManagement>
- <repository>
- <id>sonatype-nexus-staging</id>
- <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
- </repository>
- </distributionManagement>
-
- <profiles>
- <profile>
- <id>strict</id>
- <build>
- <pluginManagement>
- <plugins>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>3.7.0</version>
- <configuration>
- <source>${maven.compiler.source}</source>
- <target>${maven.compiler.target}</target>
- <compilerId>javac-with-errorprone</compilerId>
- <forceJavacCompilerUse>true</forceJavacCompilerUse>
- </configuration>
- <dependencies>
- <dependency>
- <groupId>org.codehaus.plexus</groupId>
- <artifactId>plexus-compiler-javac-errorprone</artifactId>
- <version>2.8.2</version>
- </dependency>
- </dependencies>
- </plugin>
-
- </plugins>
- </pluginManagement>
- </build>
- </profile>
- </profiles>
- <!-- End of Environment Settings -->
-
<dependencies>
- <!-- sketches-core -->
+ <!-- datasketches-java -->
<dependency>
- <groupId>com.yahoo.datasketches</groupId>
- <artifactId>sketches-core</artifactId>
- <version>0.13.1</version>
+ <groupId>org.apache.datasketches</groupId>
+ <artifactId>datasketches-java</artifactId>
+ <version>${datasketches-java.version}</version>
</dependency>
-
- <!-- Pig -->
+ <!-- Pig Dependencies (provided scope) -->
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
- <version>0.17.0</version>
+ <version>${pig.version}</version>
<scope>provided</scope>
</dependency>
-
<!-- hadoop -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <version>2.8.1</version>
+ <version>${hadoop-common.version}</version>
<optional>true</optional>
<exclusions>
<exclusion>
@@ -169,11 +181,10 @@
</exclusion>
</exclusions>
</dependency>
-
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>2.8.1</version>
+ <version>${hadoop-mapreduce-client-core.version}</version>
<optional>true</optional>
<exclusions>
<exclusion>
@@ -190,248 +201,467 @@
</exclusion>
</exclusions>
</dependency>
-
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
- <version>2.8.1</version>
+ <version>${hadoop-mapreduce-client-common.version}</version>
<optional>true</optional>
</dependency>
-
<!-- For statistics. Used in p-value calculation. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
- <version>3.6.1</version>
+ <version>${commons-math3.version}</version>
</dependency>
-
<!-- Test Scope -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
- <version>6.11</version>
+ <version>${testng.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
-
<build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <!-- We want to deploy the artifacts to a staging location for perusal -->
+ <!-- Apache Parent pom: apache-release profile -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-deploy-plugin</artifactId>
+ <version>${maven-deploy-plugin.version}</version>
+ <configuration>
+ <updateReleaseInfo>true</updateReleaseInfo>
+ <!-- see maven-install-plugin -->
+ </configuration>
+ </plugin>
+ <plugin>
+ <!-- Apache Parent pom, pluginManagement-->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-release-plugin</artifactId>
+ <version>${maven-release-plugin.version}</version>
+ </plugin>
+ <plugin>
+ <!-- Extends Apache Parent pom, pluginManagement-->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>${maven-jar-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>default-jar</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>default-test-jar</id>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Extends Apache Parent pom, apache-release profile -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>${maven-javadoc-plugin.version}</version>
+ <configuration>
+ <docfilessubdirs>true</docfilessubdirs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-enforcer-plugin</artifactId>
+ <version>${maven-enforcer-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>enforce-banned-dependencies</id>
+ <goals>
+ <goal>enforce</goal>
+ </goals>
+ <configuration>
+ <rules>
+ <requireJavaVersion>
+ <version>1.8.0</version>
+ </requireJavaVersion>
+ <requireMavenVersion>
+ <version>3.2,</version>
+ </requireMavenVersion>
+ <bannedDependencies>
+ <excludes>
+ <!--LGPL licenced library-->
+ <exclude>com.google.code.findbugs:annotations</exclude>
+ </excludes>
+ </bannedDependencies>
+ </rules>
+ <fail>true</fail>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Apache Parent pom, pluginManagement-->
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <version>${apache-rat-plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <outputDirectory>${project.basedir}/rat</outputDirectory>
+ <consoleOutput>true</consoleOutput>
+ <useDefaultExcludes>true</useDefaultExcludes>
+ <excludes>
+ <!-- rat uses .gitignore for excludes by default -->
+ <exclude>**/test/resources/**/*.txt</exclude>
+ <exclude>LICENSE</exclude>
+ <exclude>NOTICE</exclude>
+ <exclude>DISCLAIMER-WIP</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!-- Extends Apache Parent pom, apache-release profile -->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
+ <version>${maven-source-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <phase>package</phase>
+ <goals>
+ <goal>jar-no-fork</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>attach-test-sources</id>
+ <phase>package</phase>
+ <goals>
+ <goal>test-jar-no-fork</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Apache Parent pom, pluginManagement-->
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${maven-surefire-plugin.version}</version>
+ <configuration>
+ <trimStackTrace>false</trimStackTrace>
+ <useManifestOnlyJar>false</useManifestOnlyJar>
+ <redirectTestOutputToFile>true</redirectTestOutputToFile>
+ </configuration>
+ </plugin>
+ <plugin>
+ <!-- Prepares the JaCoCo agent so that test runs record code coverage data. -->
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ <version>${jacoco-maven-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>prepare-agent</id>
+ <goals>
+ <goal>prepare-agent</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <!-- Submit code coverage report to Coveralls.io. -->
+ <groupId>org.eluder.coveralls</groupId>
+ <artifactId>coveralls-maven-plugin</artifactId>
+ <version>${coveralls-maven-plugin.version}</version>
+ <configuration>
+ <!-- Since we use Travis CI we do not have to put a Coveralls token here. -->
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
<plugins>
-
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>3.1.0</version>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <artifactSet>
- <includes>
- <include>com.yahoo.datasketches:sketches-core</include>
- <include>com.yahoo.datasketches:memory</include>
- </includes>
- </artifactSet>
- <relocations>
- <relocation>
- <pattern>com.yahoo.sketches</pattern>
- <shadedPattern>shaded.com.yahoo.sketches</shadedPattern>
- <excludes>
- <exclude>com.yahoo.sketches.pig.**</exclude>
- </excludes>
- </relocation>
- <relocation>
- <pattern>com.yahoo.memory</pattern>
- <shadedPattern>shaded.com.yahoo.memory</shadedPattern>
- </relocation>
- </relocations>
- <shadedArtifactAttached>true</shadedArtifactAttached>
- <shadedClassifierName>with-shaded-core</shadedClassifierName>
- </configuration>
- </execution>
- </executions>
+ <artifactId>maven-deploy-plugin</artifactId>
</plugin>
-
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-source-plugin</artifactId>
- <version>3.0.1</version>
- <executions>
- <execution>
- <id>attach-sources</id>
- <goals>
- <goal>jar-no-fork</goal>
- <goal>test-jar-no-fork</goal>
- </goals>
- </execution>
- </executions>
+ <artifactId>maven-release-plugin</artifactId>
</plugin>
-
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
- <version>2.10.4</version>
- <configuration>
- <!--
- <show>private</show>
- -->
- <stylesheetfile>src/main/javadoc/stylesheet.css</stylesheetfile>
- <docfilessubdirs>true</docfilessubdirs>
- </configuration>
- <executions>
- <execution>
- <id>attach-javadocs</id>
- <goals>
- <goal>jar</goal>
- </goals>
- </execution>
- </executions>
</plugin>
-
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-gpg-plugin</artifactId>
- <version>1.6</version>
- <executions>
- <execution>
- <id>sign-artifacts</id>
- <phase>verify</phase>
- <goals>
- <goal>sign</goal>
- </goals>
- </execution>
- </executions>
+ <artifactId>maven-enforcer-plugin</artifactId>
</plugin>
-
<plugin>
- <groupId>org.sonatype.plugins</groupId>
- <artifactId>nexus-staging-maven-plugin</artifactId>
- <version>1.6.8</version>
- <extensions>true</extensions>
- <configuration>
- <serverId>sonatype-nexus-staging</serverId>
- <nexusUrl>https://oss.sonatype.org/</nexusUrl>
- <autoReleaseAfterClose>false</autoReleaseAfterClose>
- </configuration>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
</plugin>
-
- <!-- Code coverage plugin, generates coverage report to target/site/jacoco/
- To skip coverage generation add -Djacoco.skip=true -->
<plugin>
- <groupId>org.jacoco</groupId>
- <artifactId>jacoco-maven-plugin</artifactId>
- <executions>
- <execution>
- <goals>
- <goal>prepare-agent</goal>
- </goals>
- </execution>
- <execution>
- <id>report</id>
- <phase>test</phase>
- <goals>
- <goal>report</goal>
- </goals>
- </execution>
- </executions>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-source-plugin</artifactId>
</plugin>
-
- <!-- Coveralls is a online code coverage reporting tool that leverages JaCoCo -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.jacoco</groupId>
+ <artifactId>jacoco-maven-plugin</artifactId>
+ </plugin>
<plugin>
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
- <version>4.3.0</version>
- <configuration>
- <repoToken />
- </configuration>
</plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- </plugin>
-
</plugins>
-
- <pluginManagement>
- <plugins>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-clean-plugin</artifactId>
- <version>3.0.0</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-dependency-plugin</artifactId>
- <version>3.0.2</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-deploy-plugin</artifactId>
- <version>2.8.2</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-help-plugin</artifactId>
- <version>2.2</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-install-plugin</artifactId>
- <version>2.5.2</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-resources-plugin</artifactId>
- <version>3.0.2</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-release-plugin</artifactId>
- <version>2.5.3</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>3.1.0</version>
- </plugin>
-
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>license-maven-plugin</artifactId>
- <version>1.14</version>
- </plugin>
-
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>exec-maven-plugin</artifactId>
- <version>1.6.0</version>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.20.1</version>
- </plugin>
-
- <plugin>
- <groupId>org.jacoco</groupId>
- <artifactId>jacoco-maven-plugin</artifactId>
- <version>0.7.9</version>
- </plugin>
-
- </plugins>
- </pluginManagement>
</build>
-
+ <profiles>
+ <!-- Ignore nuisance warning from Apache parent plugin:
+ "maven-remote-resources-plugin (goal "process") is ignored by m2e".
+ This also should fix the Maven warning that it can't find the lifecycle-mapping jar.
+ This profile is only active when the property "m2e.version" is set,
+ which is the case when building in Eclipse with m2e.
+ The ignore below tells m2eclipse to skip the execution.
+ -->
+ <profile>
+ <id>m2e</id>
+ <activation>
+ <property>
+ <name>m2e.version</name>
+ </property>
+ </activation>
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>${lifecycle-mapping.version}</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-remote-resources-plugin</artifactId>
+ <versionRange>${maven-remote-resources-plugin.version}</versionRange>
+ <goals>
+ <goal>process</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore/>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+ </profile>
+ <profile>
+ <id>strict</id>
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>${maven-compiler-plugin.version}</version>
+ <dependencies>
+ <dependency>
+ <groupId>org.codehaus.plexus</groupId>
+ <artifactId>plexus-compiler-javac-errorprone</artifactId>
+ <version>${plexus-compiler-javac-errorprone.version}</version>
+ </dependency>
+ </dependencies>
+ <configuration>
+ <source>${maven.compiler.source}</source>
+ <target>${maven.compiler.target}</target>
+ <compilerId>javac-with-errorprone</compilerId>
+ <forceJavacCompilerUse>true</forceJavacCompilerUse>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+ </profile>
+ <!-- This profile is used to release signed jars to the Apache Nexus repository.
+ This must be executed from a git repository set at the proper Release branch (e.g., 1.1.X-incubating)
+ and at a Release Candidate tag (e.g., 1.1.0-incubating-RC1).
+ The pom version in the release branch must be properly set to something like: "1.1.0-incubating".
+ The pom version in the master would be set to something like: "1.2.0-incubating-SNAPSHOT".
+ Test Command: mvn clean verify -Pnexus-jars -DskipTests=true
+ Command: mvn clean deploy -Pnexus-jars
+ Verify Command (from terminal): gpg -v --verify $ASC $FILE # dashdashverify
+ -->
+ <profile>
+ <id>nexus-jars</id>
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>pl.project13.maven</groupId>
+ <artifactId>git-commit-id-plugin</artifactId>
+ <version>${git-commit-id-plugin.version}</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>revision</goal>
+ </goals>
+ <phase>initialize</phase>
+ </execution>
+ </executions>
+ <configuration>
+ <dotGitDirectory>${project.basedir}/.git</dotGitDirectory>
+ <dateFormatTimeZone>UTC</dateFormatTimeZone>
+ <verbose>false</verbose>
+ <skipPoms>false</skipPoms>
+ <format>json</format>
+ <generateGitPropertiesFile>true</generateGitPropertiesFile>
+ <generateGitPropertiesFilename>${project.build.directory}/git.properties</generateGitPropertiesFilename>
+ <failOnNoGitDirectory>true</failOnNoGitDirectory>
+ <failOnUnableToExtractRepoInfo>true</failOnUnableToExtractRepoInfo>
+ <commitIdGenerationMode>full</commitIdGenerationMode>
+ <includeOnlyProperties>
+ <includeProperty>git.branch</includeProperty>
+ <includeProperty>git.commit.id.full</includeProperty>
+ <includeProperty>git.commit.time</includeProperty>
+ <includeProperty>git.commit.user.email</includeProperty>
+ <includeProperty>git.tags</includeProperty>
+ </includeOnlyProperties>
+ <gitDescribe>
+ <skip>false</skip>
+ <always>true</always>
+ <abbrev>7</abbrev>
+ <dirty>-dirty</dirty>
+ <tags>true</tags>
+ <forceLongFormat>true</forceLongFormat>
+ </gitDescribe>
+ </configuration>
+ </plugin>
+ <!-- Extends Apache Parent pom, pluginManagement-->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>${maven-jar-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>default-jar</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>default-test-jar</id>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <injectAllReactorProjects>true</injectAllReactorProjects>
+ <archive>
+ <manifest>
+ <addDefaultEntries>false</addDefaultEntries>
+ <addDefaultSpecificationEntries>false</addDefaultSpecificationEntries>
+ <addDefaultImplementationEntries>false</addDefaultImplementationEntries>
+ </manifest>
+ <manifestEntries>
+ <Build-Jdk>${java.version} (${java.vendor} ${java.vm.version})</Build-Jdk>
+ <Build-OS>${os.name} ${os.arch} ${os.version}</Build-OS>
+ <Implementation-Vendor>The Apache Software Foundation</Implementation-Vendor>
+ <GroupId-ArtifactId>${project.groupId}:${project.artifactId}</GroupId-ArtifactId>
+ <git-branch>${git.branch}</git-branch>
+ <git-commit-id>${git.commit.id.full}</git-commit-id>
+ <git-commit-time>${git.commit.time}</git-commit-time>
+ <git-commit-user-email>${git.commit.user.email}</git-commit-user-email>
+ <git-commit-tag>${git.tags}</git-commit-tag>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </plugin>
+ <!-- We want to sign the artifacts, POM, and all attached artifacts -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>${maven-gpg-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <gpgArguments>
+ <arg>--verbose</arg>
+ <!-- prints the algorithm used -->
+ <arg>--personal-digest-preferences=SHA512</arg>
+ </gpgArguments>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>pl.project13.maven</groupId>
+ <artifactId>git-commit-id-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ <!-- Disable source release assembly for 'apache-release' profile.
+ This is performed from a script outside Maven
+ -->
+ <profile>
+ <id>apache-release</id>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>${maven-assembly-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>source-release-assembly</id>
+ <phase>none</phase>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
</project>
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketchAlgebraicFinal.java b/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketchAlgebraicFinal.java
deleted file mode 100644
index e425d77..0000000
--- a/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketchAlgebraicFinal.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.cpc;
-
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-
-import com.yahoo.sketches.cpc.CpcSketch;
-
-public class DataToSketchAlgebraicFinal extends AlgebraicFinal {
-
- /**
- * Default constructor for the final pass of an Algebraic function.
- * Assumes default lgK and seed.
- */
- public DataToSketchAlgebraicFinal() {
- super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default seed.
- *
- * @param lgK in a form of a String
- */
- public DataToSketchAlgebraicFinal(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param seed for the hash function
- */
- public DataToSketchAlgebraicFinal(final String lgK, final String seed) {
- super(Integer.parseInt(lgK), Long.parseLong(seed));
- }
-
- @Override
- boolean isInputRaw() {
- return true;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketchAlgebraicIntermediate.java b/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketchAlgebraicIntermediate.java
deleted file mode 100644
index dea8591..0000000
--- a/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketchAlgebraicIntermediate.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.cpc;
-
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-
-import com.yahoo.sketches.cpc.CpcSketch;
-
-public class DataToSketchAlgebraicIntermediate extends AlgebraicIntermediate {
-
- /**
- * Default constructor for the intermediate pass of an Algebraic function.
- * Assumes default lgK and seed.
- */
- public DataToSketchAlgebraicIntermediate() {
- super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default seed.
- *
- * @param lgK in a form of a String
- */
- public DataToSketchAlgebraicIntermediate(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param seed for the hash function
- */
- public DataToSketchAlgebraicIntermediate(final String lgK, final String seed) {
- super(Integer.parseInt(lgK), Long.parseLong(seed));
- }
-
- @Override
- boolean isInputRaw() {
- return true;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/GetEstimate.java b/src/main/java/com/yahoo/sketches/pig/cpc/GetEstimate.java
deleted file mode 100644
index 88ce2a5..0000000
--- a/src/main/java/com/yahoo/sketches/pig/cpc/GetEstimate.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.cpc;
-
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.sketches.cpc.CpcSketch;
-
-/**
- * This is a User Defined Function (UDF) for getting a distinct count estimate from a given CpcdSketch
- *
- * @author Alexander Saydakov
- */
-public class GetEstimate extends EvalFunc<Double> {
-
- private final long seed_;
-
- /**
- * Constructor with default seed
- */
- public GetEstimate() {
- this(DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor with given seed
- * @param seed in a form of a String
- */
- public GetEstimate(final String seed) {
- this(Long.parseLong(seed));
- }
-
- /**
- * Base constructor
- * @param seed parameter for the hash function
- */
- GetEstimate(final long seed) {
- seed_ = seed;
- }
-
- @Override
- public Double exec(final Tuple sketchTuple) throws IOException {
- if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
- return null;
- }
- final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
- final CpcSketch sketch = CpcSketch.heapify(dba.get(), seed_);
- return sketch.getEstimate();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketchAlgebraicFinal.java b/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketchAlgebraicFinal.java
deleted file mode 100644
index 9cbd22d..0000000
--- a/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketchAlgebraicFinal.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.cpc;
-
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-
-import com.yahoo.sketches.cpc.CpcSketch;
-
-public class UnionSketchAlgebraicFinal extends AlgebraicFinal {
-
- /**
- * Default constructor for the final pass of an Algebraic function.
- * Assumes default lgK and seed.
- */
- public UnionSketchAlgebraicFinal() {
- super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default seed.
- *
- * @param lgK in a form of a String
- */
- public UnionSketchAlgebraicFinal(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param seed for the hash function
- */
- public UnionSketchAlgebraicFinal(final String lgK, final String seed) {
- super(Integer.parseInt(lgK), Long.parseLong(seed));
- }
-
- @Override
- boolean isInputRaw() {
- return false;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketchAlgebraicIntermediate.java b/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketchAlgebraicIntermediate.java
deleted file mode 100644
index f5ab147..0000000
--- a/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketchAlgebraicIntermediate.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.cpc;
-
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-
-import com.yahoo.sketches.cpc.CpcSketch;
-
-public class UnionSketchAlgebraicIntermediate extends AlgebraicIntermediate {
-
- /**
- * Default constructor of the intermediate pass of an Algebraic function.
- * Assumes default lgK and seed.
- */
- public UnionSketchAlgebraicIntermediate() {
- super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default seed.
- *
- * @param lgK in a form of a String
- */
- public UnionSketchAlgebraicIntermediate(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param seed for the hash function
- */
- public UnionSketchAlgebraicIntermediate(final String lgK, final String seed) {
- super(Integer.parseInt(lgK), Long.parseLong(seed));
- }
-
- @Override
- boolean isInputRaw() {
- return false;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/package-info.java b/src/main/java/com/yahoo/sketches/pig/cpc/package-info.java
deleted file mode 100644
index c50fe23..0000000
--- a/src/main/java/com/yahoo/sketches/pig/cpc/package-info.java
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-/**
- * Pig UDFs for CPC sketches.
- * This is a distinct-counting sketch that implements the
- * <i>Compressed Probabilistic Counting (CPC, a.k.a FM85)</i> algorithms developed by Kevin Lang in
- * his paper
- * <a href="https://arxiv.org/abs/1708.06839">Back to the Future: an Even More Nearly
- * Optimal Cardinality Estimation Algorithm</a>.
- *
- * @author Alexander Saydakov
- */
-package com.yahoo.sketches.pig.cpc;
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/AlgebraicInitial.java b/src/main/java/com/yahoo/sketches/pig/frequencies/AlgebraicInitial.java
deleted file mode 100644
index 22dbd24..0000000
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/AlgebraicInitial.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.frequencies;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.Tuple;
-
-/**
- * This is a common pass-through implementation for initial step of an Algebraic operation
- */
-public abstract class AlgebraicInitial extends EvalFunc<Tuple> {
- @Override
- public Tuple exec(final Tuple inputTuple) throws IOException {
- final DataBag bag = (DataBag) inputTuple.get(0);
- if (bag == null) {
- throw new IllegalArgumentException("InputTuple.Field0: Bag may not be null");
- }
- return inputTuple;
- }
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/Util.java b/src/main/java/com/yahoo/sketches/pig/frequencies/Util.java
deleted file mode 100644
index 649cdbf..0000000
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/Util.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.frequencies;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-
-final class Util {
-
- static final TupleFactory tupleFactory = TupleFactory.getInstance();
-
- static <T> Tuple serializeSketchToTuple(
- final ItemsSketch<T> sketch, final ArrayOfItemsSerDe<T> serDe) throws ExecException {
- final Tuple outputTuple = Util.tupleFactory.newTuple(1);
- outputTuple.set(0, new DataByteArray(sketch.toByteArray(serDe)));
- return outputTuple;
- }
-
- static <T> ItemsSketch<T> deserializeSketchFromTuple(
- final Tuple tuple, final ArrayOfItemsSerDe<T> serDe) throws ExecException {
- final byte[] bytes = ((DataByteArray) tuple.get(0)).get();
- return ItemsSketch.getInstance(Memory.wrap(bytes), serDe);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/package-info.java b/src/main/java/com/yahoo/sketches/pig/frequencies/package-info.java
deleted file mode 100644
index 4677ad1..0000000
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/package-info.java
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-
-/**
- * Pig UDFs for Frequent Items sketch.
- * This includes generic implementation in the form of abstract classes DataToFrequentItemsSketch
- * and UnionFrequentItemsSketch to be specialized for particular types of items.
- * An implementation for strings is provided: DataToFrequentStringsSketch and UnionFrequentStringsSketch.
- * FrequentStringsSketchToEstimates is to obtain results from sketches.
- *
- * @author Alexander Saydakov
- */
-package com.yahoo.sketches.pig.frequencies;
diff --git a/src/main/java/com/yahoo/sketches/pig/hash/package-info.java b/src/main/java/com/yahoo/sketches/pig/hash/package-info.java
deleted file mode 100644
index f8c3ee8..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hash/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-
-/**
- * <p>The hash package contains a high-performing and extended Java implementation
- * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C.
- * This core MurmurHash3.java class is used throughout all the sketch classes for consistentancy
- * and as long as the user specifies the same seed will result in coordinated hash operations.
- * This package also contains an adaptor class that extends the basic class with more functions
- * commonly associated with hashing.
- * </p>
- *
- * @author Lee Rhodes
- */
-package com.yahoo.sketches.pig.hash;
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/DataToSketchAlgebraicFinal.java b/src/main/java/com/yahoo/sketches/pig/hll/DataToSketchAlgebraicFinal.java
deleted file mode 100644
index f34daeb..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hll/DataToSketchAlgebraicFinal.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_LG_K;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataBag;
-
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
-public class DataToSketchAlgebraicFinal extends AlgebraicFinal {
-
- /**
- * Default constructor for the final pass of an Algebraic function.
- * Assumes default lgK and target HLL type.
- */
- public DataToSketchAlgebraicFinal() {
- super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default HLL target type.
- *
- * @param lgK in a form of a String
- */
- public DataToSketchAlgebraicFinal(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param tgtHllType HLL type of the resulting sketch
- */
- public DataToSketchAlgebraicFinal(final String lgK, final String tgtHllType) {
- super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
- }
-
- @Override
- void updateUnion(final DataBag bag, final Union union) throws ExecException {
- DataToSketch.updateUnion(bag, union);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/DataToSketchAlgebraicIntermediate.java b/src/main/java/com/yahoo/sketches/pig/hll/DataToSketchAlgebraicIntermediate.java
deleted file mode 100644
index 2fd9cdb..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hll/DataToSketchAlgebraicIntermediate.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_LG_K;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataBag;
-
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
-public class DataToSketchAlgebraicIntermediate extends AlgebraicIntermediate {
-
- /**
- * Default constructor for the intermediate pass of an Algebraic function.
- * Assumes default lgK and target HLL type.
- */
- public DataToSketchAlgebraicIntermediate() {
- super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default HLL target type.
- *
- * @param lgK in a form of a String
- */
- public DataToSketchAlgebraicIntermediate(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param tgtHllType HLL type of the resulting sketch
- */
- public DataToSketchAlgebraicIntermediate(final String lgK, final String tgtHllType) {
- super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
- }
-
- @Override
- void updateUnion(final DataBag bag, final Union union) throws ExecException {
- DataToSketch.updateUnion(bag, union);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java b/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java
deleted file mode 100644
index 6fa0c80..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimate.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.hll.HllSketch;
-
-/**
- * This is a User Defined Function (UDF) for getting a unique count estimate from an HllSketch
- *
- * @author Alexander Saydakov
- */
-public class SketchToEstimate extends EvalFunc<Double> {
-
- @Override
- public Double exec(final Tuple sketchTuple) throws IOException {
- if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
- return null;
- }
- final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
- final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
- return sketch.getEstimate();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketchAlgebraicFinal.java b/src/main/java/com/yahoo/sketches/pig/hll/UnionSketchAlgebraicFinal.java
deleted file mode 100644
index 07d5681..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketchAlgebraicFinal.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_LG_K;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataBag;
-
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
-public class UnionSketchAlgebraicFinal extends AlgebraicFinal {
-
- /**
- * Default constructor for the final pass of an Algebraic function.
- * Assumes default lgK and target HLL type.
- */
- public UnionSketchAlgebraicFinal() {
- super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default HLL target type.
- *
- * @param lgK in a form of a String
- */
- public UnionSketchAlgebraicFinal(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the final pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param tgtHllType HLL type of the resulting sketch
- */
- public UnionSketchAlgebraicFinal(final String lgK, final String tgtHllType) {
- super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
- }
-
- @Override
- void updateUnion(final DataBag bag, final Union union) throws ExecException {
- UnionSketch.updateUnion(bag, union);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketchAlgebraicIntermediate.java b/src/main/java/com/yahoo/sketches/pig/hll/UnionSketchAlgebraicIntermediate.java
deleted file mode 100644
index 77b6335..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketchAlgebraicIntermediate.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_LG_K;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataBag;
-
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
-public class UnionSketchAlgebraicIntermediate extends AlgebraicIntermediate {
-
- /**
- * Default constructor of the intermediate pass of an Algebraic function.
- * Assumes default lgK and target HLL type.
- */
- public UnionSketchAlgebraicIntermediate() {
- super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- * Assumes default HLL target type.
- *
- * @param lgK in a form of a String
- */
- public UnionSketchAlgebraicIntermediate(final String lgK) {
- super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
- }
-
- /**
- * Constructor for the intermediate pass of an Algebraic function. Pig will call
- * this and pass the same constructor arguments as the base UDF.
- *
- * @param lgK parameter controlling the sketch size and accuracy
- * @param tgtHllType HLL type of the resulting sketch
- */
- public UnionSketchAlgebraicIntermediate(final String lgK, final String tgtHllType) {
- super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
- }
-
- @Override
- void updateUnion(final DataBag bag, final Union union) throws ExecException {
- UnionSketch.updateUnion(bag, union);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/package-info.java b/src/main/java/com/yahoo/sketches/pig/hll/package-info.java
deleted file mode 100644
index f5f2f21..0000000
--- a/src/main/java/com/yahoo/sketches/pig/hll/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-/**
- * Pig UDFs for HLL sketches.
- *
- * These UDFs can be used as a replacement of corresponding Theta sketch UDFs.
- * Notice that intersections and A-not-B operations are not supported by the HLL sketch.
- * Also notice a small difference in the output type of DataToSketch and UnionSketch:
- * HLL sketch UDFs return DataByteArray (BYTEARRAY in Pig), but corresponding Theta sketch
- * UDFs return a Tuple with single DataByteArray inside. This was a historical accident,
- * and we are reluctant to break the compatibility with existing scripts. HLL sketch UDFs
- * don't have to keep this compatibility. As a result, HLL sketch UDFs don't need
- * flatten() around them to remove the Tuple, and internally they don't have to spend extra
- * resources to wrap every output DataByteArray into a Tuple.
- *
- * @author Alexander Saydakov
- */
-package com.yahoo.sketches.pig.hll;
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/GetK.java b/src/main/java/com/yahoo/sketches/pig/kll/GetK.java
deleted file mode 100644
index 2507325..0000000
--- a/src/main/java/com/yahoo/sketches/pig/kll/GetK.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.kll;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
-/**
- * This UDF is to get the parameter K from a given sketch.
- * This can be useful for debugging a work flow to make sure that resulting sketches
- * have the intended K, and, therefore, the intended accuracy
- */
-public class GetK extends EvalFunc<Integer> {
-
- @Override
- public Integer exec(final Tuple input) throws IOException {
- if (input.size() != 1) {
- throw new IllegalArgumentException("expected one input");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(dba.get()));
-
- return sketch.getK();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/GetRank.java b/src/main/java/com/yahoo/sketches/pig/kll/GetRank.java
deleted file mode 100644
index 4b0a50e..0000000
--- a/src/main/java/com/yahoo/sketches/pig/kll/GetRank.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.kll;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
-/**
- * This UDF is to get a normalized rank for a given value from a given sketch. A single
- * rank for a given value is returned. The normalized rank is a double value
- * from 0 to 1 inclusive. For example, the rank of 0.5 corresponds to 50th percentile,
- * which is the median value of the distribution (the number separating the higher half
- * of the probability distribution from the lower half).
- */
-public class GetRank extends EvalFunc<Double> {
-
- @Override
- public Double exec(final Tuple input) throws IOException {
- if (input.size() != 2) {
- throw new IllegalArgumentException("expected two inputs: sketch and value");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(dba.get()));
-
- if (!(input.get(1) instanceof Float)) {
- throw new IllegalArgumentException("expected a float value, got "
- + input.get(1).getClass().getSimpleName());
- }
- final float value = (float) input.get(1);
- return sketch.getRank(value);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/SketchToString.java b/src/main/java/com/yahoo/sketches/pig/kll/SketchToString.java
deleted file mode 100644
index 6e23a5a..0000000
--- a/src/main/java/com/yahoo/sketches/pig/kll/SketchToString.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.kll;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
-/**
- * This UDF is to get a human-readable summary of a given sketch.
- */
-public class SketchToString extends EvalFunc<String> {
-
- @Override
- public String exec(final Tuple input) throws IOException {
- if (input.size() != 1) {
- throw new IllegalArgumentException("expected one input");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(dba.get()));
-
- return sketch.toString();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/package-info.java b/src/main/java/com/yahoo/sketches/pig/kll/package-info.java
deleted file mode 100644
index 9cae236..0000000
--- a/src/main/java/com/yahoo/sketches/pig/kll/package-info.java
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-/**
- * Pig UDFs for KLL quantiles sketches.
- * See https://datasketches.github.io/docs/Quantiles/KLLSketch.html
- *
- * @author Alexander Saydakov
- */
-package com.yahoo.sketches.pig.kll;
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToStringsSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/DataToStringsSketch.java
deleted file mode 100644
index 5f2548e..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToStringsSketch.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import java.util.Comparator;
-
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-
-/**
- * Creates an ItemsSketch<String> from raw data.
- * It supports all three ways: exec(), Accumulator and Algebraic.
- */
-public class DataToStringsSketch extends DataToItemsSketch<String> {
-
- private static final Comparator<String> COMPARATOR = Comparator.naturalOrder();
- private static final ArrayOfItemsSerDe<String> SER_DE = new ArrayOfStringsSerDe();
-
- public DataToStringsSketch() {
- super(0, COMPARATOR, SER_DE);
- }
-
- public DataToStringsSketch(final String kStr) {
- super(Integer.parseInt(kStr), COMPARATOR, SER_DE);
- }
-
- // ALGEBRAIC INTERFACE
-
- @Override
- public String getInitial() {
- return DataToItemsSketchInitial.class.getName();
- }
-
- @Override
- public String getIntermed() {
- return DataToStringsSketchIntermediateFinal.class.getName();
- }
-
- @Override
- public String getFinal() {
- return DataToStringsSketchIntermediateFinal.class.getName();
- }
-
- public static class DataToStringsSketchIntermediateFinal
- extends DataToItemsSketchIntermediateFinal<String> {
-
- public DataToStringsSketchIntermediateFinal() {
- super(0, COMPARATOR, SER_DE);
- }
-
- public DataToStringsSketchIntermediateFinal(final String kStr) {
- super(Integer.parseInt(kStr), COMPARATOR, SER_DE);
- }
-
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/DoublesSketchToString.java b/src/main/java/com/yahoo/sketches/pig/quantiles/DoublesSketchToString.java
deleted file mode 100644
index 6b913dc..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/DoublesSketchToString.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-
-/**
- * This UDF is to get a human-readable summary of a given sketch.
- */
-public class DoublesSketchToString extends EvalFunc<String> {
-
- @Override
- public String exec(final Tuple input) throws IOException {
- if (input == null) {
- return null;
- }
- if (input.size() != 1) {
- throw new IllegalArgumentException("expected one input");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));
-
- return sketch.toString();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java
deleted file mode 100644
index 71b1dfd..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketch.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-
-/**
- * This UDF is to get the parameter K from a given sketch.
- * This can be useful for debugging a work flow to make sure that resulting sketches
- * have the intended K, and, therefore, the accuracy
- */
-public class GetKFromDoublesSketch extends EvalFunc<Integer> {
-
- @Override
- public Integer exec(final Tuple input) throws IOException {
- if (input.size() != 1) {
- throw new IllegalArgumentException("expected one input");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));
-
- return sketch.getK();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromStringsSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromStringsSketch.java
deleted file mode 100644
index aa93490..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetKFromStringsSketch.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import java.io.IOException;
-import java.util.Comparator;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-
-/**
- * This UDF is to get the parameter K from a given sketch.
- * This can be useful for debugging a work flow to make sure that resulting sketches
- * have the intended K, and, therefore, the accuracy
- */
-public class GetKFromStringsSketch extends EvalFunc<Integer> {
-
- @Override
- public Integer exec(final Tuple input) throws IOException {
- if (input.size() != 1) {
- throw new IllegalArgumentException("expected one input");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final ItemsSketch<String> sketch =
- ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(),
- new ArrayOfStringsSerDe());
-
- return sketch.getK();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/StringsSketchToString.java b/src/main/java/com/yahoo/sketches/pig/quantiles/StringsSketchToString.java
deleted file mode 100644
index 7a1d9bb..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/StringsSketchToString.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import java.io.IOException;
-import java.util.Comparator;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-
-/**
- * This UDF is to get a human-readable summary of a given sketch.
- */
-public class StringsSketchToString extends EvalFunc<String> {
-
- @Override
- public String exec(final Tuple input) throws IOException {
- if (input == null) {
- return null;
- }
- if (input.size() != 1) {
- throw new IllegalArgumentException("expected one input");
- }
-
- if (!(input.get(0) instanceof DataByteArray)) {
- throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
- + input.get(0).getClass().getSimpleName());
- }
- final DataByteArray dba = (DataByteArray) input.get(0);
- final ItemsSketch<String> sketch =
- ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(),
- new ArrayOfStringsSerDe());
- return sketch.toString();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/UnionStringsSketch.java b/src/main/java/com/yahoo/sketches/pig/quantiles/UnionStringsSketch.java
deleted file mode 100644
index c4f9589..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/UnionStringsSketch.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import java.util.Comparator;
-
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-
-/**
- * Computes union of ItemsSketch<String>.
- * It supports all three ways: exec(), Accumulator and Algebraic
- */
-public class UnionStringsSketch extends UnionItemsSketch<String> {
-
- private static final Comparator<String> COMPARATOR = Comparator.naturalOrder();
- private static final ArrayOfItemsSerDe<String> SER_DE = new ArrayOfStringsSerDe();
-
- public UnionStringsSketch() {
- super(0, COMPARATOR, SER_DE);
- }
-
- public UnionStringsSketch(final String kStr) {
- super(Integer.parseInt(kStr), COMPARATOR, SER_DE);
- }
-
- //ALGEBRAIC INTERFACE
-
- @Override
- public String getInitial() {
- return UnionItemsSketchInitial.class.getName();
- }
-
- @Override
- public String getIntermed() {
- return UnionStringsSketchIntermediateFinal.class.getName();
- }
-
- @Override
- public String getFinal() {
- return UnionStringsSketchIntermediateFinal.class.getName();
- }
-
- public static class UnionStringsSketchIntermediateFinal extends UnionItemsSketchIntermediateFinal<String> {
-
- public UnionStringsSketchIntermediateFinal() {
- super(0, COMPARATOR, SER_DE);
- }
-
- public UnionStringsSketchIntermediateFinal(final String kStr) {
- super(Integer.parseInt(kStr), COMPARATOR, SER_DE);
- }
-
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/Util.java b/src/main/java/com/yahoo/sketches/pig/quantiles/Util.java
deleted file mode 100644
index 99de93c..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/Util.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.quantiles;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-class Util {
-
- static Tuple doubleArrayToTuple(final double[] array) throws ExecException {
- final Tuple tuple = TupleFactory.getInstance().newTuple(array.length);
- for (int i = 0; i < array.length; i++) {
- tuple.set(i, array[i]);
- }
- return tuple;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/package-info.java b/src/main/java/com/yahoo/sketches/pig/quantiles/package-info.java
deleted file mode 100644
index 4c02e52..0000000
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-
-/**
- * Pig UDFs for Quantiles sketches.
- * This includes UDFs for generic ItemsSketch and specialized DoublesSketch.
- *
- * <p>The generic implementation is in the form of abstract classes DataToItemsSketch and
- * UnionItemsSketch to be specialized for particular types of items.
- * An implementation for strings is provided: DataToStringsSketch, UnionStringsSketch,
- * plus UDFs to obtain the results from sketches:
- * GetQuantileFromStringsSketch, GetQuantilesFromStringsSketch and GetPmfFromStringsSketch.
- *
- * <p>Support for DoublesSketch: DataToDoublesSketch, UnionDoublesSketch,
- * GetQuantileFromDoublesSketch, GetQuantilesFromDoublesSketch, GetPmfFromDoublesSketch
- *
- * @author Alexander Saydakov
- */
-package com.yahoo.sketches.pig.quantiles;
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/package-info.java b/src/main/java/com/yahoo/sketches/pig/sampling/package-info.java
deleted file mode 100644
index d388d20..0000000
--- a/src/main/java/com/yahoo/sketches/pig/sampling/package-info.java
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root
- * for terms.
- */
-
-/**
- * <p>This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of
- * unweighted items from a stream.</p>
- *
- * <p>These sketches are mergeable, but do <em>not</em> serialize to a compact form.</p>
- *
- * @author Jon Malkin
- */
-package com.yahoo.sketches.pig.sampling;
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/Estimate.java b/src/main/java/com/yahoo/sketches/pig/theta/Estimate.java
deleted file mode 100644
index 0c3adf0..0000000
--- a/src/main/java/com/yahoo/sketches/pig/theta/Estimate.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.theta;
-
-import static com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.sketches.Util;
-import com.yahoo.sketches.theta.Sketch;
-
-/**
- * Returns the unique count estimate of a sketch as a Double.
- *
- * @author LeeRhodes
- */
-public class Estimate extends EvalFunc<Double> {
- private final long seed_;
-
- /**
- * Constructs with the DEFAULT_UPDATE_SEED used when deserializing the sketch.
- */
- public Estimate() {
- this(Util.DEFAULT_UPDATE_SEED);
- }
-
- /**
- * Constructs with the given seed.
- * @param seedStr the string seed used when deserializing the sketch.
- */
- public Estimate(final String seedStr) {
- this(Long.parseLong(seedStr));
- }
-
- /**
- * Constructs with the given seed.
- * @param seed used when deserializing the sketch.
- */
- public Estimate(final long seed) {
- super();
- seed_ = seed;
- }
-
- @Override
- public Double exec(final Tuple sketchTuple) throws IOException { //throws is in API
- if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
- return null;
- }
- final Sketch sketch = tupleToSketch(sketchTuple, seed_);
- return sketch.getEstimate();
- }
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/package-info.java b/src/main/java/com/yahoo/sketches/pig/theta/package-info.java
deleted file mode 100644
index 6e95a6f..0000000
--- a/src/main/java/com/yahoo/sketches/pig/theta/package-info.java
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-
-/**
- * Pig UDFs for Theta sketch.
- *
- * @author Lee Rhodes
- */
-package com.yahoo.sketches.pig.theta;
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/AlgebraicInitial.java b/src/main/java/com/yahoo/sketches/pig/tuple/AlgebraicInitial.java
deleted file mode 100644
index e1460bd..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/AlgebraicInitial.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.Tuple;
-
-/**
- * This is a common pass-through implementation for initial step of an Algebraic operation
- */
-public abstract class AlgebraicInitial extends EvalFunc<Tuple> {
- @Override
- public Tuple exec(final Tuple inputTuple) throws IOException {
- final DataBag bag = (DataBag) inputTuple.get(0);
- if (bag == null) {
- throw new IllegalArgumentException("InputTuple.Field0: Bag may not be null");
- }
- return inputTuple;
- }
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchStats.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchStats.java
deleted file mode 100644
index 683454d..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchStats.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
-
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketchIterator;
-
-class ArrayOfDoublesSketchStats {
-
- /**
- * Convert sketch to summary statistics.
- *
- * @param sketch ArrayOfDoublesSketch to convert to summary statistics.
- * @return An array of SummaryStatistics.
- */
- static SummaryStatistics[] sketchToSummaryStatistics(final ArrayOfDoublesSketch sketch) {
- final SummaryStatistics[] summaryStatistics = new SummaryStatistics[sketch.getNumValues()];
- for (int i = 0; i < sketch.getNumValues(); i++) {
- summaryStatistics[i] = new SummaryStatistics();
- }
- final ArrayOfDoublesSketchIterator it = sketch.iterator();
- while (it.next()) {
- final double[] values = it.getValues();
- for (int i = 0; i < it.getValues().length; i++) {
- summaryStatistics[i].addValue(values[i]);
- }
- }
- return summaryStatistics;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java
deleted file mode 100644
index 13baefb..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
-/**
- * This is a User Defined Function (UDF) for obtaining the unique count estimate
- * along with a lower and upper bound from an ArrayOfDoublesSketch.
- *
- * <p>The result is a tuple with three double values: estimate, lower bound and upper bound.
- * The bounds are given at 95.5% confidence.
- *
- * @author Alexander Saydakov
- */
-public class ArrayOfDoublesSketchToEstimateAndErrorBounds extends EvalFunc<Tuple> {
-
- @Override
- public Tuple exec(final Tuple input) throws IOException {
- if ((input == null) || (input.size() == 0)) {
- return null;
- }
-
- final DataByteArray dba = (DataByteArray) input.get(0);
- final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
-
- return TupleFactory.getInstance().newTuple(Arrays.asList(
- sketch.getEstimate(),
- sketch.getLowerBound(2),
- sketch.getUpperBound(2)
- ));
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeans.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeans.java
deleted file mode 100644
index 04efb9a..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToMeans.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-
-import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
-/**
- * This UDF converts an ArrayOfDoubles sketch to mean values.
- * The result will be a tuple with N double values, where
- * N is the number of double values kept in the sketch per key.
- */
-public class ArrayOfDoublesSketchToMeans extends EvalFunc<Tuple> {
-
- @Override
- public Tuple exec(final Tuple input) throws IOException {
- if ((input == null) || (input.size() == 0)) {
- return null;
- }
-
- final DataByteArray dba = (DataByteArray) input.get(0);
- final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
-
- if (sketch.getRetainedEntries() < 1) {
- return null;
- }
-
- final SummaryStatistics[] summaries = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
-
- final Tuple means = TupleFactory.getInstance().newTuple(sketch.getNumValues());
- for (int i = 0; i < sketch.getNumValues(); i++) {
- means.set(i, summaries[i].getMean());
- }
- return means;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java
deleted file mode 100644
index 67cab37..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
-/**
- * This is a User Defined Function (UDF) for obtaining the number of retained entries
- * from an ArrayOfDoublesSketch.
- *
- * <p>The result is an integer value.
- *
- * @author Alexander Saydakov
- */
-public class ArrayOfDoublesSketchToNumberOfRetainedEntries extends EvalFunc<Integer> {
-
- @Override
- public Integer exec(final Tuple input) throws IOException {
- if ((input == null) || (input.size() == 0)) {
- return null;
- }
-
- final DataByteArray dba = (DataByteArray) input.get(0);
- final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
-
- return sketch.getRetainedEntries();
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariances.java b/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariances.java
deleted file mode 100644
index 6cab71a..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToVariances.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-
-import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
-/**
- * This UDF converts an ArrayOfDoubles sketch to variance values.
- * The result will be a tuple with N double values, where
- * N is the number of double values kept in the sketch per key.
- */
-public class ArrayOfDoublesSketchToVariances extends EvalFunc<Tuple> {
-
- @Override
- public Tuple exec(final Tuple input) throws IOException {
- if ((input == null) || (input.size() == 0)) {
- return null;
- }
-
- final DataByteArray dba = (DataByteArray) input.get(0);
- final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(dba.get()));
-
- if (sketch.getRetainedEntries() < 1) {
- return null;
- }
-
- final SummaryStatistics[] summaries = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
-
- final Tuple variances = TupleFactory.getInstance().newTuple(sketch.getNumValues());
- for (int i = 0; i < sketch.getNumValues(); i++) {
- variances.set(i, summaries[i].getVariance());
- }
- return variances;
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToEstimates.java b/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToEstimates.java
deleted file mode 100644
index 00e3c23..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToEstimates.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.Sketch;
-import com.yahoo.sketches.tuple.SketchIterator;
-import com.yahoo.sketches.tuple.Sketches;
-import com.yahoo.sketches.tuple.SummaryDeserializer;
-import com.yahoo.sketches.tuple.adouble.DoubleSummary;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryDeserializer;
-
-/**
- * This UDF converts a Sketch<DoubleSummary> to estimates.
- * The first estimate is the estimate of the number of unique
- * keys in the original population.
- * The second is the estimate of the sum of the parameter
- * in the original population (sums of the values in the sketch
- * scaled to the original population). This estimate assumes
- * that the DoubleSummary was used in the Sum mode.
- */
-public class DoubleSummarySketchToEstimates extends EvalFunc<Tuple> {
-
- private static final SummaryDeserializer<DoubleSummary> SUMMARY_DESERIALIZER =
- new DoubleSummaryDeserializer();
-
- @Override
- public Tuple exec(final Tuple input) throws IOException {
- if ((input == null) || (input.size() == 0)) {
- return null;
- }
-
- final DataByteArray dba = (DataByteArray) input.get(0);
- final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
- Memory.wrap(dba.get()), SUMMARY_DESERIALIZER);
-
- final Tuple output = TupleFactory.getInstance().newTuple(2);
- output.set(0, sketch.getEstimate());
- double sum = 0;
- final SketchIterator<DoubleSummary> it = sketch.iterator();
- while (it.next()) {
- sum += it.getSummary().getValue();
- }
- output.set(1, sum / sketch.getTheta());
-
- return output;
- }
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java b/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
deleted file mode 100644
index 1347ded..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentile.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
-import com.yahoo.sketches.tuple.Sketch;
-import com.yahoo.sketches.tuple.SketchIterator;
-import com.yahoo.sketches.tuple.Sketches;
-import com.yahoo.sketches.tuple.SummaryDeserializer;
-import com.yahoo.sketches.tuple.adouble.DoubleSummary;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryDeserializer;
-
-/**
- * This UDF is to get a percentile value from a Sketch<DoubleSummary>.
- * The values from DoubleSummary objects in the sketch are extracted,
- * and a single value with the given rank is returned. The rank is in
- * percent. For example, 50th percentile is the median value of the
- * distribution (the number separating the higher half of a probability
- * distribution from the lower half).
- */
-public class DoubleSummarySketchToPercentile extends EvalFunc<Double> {
-
- private static final SummaryDeserializer<DoubleSummary> SUMMARY_DESERIALIZER =
- new DoubleSummaryDeserializer();
- private static final int QUANTILES_SKETCH_SIZE = 1024;
-
- @Override
- public Double exec(final Tuple input) throws IOException {
- if (input.size() != 2) {
- throw new IllegalArgumentException("expected two inputs: sketch and pecentile");
- }
-
- final DataByteArray dba = (DataByteArray) input.get(0);
- final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
- Memory.wrap(dba.get()), SUMMARY_DESERIALIZER);
-
- final double percentile = (double) input.get(1);
- if ((percentile < 0) || (percentile > 100)) {
- throw new IllegalArgumentException("percentile must be between 0 and 100");
- }
-
- final UpdateDoublesSketch qs = DoublesSketch.builder().setK(QUANTILES_SKETCH_SIZE).build();
- final SketchIterator<DoubleSummary> it = sketch.iterator();
- while (it.next()) {
- qs.update(it.getSummary().getValue());
- }
- return qs.getQuantile(percentile / 100);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/Util.java b/src/main/java/com/yahoo/sketches/pig/tuple/Util.java
deleted file mode 100644
index 7a557a9..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/Util.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.tuple;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.Sketch;
-import com.yahoo.sketches.tuple.Sketches;
-import com.yahoo.sketches.tuple.Summary;
-import com.yahoo.sketches.tuple.SummaryDeserializer;
-
-final class Util {
-
- static final TupleFactory tupleFactory = TupleFactory.getInstance();
-
- static Tuple doubleArrayToTuple(final double[] array) throws ExecException {
- final Tuple tuple = tupleFactory.newTuple(array.length);
- for (int i = 0; i < array.length; i++) {
- tuple.set(i, array[i]);
- }
- return tuple;
- }
-
- static <S extends Summary> Sketch<S> deserializeSketchFromTuple(final Tuple tuple,
- final SummaryDeserializer<S> summaryDeserializer) throws ExecException {
- final byte[] bytes = ((DataByteArray) tuple.get(0)).get();
- return Sketches.heapifySketch(Memory.wrap(bytes), summaryDeserializer);
- }
-
-}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/package-info.java b/src/main/java/com/yahoo/sketches/pig/tuple/package-info.java
deleted file mode 100644
index 0687e55..0000000
--- a/src/main/java/com/yahoo/sketches/pig/tuple/package-info.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-/**
- * Pig UDFs for Tuple sketches.
- * Tuple sketches are based on the idea of Theta sketches with the addition of
- * values associated with unique keys.
- * Two sets of tuple sketch classes are available at the moment:
- * generic Tuple sketches with user-defined Summary, and a faster specialized
- * implementation with an array of double values.
- *
- * <p>There are two sets of Pig UDFs: one for generic Tuple sketch with an example
- * implementation for DoubleSummay, and another one for a specialized ArrayOfDoublesSketch.
- *
- * <p> The generic implementation is in the form of abstract classes DataToSketch and
- * UnionSketch to be specialized for particular types of Summary.
- * An example implementation for DoubleSumamry is provided: DataToDoubleSummarySketch and
- * UnionDoubleSummarySketch, as well as UDFs to obtain the results from sketches:
- * DoubleSumamrySketchToEstimates and DoubleSummarySketchToPercentile.
- *
- * <p>UDFs for ArrayOfDoublesSketch: DataToArrayOfDoublesSketch, UnionArrayOfDoublesSketch,
- * ArrayOfDoublesSketchToEstimates.
- *
- * @author Alexander Saydakov
- */
-package com.yahoo.sketches.pig.tuple;
diff --git a/src/main/java/com/yahoo/sketches/sampling/SamplingPigUtil.java b/src/main/java/com/yahoo/sketches/sampling/SamplingPigUtil.java
deleted file mode 100644
index c6e66a6..0000000
--- a/src/main/java/com/yahoo/sketches/sampling/SamplingPigUtil.java
+++ /dev/null
@@ -1,12 +0,0 @@
-package com.yahoo.sketches.sampling;
-
-import java.util.ArrayList;
-
-/**
- * @author Jon Malkin
- */
-public final class SamplingPigUtil {
- public static <T> ArrayList<T> getRawSamplesAsList(final ReservoirItemsSketch<T> sketch) {
- return sketch.getRawSamplesAsList();
- }
-}
diff --git a/src/main/java/com/yahoo/sketches/sampling/package-info.java b/src/main/java/com/yahoo/sketches/sampling/package-info.java
deleted file mode 100644
index 994fc05..0000000
--- a/src/main/java/com/yahoo/sketches/sampling/package-info.java
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc. Licensed under the terms of the Apache License 2.0. See LICENSE file
- * at the project root for terms.
- */
-/**
- * The sampling package
- */
-package com.yahoo.sketches.sampling;
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicFinal.java b/src/main/java/org/apache/datasketches/pig/cpc/AlgebraicFinal.java
similarity index 81%
rename from src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicFinal.java
rename to src/main/java/org/apache/datasketches/pig/cpc/AlgebraicFinal.java
index 8b42a1f..ef6b5b0 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/AlgebraicFinal.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import java.io.IOException;
+import org.apache.datasketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcUnion;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.cpc.CpcSketch;
-import com.yahoo.sketches.cpc.CpcUnion;
-
/**
* Class used to calculate the final pass of an <i>Algebraic</i> sketch
* operation. It will receive a bag of values returned by either the <i>Intermediate</i>
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicInitial.java b/src/main/java/org/apache/datasketches/pig/cpc/AlgebraicInitial.java
similarity index 63%
rename from src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicInitial.java
rename to src/main/java/org/apache/datasketches/pig/cpc/AlgebraicInitial.java
index f332dd0..65e2285 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicInitial.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/AlgebraicInitial.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import java.io.IOException;
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicIntermediate.java b/src/main/java/org/apache/datasketches/pig/cpc/AlgebraicIntermediate.java
similarity index 68%
rename from src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicIntermediate.java
rename to src/main/java/org/apache/datasketches/pig/cpc/AlgebraicIntermediate.java
index a778df8..7f506b7 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/AlgebraicIntermediate.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/AlgebraicIntermediate.java
@@ -1,20 +1,33 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import java.io.IOException;
+import org.apache.datasketches.cpc.CpcSketch;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.sketches.cpc.CpcSketch;
-
/**
* Class used to calculate the intermediate combiner pass of an <i>Algebraic</i> sketch
* operation. This is called from the combiner, and may be called multiple times (from a mapper
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java
similarity index 87%
rename from src/main/java/com/yahoo/sketches/pig/cpc/DataToSketch.java
rename to src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java
index 5d8aa10..30c656c 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketch.java
@@ -1,14 +1,29 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import java.io.IOException;
+import org.apache.datasketches.cpc.CpcSketch;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
@@ -19,8 +34,6 @@
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.cpc.CpcSketch;
-
/**
* This is a Pig UDF that builds sketches from data.
* This class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/DataToSketchAlgebraicFinal.java b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketchAlgebraicFinal.java
new file mode 100644
index 0000000..c97d1d1
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketchAlgebraicFinal.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.cpc;
+
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+
+import org.apache.datasketches.cpc.CpcSketch;
+
+@SuppressWarnings("javadoc")
+public class DataToSketchAlgebraicFinal extends AlgebraicFinal {
+
+ /**
+ * Default constructor for the final pass of an Algebraic function.
+ * Assumes default lgK and seed.
+ */
+ public DataToSketchAlgebraicFinal() {
+ super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Assumes default seed.
+ *
+ * @param lgK in the form of a String
+ */
+ public DataToSketchAlgebraicFinal(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK parameter controlling the sketch size and accuracy
+ * @param seed for the hash function
+ */
+ public DataToSketchAlgebraicFinal(final String lgK, final String seed) {
+ super(Integer.parseInt(lgK), Long.parseLong(seed));
+ }
+
+ @Override
+ boolean isInputRaw() {
+ return true;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/DataToSketchAlgebraicIntermediate.java b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketchAlgebraicIntermediate.java
new file mode 100644
index 0000000..14e48a9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/cpc/DataToSketchAlgebraicIntermediate.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.cpc;
+
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+
+import org.apache.datasketches.cpc.CpcSketch;
+
+@SuppressWarnings("javadoc")
+public class DataToSketchAlgebraicIntermediate extends AlgebraicIntermediate {
+
+ /**
+ * Default constructor for the intermediate pass of an Algebraic function.
+ * Assumes default lgK and seed.
+ */
+ public DataToSketchAlgebraicIntermediate() {
+ super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Assumes default seed.
+ *
+ * @param lgK in the form of a String
+ */
+ public DataToSketchAlgebraicIntermediate(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK parameter controlling the sketch size and accuracy
+ * @param seed for the hash function
+ */
+ public DataToSketchAlgebraicIntermediate(final String lgK, final String seed) {
+ super(Integer.parseInt(lgK), Long.parseLong(seed));
+ }
+
+ @Override
+ boolean isInputRaw() {
+ return true;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/GetEstimate.java b/src/main/java/org/apache/datasketches/pig/cpc/GetEstimate.java
new file mode 100644
index 0000000..485e699
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/cpc/GetEstimate.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.cpc;
+
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+
+import java.io.IOException;
+
+import org.apache.datasketches.cpc.CpcSketch;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a User Defined Function (UDF) for getting a distinct count estimate from a given CpcSketch
+ *
+ * @author Alexander Saydakov
+ */
+public class GetEstimate extends EvalFunc<Double> {
+
+ private final long seed_;
+
+ /**
+ * Constructor with default seed
+ */
+ public GetEstimate() {
+ this(DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor with given seed
+ * @param seed in the form of a String
+ */
+ public GetEstimate(final String seed) {
+ this(Long.parseLong(seed));
+ }
+
+ /**
+ * Base constructor
+ * @param seed parameter for the hash function
+ */
+ GetEstimate(final long seed) {
+ seed_ = seed;
+ }
+
+ @Override
+ public Double exec(final Tuple sketchTuple) throws IOException {
+ if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
+ return null;
+ }
+ final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
+ final CpcSketch sketch = CpcSketch.heapify(dba.get(), seed_);
+ return sketch.getEstimate();
+ }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/GetEstimateAndErrorBounds.java b/src/main/java/org/apache/datasketches/pig/cpc/GetEstimateAndErrorBounds.java
similarity index 76%
rename from src/main/java/com/yahoo/sketches/pig/cpc/GetEstimateAndErrorBounds.java
rename to src/main/java/org/apache/datasketches/pig/cpc/GetEstimateAndErrorBounds.java
index be87f1b..a9d68f8 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/GetEstimateAndErrorBounds.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/GetEstimateAndErrorBounds.java
@@ -1,14 +1,29 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import java.io.IOException;
+import org.apache.datasketches.cpc.CpcSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
@@ -17,8 +32,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.sketches.cpc.CpcSketch;
-
/**
* This is a User Defined Function (UDF) for obtaining the distinct count estimate
* along with a lower and upper bound from a given CpcSketch.
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/SketchToString.java b/src/main/java/org/apache/datasketches/pig/cpc/SketchToString.java
similarity index 65%
rename from src/main/java/com/yahoo/sketches/pig/cpc/SketchToString.java
rename to src/main/java/org/apache/datasketches/pig/cpc/SketchToString.java
index b82aa24..e123820 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/SketchToString.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/SketchToString.java
@@ -1,20 +1,33 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import java.io.IOException;
+import org.apache.datasketches.cpc.CpcSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.cpc.CpcSketch;
-
/**
* This is a User Defined Function (UDF) for printing a human-readable summary of a given CpcSketch
* @author Alexander Saydakov
diff --git a/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketch.java b/src/main/java/org/apache/datasketches/pig/cpc/UnionSketch.java
similarity index 85%
rename from src/main/java/com/yahoo/sketches/pig/cpc/UnionSketch.java
rename to src/main/java/org/apache/datasketches/pig/cpc/UnionSketch.java
index c4a9c9d..f311928 100644
--- a/src/main/java/com/yahoo/sketches/pig/cpc/UnionSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/cpc/UnionSketch.java
@@ -1,14 +1,30 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import java.io.IOException;
+import org.apache.datasketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcUnion;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
@@ -19,9 +35,6 @@
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.cpc.CpcSketch;
-import com.yahoo.sketches.cpc.CpcUnion;
-
/**
* This is a Pig UDF that performs the Union operation on CpcSketches.
* This class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/UnionSketchAlgebraicFinal.java b/src/main/java/org/apache/datasketches/pig/cpc/UnionSketchAlgebraicFinal.java
new file mode 100644
index 0000000..04e741f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/cpc/UnionSketchAlgebraicFinal.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.cpc;
+
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+
+import org.apache.datasketches.cpc.CpcSketch;
+
+@SuppressWarnings("javadoc")
+public class UnionSketchAlgebraicFinal extends AlgebraicFinal {
+
+ /**
+ * Default constructor for the final pass of an Algebraic function.
+ * Assumes default lgK and seed.
+ */
+ public UnionSketchAlgebraicFinal() {
+ super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Assumes default seed.
+ *
+ * @param lgK in the form of a String
+ */
+ public UnionSketchAlgebraicFinal(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK parameter controlling the sketch size and accuracy
+ * @param seed for the hash function
+ */
+ public UnionSketchAlgebraicFinal(final String lgK, final String seed) {
+ super(Integer.parseInt(lgK), Long.parseLong(seed));
+ }
+
+ @Override
+ boolean isInputRaw() {
+ return false;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/UnionSketchAlgebraicIntermediate.java b/src/main/java/org/apache/datasketches/pig/cpc/UnionSketchAlgebraicIntermediate.java
new file mode 100644
index 0000000..2f6b78d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/cpc/UnionSketchAlgebraicIntermediate.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.cpc;
+
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+
+import org.apache.datasketches.cpc.CpcSketch;
+
+@SuppressWarnings("javadoc")
+public class UnionSketchAlgebraicIntermediate extends AlgebraicIntermediate {
+
+ /**
+ * Default constructor for the intermediate pass of an Algebraic function.
+ * Assumes default lgK and seed.
+ */
+ public UnionSketchAlgebraicIntermediate() {
+ super(CpcSketch.DEFAULT_LG_K, DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Assumes default seed.
+ *
+ * @param lgK in the form of a String
+ */
+ public UnionSketchAlgebraicIntermediate(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK parameter controlling the sketch size and accuracy
+ * @param seed for the hash function
+ */
+ public UnionSketchAlgebraicIntermediate(final String lgK, final String seed) {
+ super(Integer.parseInt(lgK), Long.parseLong(seed));
+ }
+
+ @Override
+ boolean isInputRaw() {
+ return false;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/cpc/package-info.java b/src/main/java/org/apache/datasketches/pig/cpc/package-info.java
new file mode 100644
index 0000000..5dde74c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/cpc/package-info.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for CPC sketches.
+ * This is a distinct-counting sketch that implements the
+ * <i>Compressed Probabilistic Counting (CPC, a.k.a. FM85)</i> algorithms developed by Kevin Lang in
+ * his paper
+ * <a href="https://arxiv.org/abs/1708.06839">Back to the Future: an Even More Nearly
+ * Optimal Cardinality Estimation Algorithm</a>.
+ *
+ * @author Alexander Saydakov
+ */
+package org.apache.datasketches.pig.cpc;
diff --git a/src/main/java/org/apache/datasketches/pig/frequencies/AlgebraicInitial.java b/src/main/java/org/apache/datasketches/pig/frequencies/AlgebraicInitial.java
new file mode 100644
index 0000000..acb19a8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/AlgebraicInitial.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.frequencies;
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a common pass-through implementation for the initial step of an Algebraic operation
+ */
+public abstract class AlgebraicInitial extends EvalFunc<Tuple> {
+ @Override
+ public Tuple exec(final Tuple inputTuple) throws IOException {
+ final DataBag bag = (DataBag) inputTuple.get(0);
+ if (bag == null) {
+ throw new IllegalArgumentException("InputTuple.Field0: Bag may not be null");
+ }
+ return inputTuple;
+ }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentItemsSketch.java b/src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentItemsSketch.java
similarity index 76%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentItemsSketch.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentItemsSketch.java
index 15f14cd..fe1345d 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentItemsSketch.java
@@ -1,12 +1,28 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
import java.io.IOException;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -14,9 +30,6 @@
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-
/**
* This is a generic implementation to be specialized in concrete UDFs
* @param <T> type of item
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentItemsSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentItemsSketchAlgebraicIntermediateFinal.java
similarity index 69%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentItemsSketchAlgebraicIntermediateFinal.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentItemsSketchAlgebraicIntermediateFinal.java
index c7d3de0..4533c88 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentItemsSketchAlgebraicIntermediateFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentItemsSketchAlgebraicIntermediateFinal.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
import java.io.IOException;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-
/**
* Class used to calculate the intermediate pass (combiner) or the final pass
* (reducer) of an Algebraic sketch operation. This may be called multiple times
@@ -35,6 +48,7 @@
*/
public DataToFrequentItemsSketchAlgebraicIntermediateFinal() {}
+ @SuppressWarnings("javadoc")
public DataToFrequentItemsSketchAlgebraicIntermediateFinal(
final int sketchSize, final ArrayOfItemsSerDe<T> serDe) {
sketchSize_ = sketchSize;
@@ -48,7 +62,7 @@
Logger.getLogger(getClass()).info("algebraic was used");
isFirstCall_ = false;
}
- final ItemsSketch<T> sketch = new ItemsSketch<T>(sketchSize_);
+ final ItemsSketch<T> sketch = new ItemsSketch<>(sketchSize_);
final DataBag bag = (DataBag) inputTuple.get(0);
for (Tuple dataTuple: bag) {
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentStringsSketch.java b/src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentStringsSketch.java
similarity index 65%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentStringsSketch.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentStringsSketch.java
index 2a2d050..e7d5763 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/DataToFrequentStringsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/DataToFrequentStringsSketch.java
@@ -1,14 +1,27 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
+import org.apache.datasketches.ArrayOfStringsSerDe;
import org.apache.pig.Algebraic;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-
/**
* This UDF creates a FrequentItemsSketch<String> from raw data.
* It supports all three ways: exec(), Accumulator and Algebraic.
@@ -38,6 +51,7 @@
return IntermediateFinal.class.getName();
}
+ @SuppressWarnings("javadoc")
public static class Initial extends AlgebraicInitial {
/**
* Constructor for the initial pass of an Algebraic function. This will be passed the same
@@ -52,6 +66,7 @@
public Initial() {}
}
+ @SuppressWarnings("javadoc")
public static class IntermediateFinal extends DataToFrequentItemsSketchAlgebraicIntermediateFinal<String> {
/**
* Constructor for the intermediate and final passes of an Algebraic function. This will be
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/FrequentStringsSketchToEstimates.java b/src/main/java/org/apache/datasketches/pig/frequencies/FrequentStringsSketchToEstimates.java
similarity index 71%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/FrequentStringsSketchToEstimates.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/FrequentStringsSketchToEstimates.java
index 79ca919..96f1d5a 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/FrequentStringsSketchToEstimates.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/FrequentStringsSketchToEstimates.java
@@ -1,12 +1,30 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
import java.io.IOException;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.frequencies.ErrorType;
+import org.apache.datasketches.frequencies.ItemsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
@@ -17,11 +35,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.frequencies.ErrorType;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-
/**
* This UDF converts a FrequentItemsSketch<String> to estimates:
* {(item, estimate, upper bound, lower bound), ...}
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentItemsSketch.java b/src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentItemsSketch.java
similarity index 75%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentItemsSketch.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentItemsSketch.java
index ccd8a52..9707f25 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentItemsSketch.java
@@ -1,12 +1,28 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
import java.io.IOException;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -14,9 +30,6 @@
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-
/**
* This is a generic implementation to be specialized in concrete UDFs
* @param <T> Type of item
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentItemsSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentItemsSketchAlgebraicIntermediateFinal.java
similarity index 71%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentItemsSketchAlgebraicIntermediateFinal.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentItemsSketchAlgebraicIntermediateFinal.java
index bfe234e..8744354 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentItemsSketchAlgebraicIntermediateFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentItemsSketchAlgebraicIntermediateFinal.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
import java.io.IOException;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-
/**
* This is to calculate the intermediate pass (combiner) or the final pass
* (reducer) of an Algebraic sketch operation. This may be called multiple times
@@ -35,6 +48,7 @@
*/
public UnionFrequentItemsSketchAlgebraicIntermediateFinal() {}
+ @SuppressWarnings("javadoc")
public UnionFrequentItemsSketchAlgebraicIntermediateFinal(
final int sketchSize, final ArrayOfItemsSerDe<T> serDe) {
sketchSize_ = sketchSize;
@@ -48,7 +62,7 @@
Logger.getLogger(getClass()).info("algebraic is used");
isFirstCall_ = false;
}
- final ItemsSketch<T> sketch = new ItemsSketch<T>(sketchSize_);
+ final ItemsSketch<T> sketch = new ItemsSketch<>(sketchSize_);
final DataBag bag = (DataBag) inputTuple.get(0);
if (bag == null) {
diff --git a/src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentStringsSketch.java b/src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentStringsSketch.java
similarity index 65%
rename from src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentStringsSketch.java
rename to src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentStringsSketch.java
index 5b68ad5..fa6b6bc 100644
--- a/src/main/java/com/yahoo/sketches/pig/frequencies/UnionFrequentStringsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/UnionFrequentStringsSketch.java
@@ -1,14 +1,27 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+package org.apache.datasketches.pig.frequencies;
+import org.apache.datasketches.ArrayOfStringsSerDe;
import org.apache.pig.Algebraic;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-
/**
* This is to union FrequentItemsSketch<String>.
* It supports all three ways: exec(), Accumulator and Algebraic
@@ -38,6 +51,7 @@
return IntermediateFinal.class.getName();
}
+ @SuppressWarnings("javadoc")
public static class Initial extends AlgebraicInitial {
/**
* Default constructor to make pig validation happy.
@@ -52,6 +66,7 @@
public Initial(final String sketchSize) {}
}
+ @SuppressWarnings("javadoc")
public static class IntermediateFinal extends UnionFrequentItemsSketchAlgebraicIntermediateFinal<String> {
/**
* Default constructor to make pig validation happy.
diff --git a/src/main/java/org/apache/datasketches/pig/frequencies/Util.java b/src/main/java/org/apache/datasketches/pig/frequencies/Util.java
new file mode 100644
index 0000000..4fec7fe
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/Util.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.frequencies;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+final class Util {
+ // Package-private static helpers that move frequent-items sketches in and out of Pig tuples.
+ static final TupleFactory tupleFactory = TupleFactory.getInstance();
+ // Serializes the sketch with the given serDe and stores the bytes in field 0 of a new tuple.
+ static <T> Tuple serializeSketchToTuple(
+ final ItemsSketch<T> sketch, final ArrayOfItemsSerDe<T> serDe) throws ExecException {
+ final Tuple outputTuple = Util.tupleFactory.newTuple(1);
+ outputTuple.set(0, new DataByteArray(sketch.toByteArray(serDe)));
+ return outputTuple;
+ }
+ // Rebuilds a sketch from field 0 of the tuple, which is cast to DataByteArray without a check.
+ static <T> ItemsSketch<T> deserializeSketchFromTuple(
+ final Tuple tuple, final ArrayOfItemsSerDe<T> serDe) throws ExecException {
+ final byte[] bytes = ((DataByteArray) tuple.get(0)).get();
+ return ItemsSketch.getInstance(Memory.wrap(bytes), serDe);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/frequencies/package-info.java b/src/main/java/org/apache/datasketches/pig/frequencies/package-info.java
new file mode 100644
index 0000000..0426a35
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/frequencies/package-info.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for Frequent Items sketch.
+ * This includes generic implementations in the form of the abstract classes DataToFrequentItemsSketch
+ * and UnionFrequentItemsSketch, to be specialized for particular types of items.
+ * An implementation for strings is provided: DataToFrequentStringsSketch and UnionFrequentStringsSketch.
+ * FrequentStringsSketchToEstimates obtains results from sketches.
+ *
+ * @author Alexander Saydakov
+ */
+package org.apache.datasketches.pig.frequencies;
diff --git a/src/main/java/com/yahoo/sketches/pig/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/pig/hash/MurmurHash3.java
similarity index 84%
rename from src/main/java/com/yahoo/sketches/pig/hash/MurmurHash3.java
rename to src/main/java/org/apache/datasketches/pig/hash/MurmurHash3.java
index 87bf52f..5de7173 100644
--- a/src/main/java/com/yahoo/sketches/pig/hash/MurmurHash3.java
+++ b/src/main/java/org/apache/datasketches/pig/hash/MurmurHash3.java
@@ -1,15 +1,29 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hash;
+package org.apache.datasketches.pig.hash;
-import static com.yahoo.sketches.hash.MurmurHash3.hash;
-import static com.yahoo.sketches.hash.MurmurHash3Adaptor.hashToLongs;
-import static com.yahoo.sketches.hash.MurmurHash3Adaptor.modulo;
import static java.lang.Math.min;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.datasketches.hash.MurmurHash3.hash;
+import static org.apache.datasketches.hash.MurmurHash3Adaptor.hashToLongs;
+import static org.apache.datasketches.hash.MurmurHash3Adaptor.modulo;
import java.io.IOException;
@@ -27,8 +41,6 @@
* parameter can be a positive Integer modulus divisor. If the divisor is provided, the Integer
* modulus remainder is computed on the entire 128-bit hash output treated as if it were a 128-bit
* positive value.
- *
- * @author Lee Rhodes
*/
public class MurmurHash3 extends EvalFunc<Tuple> {
private static final TupleFactory mTupleFactory = TupleFactory.getInstance();
diff --git a/src/main/java/org/apache/datasketches/pig/hash/package-info.java b/src/main/java/org/apache/datasketches/pig/hash/package-info.java
new file mode 100644
index 0000000..0a8bbd2
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hash/package-info.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * <p>The hash package contains a high-performing and extended Java implementation
+ * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C.
+ * This core MurmurHash3.java class is used throughout all the sketch classes for consistency
+ * and, as long as the user specifies the same seed, will result in coordinated hash operations.
+ * This package also contains an adaptor class that extends the basic class with more functions
+ * commonly associated with hashing.
+ * </p>
+ */
+package org.apache.datasketches.pig.hash;
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java b/src/main/java/org/apache/datasketches/pig/hll/AlgebraicFinal.java
similarity index 76%
rename from src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java
rename to src/main/java/org/apache/datasketches/pig/hll/AlgebraicFinal.java
index a89ba27..c8bcc88 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/AlgebraicFinal.java
@@ -1,12 +1,30 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import java.io.IOException;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.datasketches.memory.Memory;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
@@ -14,11 +32,6 @@
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
/**
* Class used to calculate the final pass of an <i>Algebraic</i> sketch
* operation. It will receive a bag of values returned by either the <i>Intermediate</i>
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicInitial.java b/src/main/java/org/apache/datasketches/pig/hll/AlgebraicInitial.java
similarity index 64%
rename from src/main/java/com/yahoo/sketches/pig/hll/AlgebraicInitial.java
rename to src/main/java/org/apache/datasketches/pig/hll/AlgebraicInitial.java
index 9cf33f6..c4f91c1 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicInitial.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/AlgebraicInitial.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import java.io.IOException;
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java b/src/main/java/org/apache/datasketches/pig/hll/AlgebraicIntermediate.java
similarity index 77%
rename from src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java
rename to src/main/java/org/apache/datasketches/pig/hll/AlgebraicIntermediate.java
index 045f3ce..db10f62 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/AlgebraicIntermediate.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/AlgebraicIntermediate.java
@@ -1,12 +1,30 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import java.io.IOException;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.datasketches.memory.Memory;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
@@ -15,11 +33,6 @@
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
/**
* Class used to calculate the intermediate combiner pass of an <i>Algebraic</i> sketch
* operation. This is called from the combiner, and may be called multiple times (from a mapper
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java
similarity index 87%
rename from src/main/java/com/yahoo/sketches/pig/hll/DataToSketch.java
rename to src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java
index 8944c51..6e38253 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/DataToSketch.java
@@ -1,12 +1,29 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import java.io.IOException;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
@@ -17,10 +34,6 @@
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
/**
* This is a Pig UDF that builds Sketches from data.
* This class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/org/apache/datasketches/pig/hll/DataToSketchAlgebraicFinal.java b/src/main/java/org/apache/datasketches/pig/hll/DataToSketchAlgebraicFinal.java
new file mode 100644
index 0000000..69a9e58
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hll/DataToSketchAlgebraicFinal.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_LG_K;
+
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+
+@SuppressWarnings("javadoc")
+public class DataToSketchAlgebraicFinal extends AlgebraicFinal {
+ // Final-pass Algebraic class for DataToSketch; each constructor forwards (lgK, type) to AlgebraicFinal.
+ /**
+ * Default constructor for the final pass of an Algebraic function.
+ * Uses DataToSketch.DEFAULT_LG_K and DataToSketch.DEFAULT_HLL_TYPE.
+ */
+ public DataToSketchAlgebraicFinal() {
+ super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Uses DataToSketch.DEFAULT_HLL_TYPE as the target HLL type.
+ *
+ * @param lgK parameter controlling the sketch size and accuracy, as a String parsed with Integer.parseInt
+ */
+ public DataToSketchAlgebraicFinal(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK parameter controlling the sketch size and accuracy, as a String parsed with Integer.parseInt
+ * @param tgtHllType HLL type of the resulting sketch, as a name resolved with TgtHllType.valueOf
+ */
+ public DataToSketchAlgebraicFinal(final String lgK, final String tgtHllType) {
+ super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
+ }
+
+ @Override
+ void updateUnion(final DataBag bag, final Union union) throws ExecException {
+ DataToSketch.updateUnion(bag, union); // delegate to the static update logic in DataToSketch
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/hll/DataToSketchAlgebraicIntermediate.java b/src/main/java/org/apache/datasketches/pig/hll/DataToSketchAlgebraicIntermediate.java
new file mode 100644
index 0000000..bb184dc
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hll/DataToSketchAlgebraicIntermediate.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_LG_K;
+
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+
+@SuppressWarnings("javadoc")
+public class DataToSketchAlgebraicIntermediate extends AlgebraicIntermediate {
+
+ /**
+ * Default constructor for the intermediate pass of an Algebraic function.
+ * Uses DEFAULT_LG_K and DEFAULT_HLL_TYPE, both defined in DataToSketch.
+ */
+ public DataToSketchAlgebraicIntermediate() {
+ super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Uses DEFAULT_HLL_TYPE, defined in DataToSketch, as the target HLL type.
+ *
+ * @param lgK sketch size and accuracy parameter as a String; parsed with Integer.parseInt
+ */
+ public DataToSketchAlgebraicIntermediate(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK sketch size and accuracy parameter as a String; parsed with Integer.parseInt
+ * @param tgtHllType HLL type of the resulting sketch as a String; resolved with TgtHllType.valueOf
+ */
+ public DataToSketchAlgebraicIntermediate(final String lgK, final String tgtHllType) {
+ super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
+ }
+
+ @Override
+ void updateUnion(final DataBag bag, final Union union) throws ExecException {
+ DataToSketch.updateUnion(bag, union); // the update logic itself lives in DataToSketch
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/hll/SketchToEstimate.java b/src/main/java/org/apache/datasketches/pig/hll/SketchToEstimate.java
new file mode 100644
index 0000000..81a1f89
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hll/SketchToEstimate.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import java.io.IOException;
+
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This is a User Defined Function (UDF) for getting a unique count estimate from an HllSketch.
+ *
+ * @author Alexander Saydakov
+ */
+public class SketchToEstimate extends EvalFunc<Double> {
+
+  /**
+   * Wraps the serialized HllSketch in the first field of the tuple and returns its estimate.
+   *
+   * @param sketchTuple tuple whose first field is a DataByteArray holding the sketch
+   * @return the estimate, or null if the tuple is null or empty or its first field is null
+   * @throws IOException if Pig fails to read the field from the tuple
+   */
+  @Override
+  public Double exec(final Tuple sketchTuple) throws IOException {
+    if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
+      return null;
+    }
+    final DataByteArray dba = (DataByteArray) sketchTuple.get(0);
+    if (dba == null) {
+      // A null field carries no sketch: answer null, as for a missing tuple, rather than an NPE.
+      return null;
+    }
+    final HllSketch sketch = HllSketch.wrap(Memory.wrap(dba.get()));
+    return sketch.getEstimate();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java b/src/main/java/org/apache/datasketches/pig/hll/SketchToEstimateAndErrorBounds.java
similarity index 68%
rename from src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java
rename to src/main/java/org/apache/datasketches/pig/hll/SketchToEstimateAndErrorBounds.java
index 94374aa..e5e3907 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBounds.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/SketchToEstimateAndErrorBounds.java
@@ -1,12 +1,28 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import java.io.IOException;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
@@ -15,9 +31,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.hll.HllSketch;
-
/**
* This is a User Defined Function (UDF) for obtaining the unique count estimate
* along with a lower and upper bound from an HllSketch.
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java b/src/main/java/org/apache/datasketches/pig/hll/SketchToString.java
similarity index 62%
rename from src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java
rename to src/main/java/org/apache/datasketches/pig/hll/SketchToString.java
index c0bdbcd..2dc0bb4 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/SketchToString.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/SketchToString.java
@@ -1,19 +1,32 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import java.io.IOException;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.hll.HllSketch;
-
/**
* This is a User Defined Function (UDF) for "pretty printing" the summary of an HllSketch
*
diff --git a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java b/src/main/java/org/apache/datasketches/pig/hll/UnionSketch.java
similarity index 83%
rename from src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java
rename to src/main/java/org/apache/datasketches/pig/hll/UnionSketch.java
index 7cae70e..a984db4 100644
--- a/src/main/java/com/yahoo/sketches/pig/hll/UnionSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/hll/UnionSketch.java
@@ -1,15 +1,33 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
-import static com.yahoo.sketches.pig.hll.DataToSketch.DEFAULT_LG_K;
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_LG_K;
import java.io.IOException;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.datasketches.memory.Memory;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
@@ -20,11 +38,6 @@
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.TgtHllType;
-import com.yahoo.sketches.hll.Union;
-
/**
* This is a Pig UDF that performs the Union operation on HllSketches.
* This class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/org/apache/datasketches/pig/hll/UnionSketchAlgebraicFinal.java b/src/main/java/org/apache/datasketches/pig/hll/UnionSketchAlgebraicFinal.java
new file mode 100644
index 0000000..0af607c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hll/UnionSketchAlgebraicFinal.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_LG_K;
+
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+
+@SuppressWarnings("javadoc")
+public class UnionSketchAlgebraicFinal extends AlgebraicFinal {
+
+ /**
+ * Default constructor for the final pass of an Algebraic function.
+ * Uses DEFAULT_LG_K and DEFAULT_HLL_TYPE, both defined in DataToSketch.
+ */
+ public UnionSketchAlgebraicFinal() {
+ super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Uses DEFAULT_HLL_TYPE, defined in DataToSketch, as the target HLL type.
+ *
+ * @param lgK sketch size and accuracy parameter as a String; parsed with Integer.parseInt
+ */
+ public UnionSketchAlgebraicFinal(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the final pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK sketch size and accuracy parameter as a String; parsed with Integer.parseInt
+ * @param tgtHllType HLL type of the resulting sketch as a String; resolved with TgtHllType.valueOf
+ */
+ public UnionSketchAlgebraicFinal(final String lgK, final String tgtHllType) {
+ super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
+ }
+
+ @Override
+ void updateUnion(final DataBag bag, final Union union) throws ExecException {
+ UnionSketch.updateUnion(bag, union); // the update logic itself lives in UnionSketch
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/hll/UnionSketchAlgebraicIntermediate.java b/src/main/java/org/apache/datasketches/pig/hll/UnionSketchAlgebraicIntermediate.java
new file mode 100644
index 0000000..50e4c6f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hll/UnionSketchAlgebraicIntermediate.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_HLL_TYPE;
+import static org.apache.datasketches.pig.hll.DataToSketch.DEFAULT_LG_K;
+
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataBag;
+
+@SuppressWarnings("javadoc")
+public class UnionSketchAlgebraicIntermediate extends AlgebraicIntermediate {
+
+ /**
+ * Default constructor for the intermediate pass of an Algebraic function.
+ * Uses DEFAULT_LG_K and DEFAULT_HLL_TYPE, both defined in DataToSketch.
+ */
+ public UnionSketchAlgebraicIntermediate() {
+ super(DEFAULT_LG_K, DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ * Uses DEFAULT_HLL_TYPE, defined in DataToSketch, as the target HLL type.
+ *
+ * @param lgK sketch size and accuracy parameter as a String; parsed with Integer.parseInt
+ */
+ public UnionSketchAlgebraicIntermediate(final String lgK) {
+ super(Integer.parseInt(lgK), DEFAULT_HLL_TYPE);
+ }
+
+ /**
+ * Constructor for the intermediate pass of an Algebraic function. Pig will call
+ * this and pass the same constructor arguments as the base UDF.
+ *
+ * @param lgK sketch size and accuracy parameter as a String; parsed with Integer.parseInt
+ * @param tgtHllType HLL type of the resulting sketch as a String; resolved with TgtHllType.valueOf
+ */
+ public UnionSketchAlgebraicIntermediate(final String lgK, final String tgtHllType) {
+ super(Integer.parseInt(lgK), TgtHllType.valueOf(tgtHllType));
+ }
+
+ @Override
+ void updateUnion(final DataBag bag, final Union union) throws ExecException {
+ UnionSketch.updateUnion(bag, union); // the update logic itself lives in UnionSketch
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/hll/package-info.java b/src/main/java/org/apache/datasketches/pig/hll/package-info.java
new file mode 100644
index 0000000..f49830d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/hll/package-info.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for HLL sketches.
+ *
+ * These UDFs can be used as a replacement for the corresponding Theta sketch UDFs.
+ * Notice that intersections and A-not-B operations are not supported by the HLL sketch.
+ * Also notice a small difference in the output type of DataToSketch and UnionSketch:
+ * HLL sketch UDFs return DataByteArray (BYTEARRAY in Pig), but the corresponding Theta sketch
+ * UDFs return a Tuple with a single DataByteArray inside. This was a historical accident,
+ * and we are reluctant to break the compatibility with existing scripts. HLL sketch UDFs
+ * don't have to keep this compatibility. As a result, HLL sketch UDFs don't need
+ * flatten() around them to remove the Tuple, and internally they don't have to spend extra
+ * resources to wrap every output DataByteArray into a Tuple.
+ *
+ * @author Alexander Saydakov
+ */
+package org.apache.datasketches.pig.hll;
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/kll/DataToSketch.java
similarity index 91%
rename from src/main/java/com/yahoo/sketches/pig/kll/DataToSketch.java
rename to src/main/java/org/apache/datasketches/pig/kll/DataToSketch.java
index f9a5277..ee6977f 100644
--- a/src/main/java/com/yahoo/sketches/pig/kll/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/kll/DataToSketch.java
@@ -1,12 +1,28 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.io.IOException;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -15,9 +31,6 @@
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
/**
* This UDF is to build sketches from data.
* This class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces for
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/GetCdf.java b/src/main/java/org/apache/datasketches/pig/kll/GetCdf.java
similarity index 66%
rename from src/main/java/com/yahoo/sketches/pig/kll/GetCdf.java
rename to src/main/java/org/apache/datasketches/pig/kll/GetCdf.java
index 1cfe22a..f39f03d 100644
--- a/src/main/java/com/yahoo/sketches/pig/kll/GetCdf.java
+++ b/src/main/java/org/apache/datasketches/pig/kll/GetCdf.java
@@ -1,19 +1,32 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.io.IOException;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
/**
* This UDF is to get an approximation to the Cumulative Distribution Function (CDF) of the input stream
* given a sketch and a set of split points - an array of <i>m</i> unique, monotonically increasing
diff --git a/src/main/java/org/apache/datasketches/pig/kll/GetK.java b/src/main/java/org/apache/datasketches/pig/kll/GetK.java
new file mode 100644
index 0000000..9ea07e7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/kll/GetK.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.kll;
+
+import java.io.IOException;
+
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This UDF is to get the parameter K from a given sketch.
+ * This can be useful for debugging a work flow to make sure that resulting sketches
+ * have the intended K, and, therefore, the intended accuracy.
+ */
+public class GetK extends EvalFunc<Integer> {
+
+  /**
+   * Deserializes the sketch held in the only field of the tuple and returns its K.
+   *
+   * @param input tuple with exactly one field: a DataByteArray holding the sketch
+   * @return parameter K of the sketch
+   * @throws IllegalArgumentException if the tuple does not have exactly one field, or the
+   *     field is null or not a DataByteArray
+   * @throws IOException if Pig fails to read the field from the tuple
+   */
+  @Override
+  public Integer exec(final Tuple input) throws IOException {
+    if (input.size() != 1) {
+      throw new IllegalArgumentException("expected one input");
+    }
+
+    // Read the field once: instanceof is false for null, and a null has no class to name.
+    final Object field = input.get(0);
+    if (!(field instanceof DataByteArray)) {
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + ((field == null) ? "null" : field.getClass().getSimpleName()));
+    }
+    final DataByteArray dba = (DataByteArray) field;
+    final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(dba.get()));
+
+    return sketch.getK();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/GetPmf.java b/src/main/java/org/apache/datasketches/pig/kll/GetPmf.java
similarity index 69%
rename from src/main/java/com/yahoo/sketches/pig/kll/GetPmf.java
rename to src/main/java/org/apache/datasketches/pig/kll/GetPmf.java
index 59b9014..f05885e 100644
--- a/src/main/java/com/yahoo/sketches/pig/kll/GetPmf.java
+++ b/src/main/java/org/apache/datasketches/pig/kll/GetPmf.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.io.IOException;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
/**
* This UDF is to get an approximation to the Probability Mass Function (PMF) of the input stream
* given a sketch and a set of split points - an array of <i>m</i> unique, monotonically increasing
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/GetQuantile.java b/src/main/java/org/apache/datasketches/pig/kll/GetQuantile.java
similarity index 60%
rename from src/main/java/com/yahoo/sketches/pig/kll/GetQuantile.java
rename to src/main/java/org/apache/datasketches/pig/kll/GetQuantile.java
index cb58cbe..5035294 100644
--- a/src/main/java/com/yahoo/sketches/pig/kll/GetQuantile.java
+++ b/src/main/java/org/apache/datasketches/pig/kll/GetQuantile.java
@@ -1,19 +1,32 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.io.IOException;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
/**
* This UDF is to get a quantile value from a given sketch. A single value for a
* given fraction is returned. The fraction represents a normalized rank and must be
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/GetQuantiles.java b/src/main/java/org/apache/datasketches/pig/kll/GetQuantiles.java
similarity index 73%
rename from src/main/java/com/yahoo/sketches/pig/kll/GetQuantiles.java
rename to src/main/java/org/apache/datasketches/pig/kll/GetQuantiles.java
index 59c7d8c..f2ac968 100644
--- a/src/main/java/com/yahoo/sketches/pig/kll/GetQuantiles.java
+++ b/src/main/java/org/apache/datasketches/pig/kll/GetQuantiles.java
@@ -1,22 +1,35 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
/**
* This UDF is to get a list of quantile values from a given sketch and a list of
* fractions or a number of evenly spaced intervals. The fractions represent normalized ranks and
diff --git a/src/main/java/org/apache/datasketches/pig/kll/GetRank.java b/src/main/java/org/apache/datasketches/pig/kll/GetRank.java
new file mode 100644
index 0000000..bb99c48
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/kll/GetRank.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.kll;
+
+import java.io.IOException;
+
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF that returns the normalized rank of a given value from a given sketch. The rank is
+ * a double from 0 to 1 inclusive; for example, 0.5 corresponds to the 50th percentile (median).
+ */
+public class GetRank extends EvalFunc<Double> {
+
+  /**
+   * @param input tuple of exactly two fields: a DataByteArray sketch, then a Float value
+   * @return the rank that the heapified KllFloatsSketch reports for the value
+   * @throws IllegalArgumentException on a wrong field count or a null or mistyped field
+   */
+  @Override
+  public Double exec(final Tuple input) throws IOException {
+    if (input.size() != 2) {
+      throw new IllegalArgumentException("expected two inputs: sketch and value");
+    }
+    final Object sketchField = input.get(0);
+    if (!(sketchField instanceof DataByteArray)) { // instanceof is also false for null
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + (sketchField == null ? "null" : sketchField.getClass().getSimpleName()));
+    }
+    final DataByteArray dba = (DataByteArray) sketchField;
+    final KllFloatsSketch sketch = KllFloatsSketch.heapify(Memory.wrap(dba.get()));
+    final Object valueField = input.get(1);
+    if (!(valueField instanceof Float)) {
+      throw new IllegalArgumentException("expected a float value, got "
+          + (valueField == null ? "null" : valueField.getClass().getSimpleName()));
+    }
+    return sketch.getRank((Float) valueField);
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/kll/SketchToString.java b/src/main/java/org/apache/datasketches/pig/kll/SketchToString.java
new file mode 100644
index 0000000..52e25d9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/kll/SketchToString.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.kll;
+
+import java.io.IOException;
+
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF that returns a human-readable summary of a given sketch.
+ */
+public class SketchToString extends EvalFunc<String> {
+  /**
+   * @param input tuple with exactly one field: the sketch as a DataByteArray
+   * @return toString() of the KllFloatsSketch heapified from that field
+   * @throws IllegalArgumentException on a wrong field count or a null or mistyped field
+   */
+  @Override
+  public String exec(final Tuple input) throws IOException {
+    if (input.size() != 1) {
+      throw new IllegalArgumentException("expected one input");
+    }
+    final Object field = input.get(0);
+    if (!(field instanceof DataByteArray)) { // instanceof is also false for null
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + (field == null ? "null" : field.getClass().getSimpleName()));
+    }
+    return KllFloatsSketch.heapify(Memory.wrap(((DataByteArray) field).get())).toString();
+  }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/kll/UnionSketch.java b/src/main/java/org/apache/datasketches/pig/kll/UnionSketch.java
similarity index 91%
rename from src/main/java/com/yahoo/sketches/pig/kll/UnionSketch.java
rename to src/main/java/org/apache/datasketches/pig/kll/UnionSketch.java
index e4db8d0..676b1eb 100644
--- a/src/main/java/com/yahoo/sketches/pig/kll/UnionSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/kll/UnionSketch.java
@@ -1,12 +1,28 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.io.IOException;
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -16,9 +32,6 @@
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
/**
* This UDF is to merge sketches.
* This class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces for
diff --git a/src/main/java/org/apache/datasketches/pig/kll/package-info.java b/src/main/java/org/apache/datasketches/pig/kll/package-info.java
new file mode 100644
index 0000000..e5d77a4
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/kll/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for KLL quantiles sketches.
+ * See https://datasketches.github.io/docs/Quantiles/KLLSketch.html
+ *
+ * @author Alexander Saydakov
+ */
+package org.apache.datasketches.pig.kll;
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/DataToDoublesSketch.java
similarity index 91%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/DataToDoublesSketch.java
index d1bc8df..42aa8ce 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/DataToDoublesSketch.java
@@ -1,12 +1,30 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.DoublesUnion;
+import org.apache.datasketches.quantiles.DoublesUnionBuilder;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -18,11 +36,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.DoublesUnion;
-import com.yahoo.sketches.quantiles.DoublesUnionBuilder;
-
/**
* This is a Pig UDF that builds Sketches from data.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/DataToItemsSketch.java
similarity index 92%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/DataToItemsSketch.java
index fcccfb6..8d7c9b0 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/DataToItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/DataToItemsSketch.java
@@ -1,13 +1,31 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
import java.util.Comparator;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
+import org.apache.datasketches.quantiles.ItemsUnion;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -19,11 +37,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-import com.yahoo.sketches.quantiles.ItemsUnion;
-
/**
* Builds ItemsSketch from data.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/DataToStringsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/DataToStringsSketch.java
new file mode 100644
index 0000000..58fbcb5
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/DataToStringsSketch.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import java.util.Comparator;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+
+/**
+ * Pig UDF that creates an {@code ItemsSketch<String>} from raw data.
+ * All three execution paths are supported: exec(), Accumulator and Algebraic.
+ */
+@SuppressWarnings("javadoc")
+public class DataToStringsSketch extends DataToItemsSketch<String> {
+
+  // Ordering and serialization shared by this UDF and its nested IntermediateFinal class.
+  private static final Comparator<String> NATURAL_ORDER = Comparator.naturalOrder();
+  private static final ArrayOfItemsSerDe<String> STRINGS_SER_DE = new ArrayOfStringsSerDe();
+
+  /** No-argument form: hands 0 to the parent constructor in the k position. */
+  public DataToStringsSketch() {
+    super(0, NATURAL_ORDER, STRINGS_SER_DE);
+  }
+
+  /** @param kStr the value of k in string form; converted with Integer.parseInt */
+  public DataToStringsSketch(final String kStr) {
+    super(Integer.parseInt(kStr), NATURAL_ORDER, STRINGS_SER_DE);
+  }
+
+  // ALGEBRAIC INTERFACE
+  @Override
+  public String getInitial() {
+    return DataToItemsSketchInitial.class.getName();
+  }
+
+  @Override
+  public String getIntermed() {
+    return DataToStringsSketchIntermediateFinal.class.getName();
+  }
+
+  @Override
+  public String getFinal() {
+    return DataToStringsSketchIntermediateFinal.class.getName();
+  }
+
+  /** String-typed class whose name both getIntermed() and getFinal() report. */
+  public static class DataToStringsSketchIntermediateFinal
+      extends DataToItemsSketchIntermediateFinal<String> {
+    public DataToStringsSketchIntermediateFinal() {
+      super(0, NATURAL_ORDER, STRINGS_SER_DE);
+    }
+
+    public DataToStringsSketchIntermediateFinal(final String kStr) {
+      super(Integer.parseInt(kStr), NATURAL_ORDER, STRINGS_SER_DE);
+    }
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/DoublesSketchToString.java b/src/main/java/org/apache/datasketches/pig/quantiles/DoublesSketchToString.java
new file mode 100644
index 0000000..98c0797
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/DoublesSketchToString.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import java.io.IOException;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF that returns a human-readable summary of a given sketch.
+ */
+public class DoublesSketchToString extends EvalFunc<String> {
+  /**
+   * @param input tuple with exactly one field: the sketch as a DataByteArray; may be null
+   * @return toString() of the DoublesSketch wrapping that field, or null for a null tuple
+   * @throws IllegalArgumentException on a wrong field count or a null or mistyped field
+   */
+  @Override
+  public String exec(final Tuple input) throws IOException {
+    if (input == null) {
+      return null;
+    }
+    if (input.size() != 1) {
+      throw new IllegalArgumentException("expected one input");
+    }
+    final Object field = input.get(0);
+    if (!(field instanceof DataByteArray)) { // instanceof is also false for null
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + (field == null ? "null" : field.getClass().getSimpleName()));
+    }
+    return DoublesSketch.wrap(Memory.wrap(((DataByteArray) field).get())).toString();
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/GetKFromDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetKFromDoublesSketch.java
new file mode 100644
index 0000000..715a6da
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetKFromDoublesSketch.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import java.io.IOException;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF that returns the parameter K of a given sketch. This can be useful for debugging a
+ * work flow to make sure that resulting sketches have the intended K and, therefore, accuracy.
+ */
+public class GetKFromDoublesSketch extends EvalFunc<Integer> {
+
+  /**
+   * @param input tuple with exactly one field: the sketch as a DataByteArray
+   * @return getK() of the DoublesSketch wrapping that field
+   * @throws IllegalArgumentException on a wrong field count or a null or mistyped field
+   */
+  @Override
+  public Integer exec(final Tuple input) throws IOException {
+    if (input.size() != 1) {
+      throw new IllegalArgumentException("expected one input");
+    }
+    final Object field = input.get(0);
+    if (!(field instanceof DataByteArray)) { // instanceof is also false for null
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + (field == null ? "null" : field.getClass().getSimpleName()));
+    }
+    return DoublesSketch.wrap(Memory.wrap(((DataByteArray) field).get())).getK();
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/GetKFromStringsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetKFromStringsSketch.java
new file mode 100644
index 0000000..cd2089b
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetKFromStringsSketch.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF that returns the parameter K of a given sketch. This can be useful for debugging a
+ * work flow to make sure that resulting sketches have the intended K and, therefore, accuracy.
+ */
+public class GetKFromStringsSketch extends EvalFunc<Integer> {
+  /**
+   * @param input tuple with exactly one field: the sketch as a DataByteArray
+   * @return getK() of the {@code ItemsSketch<String>} instantiated from that field
+   * @throws IllegalArgumentException on a wrong field count or a null or mistyped field
+   */
+  @Override
+  public Integer exec(final Tuple input) throws IOException {
+    if (input.size() != 1) {
+      throw new IllegalArgumentException("expected one input");
+    }
+    final Object field = input.get(0);
+    if (!(field instanceof DataByteArray)) { // instanceof is also false for null
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + (field == null ? "null" : field.getClass().getSimpleName()));
+    }
+    final ItemsSketch<String> sketch = ItemsSketch.getInstance(
+        Memory.wrap(((DataByteArray) field).get()), Comparator.naturalOrder(),
+        new ArrayOfStringsSerDe());
+    return sketch.getK();
+  }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetPmfFromDoublesSketch.java
similarity index 65%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/GetPmfFromDoublesSketch.java
index 222f9bf..5a7d712 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetPmfFromDoublesSketch.java
@@ -1,19 +1,32 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-
/**
* This UDF is to get an approximation to the Probability Mass Function (PMF) of the input stream
* given a sketch and a set of split points - an array of <i>m</i> unique, monotonically increasing
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromStringsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetPmfFromStringsSketch.java
similarity index 65%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromStringsSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/GetPmfFromStringsSketch.java
index 8707453..87eb845 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetPmfFromStringsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetPmfFromStringsSketch.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
import java.util.Comparator;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-
/**
* This UDF is to get an approximation to the Probability Mass Function (PMF) of the input stream
* given a sketch and a set of split points - an array of <i>m</i> unique, monotonically increasing
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantileFromDoublesSketch.java
similarity index 60%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/GetQuantileFromDoublesSketch.java
index 97d7060..bab7eae 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantileFromDoublesSketch.java
@@ -1,19 +1,32 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-
/**
* This UDF is to get a quantile value from a DoublesSketch. A single value for a
* given fraction is returned. The fraction represents a normalized rank and must be
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromStringsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantileFromStringsSketch.java
similarity index 61%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromStringsSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/GetQuantileFromStringsSketch.java
index db0134b..90f496a 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromStringsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantileFromStringsSketch.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
import java.util.Comparator;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-
/**
* This UDF is to get a quantile value from an ItemsSketch<String>. A single value for a
* given fraction is returned. The fraction represents a normalized rank and must be
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
similarity index 71%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
index bb6ab80..cb7a889 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromDoublesSketch.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-
/**
* This UDF is to get a list of quantile values from an DoublesSketch given a list of
* fractions or a number of evenly spaced intervals. The fractions represent normalized ranks and
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromStringsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromStringsSketch.java
similarity index 72%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromStringsSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromStringsSketch.java
index 9a381d6..ad21a79 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromStringsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromStringsSketch.java
@@ -1,23 +1,36 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-
/**
* This UDF is to get a list of quantile values from an ItemsSketch<String> given a list of
* fractions or a number of evenly spaced intervals. The fractions represent normalized ranks and
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/StringsSketchToString.java b/src/main/java/org/apache/datasketches/pig/quantiles/StringsSketchToString.java
new file mode 100644
index 0000000..2f52c40
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/StringsSketchToString.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF that returns a human-readable summary of a serialized {@code ItemsSketch<String>}.
+ *
+ * <p>The single input field must be a {@code DataByteArray} holding the sketch image. It is
+ * wrapped as {@code Memory} and deserialized using natural string ordering and an
+ * {@code ArrayOfStringsSerDe}.
+ */
+public class StringsSketchToString extends EvalFunc<String> {
+
+  /**
+   * Produces the {@code toString()} summary of the sketch carried in the input tuple.
+   *
+   * @param input tuple expected to hold exactly one field, a {@code DataByteArray} sketch image
+   * @return the sketch summary, or {@code null} if the tuple itself is {@code null}
+   * @throws IllegalArgumentException if the tuple does not hold exactly one field, or if that
+   *     field is {@code null} or is not a {@code DataByteArray}
+   * @throws IOException propagated from reading the tuple field
+   */
+  @Override
+  public String exec(final Tuple input) throws IOException {
+    if (input == null) {
+      return null;
+    }
+    if (input.size() != 1) {
+      throw new IllegalArgumentException("expected one input");
+    }
+
+    // Read the field once. A null field fails the instanceof test, so it must be reported
+    // explicitly: calling getClass() on it would raise an uninformative NullPointerException
+    // instead of the intended argument error.
+    final Object field = input.get(0);
+    if (!(field instanceof DataByteArray)) {
+      throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
+          + (field == null ? "null" : field.getClass().getSimpleName()));
+    }
+    final DataByteArray dba = (DataByteArray) field;
+    final ItemsSketch<String> sketch =
+        ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(),
+            new ArrayOfStringsSerDe());
+    return sketch.toString();
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/UnionDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/UnionDoublesSketch.java
similarity index 91%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/UnionDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/UnionDoublesSketch.java
index f6f4ab7..f280389 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/UnionDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/UnionDoublesSketch.java
@@ -1,12 +1,30 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.DoublesUnion;
+import org.apache.datasketches.quantiles.DoublesUnionBuilder;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -19,11 +37,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.DoublesUnion;
-import com.yahoo.sketches.quantiles.DoublesUnionBuilder;
-
/**
* This is a Pig UDF that merges Quantiles Sketches.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/com/yahoo/sketches/pig/quantiles/UnionItemsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/UnionItemsSketch.java
similarity index 91%
rename from src/main/java/com/yahoo/sketches/pig/quantiles/UnionItemsSketch.java
rename to src/main/java/org/apache/datasketches/pig/quantiles/UnionItemsSketch.java
index b6f4fb2..1858ac1 100644
--- a/src/main/java/com/yahoo/sketches/pig/quantiles/UnionItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/UnionItemsSketch.java
@@ -1,13 +1,31 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.io.IOException;
import java.util.Comparator;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.ItemsSketch;
+import org.apache.datasketches.quantiles.ItemsUnion;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -20,11 +38,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
-import com.yahoo.sketches.quantiles.ItemsUnion;
-
/**
* Computes union of ItemsSketch.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/UnionStringsSketch.java b/src/main/java/org/apache/datasketches/pig/quantiles/UnionStringsSketch.java
new file mode 100644
index 0000000..e6e210b
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/UnionStringsSketch.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import java.util.Comparator;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+
+/**
+ * Pig UDF that computes the union of {@code ItemsSketch<String>} sketches.
+ * All three evaluation paths are supported: exec(), Accumulator and Algebraic.
+ */
+@SuppressWarnings("javadoc")
+public class UnionStringsSketch extends UnionItemsSketch<String> {
+
+  // Shared by the outer UDF and its nested intermediate/final stage.
+  private static final ArrayOfItemsSerDe<String> STRINGS_SER_DE = new ArrayOfStringsSerDe();
+  private static final Comparator<String> NATURAL_ORDER = Comparator.naturalOrder();
+
+  /**
+   * Creates the UDF, passing 0 as the k argument of the parent class.
+   * NOTE(review): 0 presumably selects the parent's default k; confirm in UnionItemsSketch.
+   */
+  public UnionStringsSketch() {
+    super(0, NATURAL_ORDER, STRINGS_SER_DE);
+  }
+
+  /**
+   * Creates the UDF with the k argument given as text.
+   *
+   * @param kStr decimal integer string, parsed with {@code Integer.parseInt}
+   */
+  public UnionStringsSketch(final String kStr) {
+    super(Integer.parseInt(kStr), NATURAL_ORDER, STRINGS_SER_DE);
+  }
+
+  //ALGEBRAIC INTERFACE
+
+  /** Returns the class name of the initial stage, {@code UnionItemsSketchInitial}. */
+  @Override
+  public String getInitial() {
+    final Class<?> initialStage = UnionItemsSketchInitial.class;
+    return initialStage.getName();
+  }
+
+  /** Returns the class name of the intermediate stage. */
+  @Override
+  public String getIntermed() {
+    final Class<?> intermediateStage = UnionStringsSketchIntermediateFinal.class;
+    return intermediateStage.getName();
+  }
+
+  /** Returns the class name of the final stage, the same class as the intermediate stage. */
+  @Override
+  public String getFinal() {
+    final Class<?> finalStage = UnionStringsSketchIntermediateFinal.class;
+    return finalStage.getName();
+  }
+
+  /**
+   * String specialization of {@code UnionItemsSketchIntermediateFinal}, named by both
+   * {@code getIntermed()} and {@code getFinal()}.
+   */
+  public static class UnionStringsSketchIntermediateFinal extends UnionItemsSketchIntermediateFinal<String> {
+
+    /** Creates the stage, passing 0 as the k argument of the parent class. */
+    public UnionStringsSketchIntermediateFinal() {
+      super(0, NATURAL_ORDER, STRINGS_SER_DE);
+    }
+
+    /**
+     * Creates the stage with the k argument given as text.
+     *
+     * @param kStr decimal integer string, parsed with {@code Integer.parseInt}
+     */
+    public UnionStringsSketchIntermediateFinal(final String kStr) {
+      super(Integer.parseInt(kStr), NATURAL_ORDER, STRINGS_SER_DE);
+    }
+
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/Util.java b/src/main/java/org/apache/datasketches/pig/quantiles/Util.java
new file mode 100644
index 0000000..867cf5d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/Util.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * Package-private helpers for the quantiles package.
+ */
+class Util {
+
+  /**
+   * Copies a double array into a newly created Pig tuple, one field per element, in order.
+   *
+   * @param array values to copy
+   * @return tuple created with {@code array.length} fields, the i-th holding {@code array[i]}
+   *     boxed as a {@code Double}
+   * @throws ExecException propagated from setting a field on the tuple
+   */
+  static Tuple doubleArrayToTuple(final double[] array) throws ExecException {
+    final int count = array.length;
+    final Tuple result = TupleFactory.getInstance().newTuple(count);
+    int position = 0;
+    for (final double value : array) {
+      // Explicit boxing; identical to the autoboxing conversion of a double argument.
+      result.set(position, Double.valueOf(value));
+      position++;
+    }
+    return result;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/quantiles/package-info.java b/src/main/java/org/apache/datasketches/pig/quantiles/package-info.java
new file mode 100644
index 0000000..4ef049b
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/quantiles/package-info.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for Quantiles sketches.
+ * This includes UDFs for generic ItemsSketch and specialized DoublesSketch.
+ *
+ * <p>The generic implementation is in the form of abstract classes DataToItemsSketch and
+ * UnionItemsSketch to be specialized for particular types of items.
+ * An implementation for strings is provided: DataToStringsSketch, UnionStringsSketch,
+ * plus UDFs to obtain the results from sketches:
+ * GetQuantileFromStringsSketch, GetQuantilesFromStringsSketch and GetPmfFromStringsSketch.
+ *
+ * <p>Support for DoublesSketch: DataToDoublesSketch, UnionDoublesSketch,
+ * GetQuantileFromDoublesSketch, GetQuantilesFromDoublesSketch and GetPmfFromDoublesSketch.
+ *
+ * @author Alexander Saydakov
+ */
+package org.apache.datasketches.pig.quantiles;
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java b/src/main/java/org/apache/datasketches/pig/sampling/ArrayOfTuplesSerDe.java
similarity index 67%
rename from src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java
rename to src/main/java/org/apache/datasketches/pig/sampling/ArrayOfTuplesSerDe.java
index 2009d9d..c508eba 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/ArrayOfTuplesSerDe.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/ArrayOfTuplesSerDe.java
@@ -1,23 +1,36 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
import org.apache.pig.data.DataReaderWriter;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.WritableByteArray;
-import com.yahoo.memory.Memory;
-import com.yahoo.memory.WritableMemory;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-
/**
* This <tt>ArrayOfItemsSerDe</tt> implementation takes advantage of the Pig methods used in
* Pig's own BinStorage to serialize arbitrary <tt>Tuple</tt> data.
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketch.java b/src/main/java/org/apache/datasketches/pig/sampling/DataToVarOptSketch.java
similarity index 80%
rename from src/main/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketch.java
rename to src/main/java/org/apache/datasketches/pig/sampling/DataToVarOptSketch.java
index bd43569..c4a36e4 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/DataToVarOptSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/DataToVarOptSketch.java
@@ -1,15 +1,30 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.DEFAULT_TARGET_K;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.DEFAULT_WEIGHT_IDX;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.DEFAULT_TARGET_K;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.DEFAULT_WEIGHT_IDX;
import java.io.IOException;
+import org.apache.datasketches.sampling.VarOptItemsSketch;
import org.apache.pig.AccumulatorEvalFunc;
import org.apache.pig.Algebraic;
import org.apache.pig.data.DataBag;
@@ -19,8 +34,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.sketches.sampling.VarOptItemsSketch;
-
/**
* Creates a binary version of a VarOpt sampling over input tuples. The resulting
* <tt>DataByteArray</tt> can be read in Pig with <tt>GetVarOptSamples</tt>, although the
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/GetVarOptSamples.java b/src/main/java/org/apache/datasketches/pig/sampling/GetVarOptSamples.java
similarity index 63%
rename from src/main/java/com/yahoo/sketches/pig/sampling/GetVarOptSamples.java
rename to src/main/java/org/apache/datasketches/pig/sampling/GetVarOptSamples.java
index 8aea745..6493d9e 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/GetVarOptSamples.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/GetVarOptSamples.java
@@ -1,16 +1,32 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.RECORD_ALIAS;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.WEIGHT_ALIAS;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.createDataBagFromSketch;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.RECORD_ALIAS;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.WEIGHT_ALIAS;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.createDataBagFromSketch;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.sampling.VarOptItemsSketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
@@ -19,9 +35,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.sampling.VarOptItemsSketch;
-
/**
* This UDF extracts samples from the binary image of a VarOpt<Tuple> sketch. Because the
* input is a binary object, this UDF is unable to automatically determine the data schema at query
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/ReservoirSampling.java b/src/main/java/org/apache/datasketches/pig/sampling/ReservoirSampling.java
similarity index 82%
rename from src/main/java/com/yahoo/sketches/pig/sampling/ReservoirSampling.java
rename to src/main/java/org/apache/datasketches/pig/sampling/ReservoirSampling.java
index 1db8e39..82ccc3b 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/ReservoirSampling.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/ReservoirSampling.java
@@ -1,14 +1,31 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.datasketches.sampling.ReservoirItemsSketch;
+import org.apache.datasketches.sampling.ReservoirItemsUnion;
+import org.apache.datasketches.sampling.SamplingPigUtil;
import org.apache.pig.AccumulatorEvalFunc;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -21,16 +38,13 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.sketches.sampling.ReservoirItemsSketch;
-import com.yahoo.sketches.sampling.ReservoirItemsUnion;
-import com.yahoo.sketches.sampling.SamplingPigUtil;
-
/**
* This is a Pig UDF that applies reservoir sampling to input tuples. It implements both
* the <tt>Accumulator</tt> and <tt>Algebraic</tt> interfaces for efficient performance.
*
* @author Jon Malkin
*/
+@SuppressWarnings("javadoc")
public class ReservoirSampling extends AccumulatorEvalFunc<Tuple> implements Algebraic {
// defined for test consistency
static final String N_ALIAS = "n";
@@ -59,7 +73,7 @@
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
- if (inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0)) {
+ if ((inputTuple == null) || (inputTuple.size() < 1) || inputTuple.isNull(0)) {
return null;
}
@@ -74,7 +88,7 @@
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
- if (inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0)) {
+ if ((inputTuple == null) || (inputTuple.size() < 1) || inputTuple.isNull(0)) {
return;
}
@@ -108,12 +122,12 @@
@Override
public Schema outputSchema(final Schema input) {
- if (input != null && input.size() > 0) {
+ if ((input != null) && (input.size() > 0)) {
try {
Schema source = input;
// if we have a bag, grab one level down to get a tuple
- if (source.size() == 1 && source.getField(0).type == DataType.BAG) {
+ if ((source.size() == 1) && (source.getField(0).type == DataType.BAG)) {
source = source.getField(0).schema;
}
@@ -186,7 +200,7 @@
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
- if (inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0)) {
+ if ((inputTuple == null) || (inputTuple.size() < 1) || inputTuple.isNull(0)) {
return null;
}
@@ -239,7 +253,7 @@
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
- if (inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0)) {
+ if ((inputTuple == null) || (inputTuple.size() < 1) || inputTuple.isNull(0)) {
return null;
}
@@ -250,7 +264,7 @@
final long n = (long) reservoir.get(0);
final int k = (int) reservoir.get(1);
- if (n <= k && k <= targetK_) {
+ if ((n <= k) && (k <= targetK_)) {
for (Tuple t : (DataBag) reservoir.get(2)) {
union.update(t);
}
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/ReservoirUnion.java b/src/main/java/org/apache/datasketches/pig/sampling/ReservoirUnion.java
similarity index 80%
rename from src/main/java/com/yahoo/sketches/pig/sampling/ReservoirUnion.java
rename to src/main/java/org/apache/datasketches/pig/sampling/ReservoirUnion.java
index 02afa63..0fe52b4 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/ReservoirUnion.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/ReservoirUnion.java
@@ -1,14 +1,31 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.datasketches.sampling.ReservoirItemsSketch;
+import org.apache.datasketches.sampling.ReservoirItemsUnion;
+import org.apache.datasketches.sampling.SamplingPigUtil;
import org.apache.pig.AccumulatorEvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
@@ -18,10 +35,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.sketches.sampling.ReservoirItemsSketch;
-import com.yahoo.sketches.sampling.ReservoirItemsUnion;
-import com.yahoo.sketches.sampling.SamplingPigUtil;
-
/**
* This is a Pig UDF that unions reservoir samples. It implements
* the <tt>Accumulator</tt> interface for more efficient performance. Input is
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java b/src/main/java/org/apache/datasketches/pig/sampling/VarOptCommonImpl.java
similarity index 88%
rename from src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java
rename to src/main/java/org/apache/datasketches/pig/sampling/VarOptCommonImpl.java
index 56899f7..75992fc 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptCommonImpl.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/VarOptCommonImpl.java
@@ -1,12 +1,30 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.sampling.VarOptItemsSamples;
+import org.apache.datasketches.sampling.VarOptItemsSketch;
+import org.apache.datasketches.sampling.VarOptItemsUnion;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
@@ -15,11 +33,6 @@
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.sampling.VarOptItemsSamples;
-import com.yahoo.sketches.sampling.VarOptItemsSketch;
-import com.yahoo.sketches.sampling.VarOptItemsUnion;
-
/**
* A collection of methods and constants used across VarOpt UDFs.
*
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptSampling.java b/src/main/java/org/apache/datasketches/pig/sampling/VarOptSampling.java
similarity index 75%
rename from src/main/java/com/yahoo/sketches/pig/sampling/VarOptSampling.java
rename to src/main/java/org/apache/datasketches/pig/sampling/VarOptSampling.java
index fb485b9..85ed71a 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptSampling.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/VarOptSampling.java
@@ -1,19 +1,35 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.DEFAULT_TARGET_K;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.DEFAULT_WEIGHT_IDX;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.RECORD_ALIAS;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.WEIGHT_ALIAS;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.createDataBagFromSketch;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.unionSketches;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.DEFAULT_TARGET_K;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.DEFAULT_WEIGHT_IDX;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.RECORD_ALIAS;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.WEIGHT_ALIAS;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.createDataBagFromSketch;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.unionSketches;
import java.io.IOException;
+import org.apache.datasketches.sampling.VarOptItemsSketch;
+import org.apache.datasketches.sampling.VarOptItemsUnion;
import org.apache.pig.AccumulatorEvalFunc;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -23,15 +39,13 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.sketches.sampling.VarOptItemsSketch;
-import com.yahoo.sketches.sampling.VarOptItemsUnion;
-
/**
* Applies VarOpt sampling to input tuples. Implements both the <tt>Accumulator</tt> and
* <tt>Algebraic</tt> interfaces for efficient performance.
*
* @author Jon Malkin
*/
+@SuppressWarnings("javadoc")
public class VarOptSampling extends AccumulatorEvalFunc<DataBag> implements Algebraic {
private final int targetK_;
private final int weightIdx_;
@@ -77,7 +91,7 @@
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
- if (inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0)) {
+ if ((inputTuple == null) || (inputTuple.size() < 1) || inputTuple.isNull(0)) {
return;
}
@@ -110,7 +124,7 @@
@Override
public Schema outputSchema(final Schema input) {
try {
- if (input == null || input.size() == 0) {
+ if ((input == null) || (input.size() == 0)) {
throw new IllegalArgumentException("Degenerate input schema to VarOptSampling");
}
@@ -122,8 +136,8 @@
final Schema record = input.getField(0).schema; // record has a tuple in field 0
final Schema fields = record.getField(0).schema;
- if (fields.getField(weightIdx_).type != DataType.DOUBLE
- && fields.getField(weightIdx_).type != DataType.FLOAT) {
+ if ((fields.getField(weightIdx_).type != DataType.DOUBLE)
+ && (fields.getField(weightIdx_).type != DataType.FLOAT)) {
throw new IllegalArgumentException("weightIndex item of VarOpt tuple must be a "
+ "weight (double/float), found " + fields.getField(0).type
+ ": " + fields.toString());
@@ -200,7 +214,7 @@
@Override
public DataBag exec(final Tuple inputTuple) throws IOException {
- if (inputTuple == null || inputTuple.size() < 1 || inputTuple.isNull(0)) {
+ if ((inputTuple == null) || (inputTuple.size() < 1) || inputTuple.isNull(0)) {
return null;
}
diff --git a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptUnion.java b/src/main/java/org/apache/datasketches/pig/sampling/VarOptUnion.java
similarity index 72%
rename from src/main/java/com/yahoo/sketches/pig/sampling/VarOptUnion.java
rename to src/main/java/org/apache/datasketches/pig/sampling/VarOptUnion.java
index 9df4e51..108f9b0 100644
--- a/src/main/java/com/yahoo/sketches/pig/sampling/VarOptUnion.java
+++ b/src/main/java/org/apache/datasketches/pig/sampling/VarOptUnion.java
@@ -1,14 +1,30 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.sampling;
+package org.apache.datasketches.pig.sampling;
-import static com.yahoo.sketches.pig.sampling.VarOptCommonImpl.DEFAULT_TARGET_K;
+import static org.apache.datasketches.pig.sampling.VarOptCommonImpl.DEFAULT_TARGET_K;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.sampling.VarOptItemsUnion;
import org.apache.pig.AccumulatorEvalFunc;
import org.apache.pig.Algebraic;
import org.apache.pig.data.DataBag;
@@ -17,9 +33,6 @@
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.sampling.VarOptItemsUnion;
-
/**
* Accepts binary VarOpt sketch images and unions them into a single binary output sketch.
* Due to using opaque binary objects, schema information is unavailable.
diff --git a/src/main/java/org/apache/datasketches/pig/sampling/package-info.java b/src/main/java/org/apache/datasketches/pig/sampling/package-info.java
new file mode 100644
index 0000000..a2fd470
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/sampling/package-info.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * <p>This package contains Pig UDFs for streaming algorithms that enable fixed-size sampling from a
+ * stream: uniform reservoir sampling of unweighted items and VarOpt sampling of weighted items.</p>
+ *
+ * <p>These sketches are mergeable, but do <em>not</em> serialize to a compact form.</p>
+ *
+ * @author Jon Malkin
+ */
+package org.apache.datasketches.pig.sampling;
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/AexcludeB.java b/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java
similarity index 74%
rename from src/main/java/com/yahoo/sketches/pig/theta/AexcludeB.java
rename to src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java
index 683d229..5619d62 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/AexcludeB.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/AexcludeB.java
@@ -1,16 +1,35 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-import static com.yahoo.sketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractFieldAtIndex;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.extractFieldAtIndex;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.theta.AnotB;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.SetOperation;
+import org.apache.datasketches.theta.Sketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
@@ -19,20 +38,12 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.theta.AnotB;
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.SetOperation;
-import com.yahoo.sketches.theta.Sketch;
-
/**
* This is a Pig UDF that performs the A-NOT-B Set Operation on two given Sketches. Because this
* operation is fundamentally asymmetric, it is structured as a single stateless operation rather
* than stateful as are Union and Intersection UDFs, which can be iterative.
* The requirement to perform iterative A\B\C\... is rare. If needed, it can be rendered easily by
* the caller.
- *
- * @author Lee Rhodes
*/
public class AexcludeB extends EvalFunc<Tuple> {
private final long seed_;
@@ -64,7 +75,7 @@
*/
public AexcludeB(final long seed) {
super();
- this.seed_ = seed;
+ seed_ = seed;
}
// @formatter:off
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java
similarity index 90%
rename from src/main/java/com/yahoo/sketches/pig/theta/DataToSketch.java
rename to src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java
index 0661c68..434ed1c 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/DataToSketch.java
@@ -1,23 +1,42 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-import static com.yahoo.sketches.Util.checkIfPowerOf2;
-import static com.yahoo.sketches.Util.checkProbability;
-import static com.yahoo.sketches.pig.theta.PigUtil.RF;
-import static com.yahoo.sketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.emptySketchTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractBag;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractFieldAtIndex;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractTypeAtIndex;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.checkIfPowerOf2;
+import static org.apache.datasketches.Util.checkProbability;
+import static org.apache.datasketches.pig.theta.PigUtil.RF;
+import static org.apache.datasketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.emptySketchTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.extractBag;
+import static org.apache.datasketches.pig.theta.PigUtil.extractFieldAtIndex;
+import static org.apache.datasketches.pig.theta.PigUtil.extractTypeAtIndex;
import java.io.IOException;
+import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.SetOperation;
+import org.apache.datasketches.theta.Union;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -28,17 +47,9 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.Util;
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.SetOperation;
-import com.yahoo.sketches.theta.Union;
-
/**
* This is a Pig UDF that builds Sketches from data.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
- *
- * @author Lee Rhodes
*/
public class DataToSketch extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebraic {
//With the single exception of the Accumulator interface, UDFs are stateless.
@@ -111,10 +122,10 @@
*/
public DataToSketch(final int nomEntries, final float p, final long seed) {
super();
- this.nomEntries_ = nomEntries;
- this.p_ = p;
- this.seed_ = seed;
- this.emptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
+ nomEntries_ = nomEntries;
+ p_ = p;
+ seed_ = seed;
+ emptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
//Catch these errors during construction, don't wait for the exec to be called.
checkIfPowerOf2(nomEntries, "nomEntries");
checkProbability(p, "p");
@@ -496,10 +507,10 @@
* @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
*/
public IntermediateFinal(final int nomEntries, final float p, final long seed) {
- this.myNomEntries_ = nomEntries;
- this.myP_ = p;
- this.mySeed_ = seed;
- this.myEmptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
+ myNomEntries_ = nomEntries;
+ myP_ = p;
+ mySeed_ = seed;
+ myEmptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
}
@Override //IntermediateFinal exec
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/ErrorBounds.java b/src/main/java/org/apache/datasketches/pig/theta/ErrorBounds.java
similarity index 74%
rename from src/main/java/com/yahoo/sketches/pig/theta/ErrorBounds.java
rename to src/main/java/org/apache/datasketches/pig/theta/ErrorBounds.java
index 0689805..4e4225c 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/ErrorBounds.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/ErrorBounds.java
@@ -1,14 +1,30 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch;
+import static org.apache.datasketches.pig.theta.PigUtil.tupleToSketch;
import java.io.IOException;
+import org.apache.datasketches.Util;
+import org.apache.datasketches.theta.Sketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
@@ -16,9 +32,6 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.sketches.Util;
-import com.yahoo.sketches.theta.Sketch;
-
//@formatter:off
/**
* This is a User Defined Function (UDF) for returning the Double value result along with a lower and
@@ -36,8 +49,6 @@
* </ul>
* </li>
* </ul>
- *
- * @author Lee Rhodes
*/
//@formatter:on
public class ErrorBounds extends EvalFunc<Tuple> {
diff --git a/src/main/java/org/apache/datasketches/pig/theta/Estimate.java b/src/main/java/org/apache/datasketches/pig/theta/Estimate.java
new file mode 100644
index 0000000..38c5631
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/theta/Estimate.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.theta;
+
+import static org.apache.datasketches.pig.theta.PigUtil.tupleToSketch;
+
+import java.io.IOException;
+
+import org.apache.datasketches.Util;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Pig UDF: returns a theta sketch's unique count estimate as a Double, or null for a null/empty tuple.
+ */
+public class Estimate extends EvalFunc<Double> {
+ private final long seed_;
+
+ /**
+ * Constructs with Util.DEFAULT_UPDATE_SEED as the seed used when deserializing the sketch.
+ */
+ public Estimate() {
+ this(Util.DEFAULT_UPDATE_SEED);
+ }
+
+ /**
+ * Constructs with the given seed supplied as a decimal string, which is parsed with Long.parseLong.
+ * @param seedStr the seed, in string form, used when deserializing the sketch.
+ */
+ public Estimate(final String seedStr) {
+ this(Long.parseLong(seedStr));
+ }
+
+ /**
+ * Constructs with the given seed; the other constructors delegate to this one.
+ * @param seed the seed used when deserializing the sketch.
+ */
+ public Estimate(final long seed) {
+ super();
+ seed_ = seed;
+ }
+
+ @Override
+ public Double exec(final Tuple sketchTuple) throws IOException { //throws is part of the overridden exec API
+ if ((sketchTuple == null) || (sketchTuple.size() == 0)) {
+ return null; //no sketch supplied, so there is nothing to estimate
+ }
+ final Sketch sketch = tupleToSketch(sketchTuple, seed_); //deserialize using the configured seed
+ return sketch.getEstimate();
+ }
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/Intersect.java b/src/main/java/org/apache/datasketches/pig/theta/Intersect.java
similarity index 88%
rename from src/main/java/com/yahoo/sketches/pig/theta/Intersect.java
rename to src/main/java/org/apache/datasketches/pig/theta/Intersect.java
index dac6f93..424f7f0 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/Intersect.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/Intersect.java
@@ -1,19 +1,38 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-import static com.yahoo.sketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.emptySketchTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractBag;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractFieldAtIndex;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractTypeAtIndex;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.emptySketchTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.extractBag;
+import static org.apache.datasketches.pig.theta.PigUtil.extractFieldAtIndex;
+import static org.apache.datasketches.pig.theta.PigUtil.extractTypeAtIndex;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.Intersection;
+import org.apache.datasketches.theta.SetOperation;
+import org.apache.datasketches.theta.Sketch;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -24,17 +43,9 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.Intersection;
-import com.yahoo.sketches.theta.SetOperation;
-import com.yahoo.sketches.theta.Sketch;
-
/**
* This is a Pig UDF that performs the Intersection Set Operation on Sketches.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
- *
- * @author Lee Rhodes
*/
public class Intersect extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebraic {
//With the single exception of the Accumulator interface, UDFs are stateless.
@@ -71,8 +82,8 @@
*/
public Intersect(final long seed) {
super();
- this.seed_ = seed;
- this.emptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
+ seed_ = seed;
+ emptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
}
//@formatter:off
@@ -329,8 +340,8 @@
* @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
*/
public IntermediateFinal(final long seed) {
- this.mySeed_ = seed;
- this.myEmptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
+ mySeed_ = seed;
+ myEmptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
}
@Override //IntermediateFinal exec
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/pig/theta/JaccardSimilarity.java
similarity index 77%
rename from src/main/java/com/yahoo/sketches/pig/theta/JaccardSimilarity.java
rename to src/main/java/org/apache/datasketches/pig/theta/JaccardSimilarity.java
index 6fd8186..6d90349 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/JaccardSimilarity.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/JaccardSimilarity.java
@@ -1,24 +1,37 @@
/*
- * Copyright 2018, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractFieldAtIndex;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.pig.theta.PigUtil.extractFieldAtIndex;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.theta.Sketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.theta.Sketch;
-
/**
* This is a Pig UDF that performs the JaccardSimilarity Operation on two given
* Sketches.
@@ -118,7 +131,7 @@
}
final double[] jaccardTupple =
- com.yahoo.sketches.theta.JaccardSimilarity.jaccard(sketchA, sketchB);
+ org.apache.datasketches.theta.JaccardSimilarity.jaccard(sketchA, sketchB);
return doubleArrayToTuple(jaccardTupple);
}
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/PigUtil.java b/src/main/java/org/apache/datasketches/pig/theta/PigUtil.java
similarity index 80%
rename from src/main/java/com/yahoo/sketches/pig/theta/PigUtil.java
rename to src/main/java/org/apache/datasketches/pig/theta/PigUtil.java
index 7245dba..b7f1257 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/PigUtil.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/PigUtil.java
@@ -1,27 +1,38 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
import java.io.IOException;
+import org.apache.datasketches.ResizeFactor;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.datasketches.theta.UpdateSketch;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ResizeFactor;
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.Sketch;
-import com.yahoo.sketches.theta.UpdateSketch;
-
/**
* Common methods for the pig classes.
- *
- * @author Lee Rhodes
*/
class PigUtil {
static final ResizeFactor RF = ResizeFactor.X8;
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/SketchToString.java b/src/main/java/org/apache/datasketches/pig/theta/SketchToString.java
similarity index 68%
rename from src/main/java/com/yahoo/sketches/pig/theta/SketchToString.java
rename to src/main/java/org/apache/datasketches/pig/theta/SketchToString.java
index 58fd79b..f49493f 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/SketchToString.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/SketchToString.java
@@ -1,20 +1,33 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.pig.theta.PigUtil.tupleToSketch;
+import static org.apache.datasketches.pig.theta.PigUtil.tupleToSketch;
import java.io.IOException;
+import org.apache.datasketches.Util;
+import org.apache.datasketches.theta.Sketch;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.Util;
-import com.yahoo.sketches.theta.Sketch;
-
/**
* This is a User Defined Function (UDF) for "pretty printing" the summary of a sketch
* from a Sketch Tuple.
@@ -22,7 +35,6 @@
* <p>
* Refer to {@link DataToSketch#exec(Tuple)} for the definition of a Sketch Tuple.
* </p>
- * @author Lee Rhodes
*/
public class SketchToString extends EvalFunc<String> {
private boolean detailOut = false;
diff --git a/src/main/java/com/yahoo/sketches/pig/theta/Union.java b/src/main/java/org/apache/datasketches/pig/theta/Union.java
similarity index 89%
rename from src/main/java/com/yahoo/sketches/pig/theta/Union.java
rename to src/main/java/org/apache/datasketches/pig/theta/Union.java
index b669eb0..3444430 100644
--- a/src/main/java/com/yahoo/sketches/pig/theta/Union.java
+++ b/src/main/java/org/apache/datasketches/pig/theta/Union.java
@@ -1,23 +1,41 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.theta;
+package org.apache.datasketches.pig.theta;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-import static com.yahoo.sketches.Util.checkIfPowerOf2;
-import static com.yahoo.sketches.Util.checkProbability;
-import static com.yahoo.sketches.pig.theta.PigUtil.RF;
-import static com.yahoo.sketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.emptySketchTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractBag;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractFieldAtIndex;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractTypeAtIndex;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.checkIfPowerOf2;
+import static org.apache.datasketches.Util.checkProbability;
+import static org.apache.datasketches.pig.theta.PigUtil.RF;
+import static org.apache.datasketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.emptySketchTuple;
+import static org.apache.datasketches.pig.theta.PigUtil.extractBag;
+import static org.apache.datasketches.pig.theta.PigUtil.extractFieldAtIndex;
+import static org.apache.datasketches.pig.theta.PigUtil.extractTypeAtIndex;
import java.io.IOException;
+import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.SetOperation;
import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
@@ -28,16 +46,9 @@
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.Util;
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.SetOperation;
-
/**
* This is a Pig UDF that performs the Union Set Operation on Sketches.
* To assist Pig, this class implements both the <i>Accumulator</i> and <i>Algebraic</i> interfaces.
- *
- * @author Lee Rhodes
*/
public class Union extends EvalFunc<Tuple> implements Accumulator<Tuple>, Algebraic {
//With the single exception of the Accumulator interface, UDFs are stateless.
@@ -46,7 +57,7 @@
private final float p_;
private final long seed_;
private final Tuple emptyCompactOrderedSketchTuple_;
- private com.yahoo.sketches.theta.Union accumUnion_;
+ private org.apache.datasketches.theta.Union accumUnion_;
//TOP LEVEL API
@@ -116,10 +127,10 @@
*/
public Union(final int nomEntries, final float p, final long seed) {
super();
- this.nomEntries_ = nomEntries;
- this.p_ = p;
- this.seed_ = seed;
- this.emptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
+ nomEntries_ = nomEntries;
+ p_ = p;
+ seed_ = seed;
+ emptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
//Catch these errors during construction, don't wait for the exec to be called.
checkIfPowerOf2(nomEntries, "nomEntries");
checkProbability(p, "p");
@@ -175,7 +186,7 @@
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
//The exec is a stateless function. It operates on the input and returns a result.
// It can only call static functions.
- final com.yahoo.sketches.theta.Union union =
+ final org.apache.datasketches.theta.Union union =
SetOperation.builder().setP(p_).setSeed(seed_).setResizeFactor(RF)
.setNominalEntries(nomEntries_).buildUnion();
final DataBag bag = extractBag(inputTuple);
@@ -281,7 +292,7 @@
* @param bag A bag of sketchTuples.
* @param union The union to update
*/
- private static void updateUnion(final DataBag bag, final com.yahoo.sketches.theta.Union union) {
+ private static void updateUnion(final DataBag bag, final org.apache.datasketches.theta.Union union) {
// Bag is not empty. process each innerTuple in the bag
for (Tuple innerTuple : bag) {
// validate the inner Tuples
@@ -449,16 +460,16 @@
* @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
*/
public IntermediateFinal(final int nomEntries, final float p, final long seed) {
- this.myNomEntries_ = nomEntries;
- this.myP_ = p;
- this.mySeed_ = seed;
- this.myEmptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
+ myNomEntries_ = nomEntries;
+ myP_ = p;
+ mySeed_ = seed;
+ myEmptyCompactOrderedSketchTuple_ = emptySketchTuple(seed);
}
@Override //IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
- final com.yahoo.sketches.theta.Union union =
+ final org.apache.datasketches.theta.Union union =
SetOperation.builder().setP(myP_).setSeed(mySeed_).setResizeFactor(RF)
.setNominalEntries(myNomEntries_).buildUnion();
final DataBag outerBag = extractBag(inputTuple); //InputTuple.bag0
diff --git a/src/main/java/org/apache/datasketches/pig/theta/package-info.java b/src/main/java/org/apache/datasketches/pig/theta/package-info.java
new file mode 100644
index 0000000..cd651d7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/theta/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for Theta sketch.
+ */
+package org.apache.datasketches.pig.theta;
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/AlgebraicInitial.java b/src/main/java/org/apache/datasketches/pig/tuple/AlgebraicInitial.java
new file mode 100644
index 0000000..b504f2f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/AlgebraicInitial.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+
+/**
+ * Shared pass-through for the initial step of an Algebraic operation; only checks field 0.
+ */
+public abstract class AlgebraicInitial extends EvalFunc<Tuple> {
+  @Override
+  public Tuple exec(final Tuple inputTuple) throws IOException {
+    final DataBag fieldZero = (DataBag) inputTuple.get(0); // the cast also rejects a non-bag value
+    if (fieldZero != null) {
+      return inputTuple; // handed back untouched
+    }
+    throw new IllegalArgumentException("InputTuple.Field0: Bag may not be null");
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchStats.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchStats.java
new file mode 100644
index 0000000..1a43423
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchStats.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
+
+/** Package-private helper that summarizes the value columns of an ArrayOfDoublesSketch. */
+class ArrayOfDoublesSketchStats {
+  /**
+   * Convert sketch to summary statistics, keeping one accumulator per value column.
+   *
+   * @param sketch ArrayOfDoublesSketch to convert to summary statistics.
+   * @return An array of SummaryStatistics, one per value column of the sketch.
+   */
+  static SummaryStatistics[] sketchToSummaryStatistics(final ArrayOfDoublesSketch sketch) {
+    final int numValues = sketch.getNumValues(); // read once instead of on every loop test
+    final SummaryStatistics[] summaryStatistics = new SummaryStatistics[numValues];
+    for (int i = 0; i < numValues; i++) {
+      summaryStatistics[i] = new SummaryStatistics();
+    }
+    final ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) {
+      // Fetch the entry's values once; getValues() may build a new array per call (TODO confirm).
+      final double[] values = it.getValues();
+      for (int i = 0; i < values.length; i++) {
+        summaryStatistics[i].addValue(values[i]);
+      }
+    }
+    return summaryStatistics;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java
new file mode 100644
index 0000000..000bdd8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToEstimateAndErrorBounds.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * User Defined Function (UDF) that reports the unique count estimate of an
+ * ArrayOfDoublesSketch together with its lower and upper bounds.
+ *
+ * <p>The output tuple holds three doubles in this order: estimate, lower bound, upper bound.
+ * The bounds are given at 95.5% confidence.
+ *
+ * @author Alexander Saydakov
+ */
+public class ArrayOfDoublesSketchToEstimateAndErrorBounds extends EvalFunc<Tuple> {
+
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    final boolean noInput = (input == null) || (input.size() == 0);
+    if (noInput) {
+      return null;
+    }
+    final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketchBytes));
+    final int boundsArg = 2; // same argument for both bound queries
+    final double estimate = sketch.getEstimate();
+    final double lowerBound = sketch.getLowerBound(boundsArg);
+    final double upperBound = sketch.getUpperBound(boundsArg);
+    // Arrays.asList boxes each value, so the tuple carries three Double fields.
+    return TupleFactory.getInstance().newTuple(Arrays.asList(estimate, lowerBound, upperBound));
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimates.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToEstimates.java
similarity index 60%
rename from src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimates.java
rename to src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToEstimates.java
index 87c05af..ea5a410 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToEstimates.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToEstimates.java
@@ -1,21 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketchIterator;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
/**
* This UDF converts an ArrayOfDoubles sketch to estimates.
* The result will be a tuple with N + 1 double values, where
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToMeans.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToMeans.java
new file mode 100644
index 0000000..ce624ba
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToMeans.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * UDF that reduces an ArrayOfDoubles sketch to the mean of each of its value columns.
+ * The output tuple has N doubles, N being the number of double values the sketch
+ * keeps per key; null is returned for missing input or a sketch with no retained entries.
+ */
+public class ArrayOfDoublesSketchToMeans extends EvalFunc<Tuple> {
+
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    if ((input == null) || (input.size() == 0)) {
+      return null;
+    }
+    final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketchBytes));
+    if (sketch.getRetainedEntries() < 1) {
+      return null; // nothing retained, so there is nothing to average
+    }
+    final int numValues = sketch.getNumValues();
+    final SummaryStatistics[] stats = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
+    final Tuple means = TupleFactory.getInstance().newTuple(numValues);
+    // One mean per value column, in column order.
+    int column = 0;
+    while (column < numValues) {
+      means.set(column, stats[column].getMean());
+      column++;
+    }
+    return means;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java
new file mode 100644
index 0000000..0b2d506
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToNumberOfRetainedEntries.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * User Defined Function (UDF) that reads the number of retained entries
+ * out of an ArrayOfDoublesSketch.
+ *
+ * <p>Returns an Integer, or null when the input tuple is null or empty.
+ *
+ * @author Alexander Saydakov
+ */
+public class ArrayOfDoublesSketchToNumberOfRetainedEntries extends EvalFunc<Integer> {
+
+  @Override
+  public Integer exec(final Tuple input) throws IOException {
+    final boolean noInput = (input == null) || (input.size() == 0);
+    if (noInput) {
+      return null;
+    }
+    final Memory sketchMem = Memory.wrap(((DataByteArray) input.get(0)).get());
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(sketchMem);
+    final int retained = sketch.getRetainedEntries();
+    return retained; // autoboxed to Integer
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
similarity index 60%
rename from src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
index ea57557..c6b879b 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToQuantilesSketch.java
@@ -1,24 +1,37 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.DoublesSketchBuilder;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketchIterator;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.DoublesSketchBuilder;
-import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketchIterator;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
/**
* This UDF converts a given column of double values from an ArrayOfDoubles sketch
* to a quantiles DoublesSketch to further analyze the distribution of these values.
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToVariances.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToVariances.java
new file mode 100644
index 0000000..7a4d28a
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchToVariances.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * UDF that reduces an ArrayOfDoubles sketch to the variance of each of its value columns.
+ * The output tuple has N doubles, N being the number of double values the sketch
+ * keeps per key; null is returned for missing input or a sketch with no retained entries.
+ */
+public class ArrayOfDoublesSketchToVariances extends EvalFunc<Tuple> {
+
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    if ((input == null) || (input.size() == 0)) {
+      return null;
+    }
+    final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
+    final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketchBytes));
+    if (sketch.getRetainedEntries() < 1) {
+      return null; // nothing retained, so there is no variance to report
+    }
+    final int numValues = sketch.getNumValues();
+    final SummaryStatistics[] stats = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
+    final Tuple variances = TupleFactory.getInstance().newTuple(numValues);
+    // One variance per value column, in column order.
+    int column = 0;
+    while (column < numValues) {
+      variances.set(column, stats[column].getVariance());
+      column++;
+    }
+    return variances;
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
similarity index 68%
rename from src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
rename to src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
index 420b87c..1e68e08 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/ArrayOfDoublesSketchesToPValueEstimates.java
@@ -1,24 +1,36 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
import java.io.IOException;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.commons.math3.stat.inference.TTest;
-
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-
/**
* Calculate p-values given two ArrayOfDoublesSketch. Each value in the sketch
* is treated as a separate metric measurement, and a p-value will be generated
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketch.java
similarity index 84%
rename from src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketch.java
index 32ed0cc..a2156eb 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketch.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
import org.apache.pig.Algebraic;
@@ -11,6 +25,7 @@
* This UDF creates an ArrayOfDoublesSketch from raw data.
* It supports all three ways: exec(), Accumulator and Algebraic.
*/
+@SuppressWarnings("javadoc")
public class DataToArrayOfDoublesSketch extends DataToArrayOfDoublesSketchBase implements Algebraic {
/**
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketchAlgebraicIntermediateFinal.java
similarity index 71%
rename from src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketchAlgebraicIntermediateFinal.java
rename to src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketchAlgebraicIntermediateFinal.java
index 0845fd8..0d2c340 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketchAlgebraicIntermediateFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketchAlgebraicIntermediateFinal.java
@@ -1,27 +1,40 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
+import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSetOperationBuilder;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUnion;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
/**
* Class used to calculate the intermediate pass (combiner) or the final pass
* (reducer) of an Algebraic sketch operation. This may be called multiple times
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketchBase.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketchBase.java
similarity index 82%
rename from src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketchBase.java
rename to src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketchBase.java
index 15d52cf..7461f03 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DataToArrayOfDoublesSketchBase.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToArrayOfDoublesSketchBase.java
@@ -1,14 +1,30 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketch;
+import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -18,9 +34,6 @@
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketch;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
abstract class DataToArrayOfDoublesSketchBase extends EvalFunc<Tuple> implements Accumulator<Tuple> {
private final int sketchSize_;
private final float samplingProbability_;
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DataToDoubleSummarySketch.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToDoubleSummarySketch.java
similarity index 69%
rename from src/main/java/com/yahoo/sketches/pig/tuple/DataToDoubleSummarySketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/DataToDoubleSummarySketch.java
index acee0c5..8ee0fbc 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DataToDoubleSummarySketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToDoubleSummarySketch.java
@@ -1,27 +1,41 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
+import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory;
+import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations;
import org.apache.pig.Algebraic;
-import com.yahoo.sketches.tuple.adouble.DoubleSummary;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryDeserializer;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryFactory;
-import com.yahoo.sketches.tuple.adouble.DoubleSummarySetOperations;
-
/**
* This UDF creates a Sketch<DoubleSummary> from raw data.
* It supports all three ways: exec(), Accumulator and Algebraic.
*/
+@SuppressWarnings("javadoc")
public class DataToDoubleSummarySketch extends DataToSketch<Double, DoubleSummary> implements Algebraic {
/**
* Constructor with default sketch size and default mode (sum)
*/
public DataToDoubleSummarySketch() {
- super(new DoubleSummaryFactory());
+ super(new DoubleSummaryFactory(DoubleSummary.Mode.Sum));
}
/**
@@ -29,7 +43,7 @@
* @param sketchSize String representation of sketch size
*/
public DataToDoubleSummarySketch(final String sketchSize) {
- super(Integer.parseInt(sketchSize), new DoubleSummaryFactory());
+ super(Integer.parseInt(sketchSize), new DoubleSummaryFactory(DoubleSummary.Mode.Sum));
}
/**
@@ -88,7 +102,8 @@
* Default sketch size and default mode
*/
public IntermediateFinal() {
- super(new DoubleSummaryFactory(), new DoubleSummarySetOperations(), new DoubleSummaryDeserializer());
+ super(new DoubleSummaryFactory(DoubleSummary.Mode.Sum),
+ new DoubleSummarySetOperations(DoubleSummary.Mode.Sum), new DoubleSummaryDeserializer());
}
/**
@@ -97,7 +112,8 @@
* @param sketchSize String representation of sketch size
*/
public IntermediateFinal(final String sketchSize) {
- super(Integer.parseInt(sketchSize), new DoubleSummaryFactory(), new DoubleSummarySetOperations(),
+ super(Integer.parseInt(sketchSize), new DoubleSummaryFactory(DoubleSummary.Mode.Sum),
+ new DoubleSummarySetOperations(DoubleSummary.Mode.Sum),
new DoubleSummaryDeserializer());
}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DataToSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java
similarity index 82%
rename from src/main/java/com/yahoo/sketches/pig/tuple/DataToSketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java
index 20759dc..0d26398 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DataToSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketch.java
@@ -1,14 +1,32 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.tuple.SummaryFactory;
+import org.apache.datasketches.tuple.UpdatableSketch;
+import org.apache.datasketches.tuple.UpdatableSketchBuilder;
+import org.apache.datasketches.tuple.UpdatableSummary;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -18,11 +36,6 @@
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.tuple.SummaryFactory;
-import com.yahoo.sketches.tuple.UpdatableSketch;
-import com.yahoo.sketches.tuple.UpdatableSketchBuilder;
-import com.yahoo.sketches.tuple.UpdatableSummary;
-
/**
* This is a generic implementation to be specialized in concrete UDFs
* @param <U> Update type
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
similarity index 79%
rename from src/main/java/com/yahoo/sketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
rename to src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
index 878d9d3..749cd74 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DataToSketchAlgebraicIntermediateFinal.java
@@ -1,29 +1,42 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+import org.apache.datasketches.tuple.SummaryFactory;
+import org.apache.datasketches.tuple.SummarySetOperations;
+import org.apache.datasketches.tuple.Union;
+import org.apache.datasketches.tuple.UpdatableSketch;
+import org.apache.datasketches.tuple.UpdatableSketchBuilder;
+import org.apache.datasketches.tuple.UpdatableSummary;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.tuple.Sketch;
-import com.yahoo.sketches.tuple.SummaryDeserializer;
-import com.yahoo.sketches.tuple.SummaryFactory;
-import com.yahoo.sketches.tuple.SummarySetOperations;
-import com.yahoo.sketches.tuple.Union;
-import com.yahoo.sketches.tuple.UpdatableSketch;
-import com.yahoo.sketches.tuple.UpdatableSketchBuilder;
-import com.yahoo.sketches.tuple.UpdatableSummary;
-
/**
* Class used to calculate the intermediate pass (combiner) or the final pass
* (reducer) of an Algebraic sketch operation. This may be called multiple times
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/DoubleSummarySketchToEstimates.java b/src/main/java/org/apache/datasketches/pig/tuple/DoubleSummarySketchToEstimates.java
new file mode 100644
index 0000000..c8940e7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DoubleSummarySketchToEstimates.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.SketchIterator;
+import org.apache.datasketches.tuple.Sketches;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+/**
+ * Pig UDF that turns a serialized {@code Sketch<DoubleSummary>} into a pair of estimates.
+ *
+ * <p>Field 0 of the result is the sketch's estimate of the number of unique keys in the
+ * original population. Field 1 is the sum of the summary values retained in the sketch
+ * divided by theta, i.e. the retained sum scaled up to the original population. The second
+ * estimate assumes that the DoubleSummary was used in the Sum mode.</p>
+ */
+public class DoubleSummarySketchToEstimates extends EvalFunc<Tuple> {
+
+  private static final SummaryDeserializer<DoubleSummary> SUMMARY_DESERIALIZER =
+      new DoubleSummaryDeserializer();
+
+  /**
+   * Computes both estimates from the sketch held in the first field of the input.
+   *
+   * @param input tuple whose first field is a DataByteArray holding the serialized sketch
+   * @return a two-field tuple (unique-key estimate, scaled sum of values), or null when the
+   *     input tuple is null or has no fields
+   * @throws IOException if a field of the input or output tuple cannot be accessed
+   */
+  @Override
+  public Tuple exec(final Tuple input) throws IOException {
+    final boolean hasSketchField = (input != null) && (input.size() > 0);
+    if (!hasSketchField) {
+      return null;
+    }
+
+    final byte[] serialized = ((DataByteArray) input.get(0)).get();
+    final Sketch<DoubleSummary> summarySketch =
+        Sketches.heapifySketch(Memory.wrap(serialized), SUMMARY_DESERIALIZER);
+
+    final Tuple estimates = TupleFactory.getInstance().newTuple(2);
+    estimates.set(0, summarySketch.getEstimate());
+    // Dividing the retained sum by theta scales it to the original population.
+    estimates.set(1, sumRetainedValues(summarySketch) / summarySketch.getTheta());
+    return estimates;
+  }
+
+  /** Adds up the summary value of every entry retained in the given sketch. */
+  private static double sumRetainedValues(final Sketch<DoubleSummary> summarySketch) {
+    double total = 0;
+    for (final SketchIterator<DoubleSummary> entries = summarySketch.iterator(); entries.next();) {
+      total += entries.getSummary().getValue();
+    }
+    return total;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/DoubleSummarySketchToPercentile.java b/src/main/java/org/apache/datasketches/pig/tuple/DoubleSummarySketchToPercentile.java
new file mode 100644
index 0000000..67ea162
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/DoubleSummarySketchToPercentile.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import java.io.IOException;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.SketchIterator;
+import org.apache.datasketches.tuple.Sketches;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+
+/**
+ * This UDF is to get a percentile value from a {@code Sketch<DoubleSummary>}.
+ * The values from DoubleSummary objects in the sketch are extracted,
+ * and a single value with the given rank is returned. The rank is in
+ * percent. For example, 50th percentile is the median value of the
+ * distribution (the number separating the higher half of a probability
+ * distribution from the lower half).
+ */
+public class DoubleSummarySketchToPercentile extends EvalFunc<Double> {
+
+  private static final SummaryDeserializer<DoubleSummary> SUMMARY_DESERIALIZER =
+      new DoubleSummaryDeserializer();
+  private static final int QUANTILES_SKETCH_SIZE = 1024;
+
+  /**
+   * Feeds every summary value retained in the sketch into a quantiles sketch and returns
+   * the value at the requested percentile.
+   *
+   * @param input tuple of exactly two fields: a DataByteArray holding the serialized sketch,
+   *     and a numeric percentile in the range from 0 to 100 inclusive
+   * @return the value found at the given percentile
+   * @throws IllegalArgumentException if the input is null, does not have exactly two fields,
+   *     or the percentile is NaN or outside of the range from 0 to 100
+   * @throws IOException if a field of the input tuple cannot be accessed
+   */
+  @Override
+  public Double exec(final Tuple input) throws IOException {
+    // A null tuple is reported like a wrong number of fields instead of failing with an NPE.
+    if ((input == null) || (input.size() != 2)) {
+      throw new IllegalArgumentException("expected two inputs: sketch and percentile");
+    }
+
+    final DataByteArray dba = (DataByteArray) input.get(0);
+    final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(
+        Memory.wrap(dba.get()), SUMMARY_DESERIALIZER);
+
+    // Any numeric field is accepted (an integer literal such as 50 as well as 50.0),
+    // not only a Double.
+    final double percentile = ((Number) input.get(1)).doubleValue();
+    // Written as a negated in-range test so that NaN is rejected too:
+    // every comparison with NaN is false, so NaN would pass (p < 0) || (p > 100).
+    if (!((percentile >= 0) && (percentile <= 100))) {
+      throw new IllegalArgumentException("percentile must be between 0 and 100");
+    }
+
+    final UpdateDoublesSketch qs = DoublesSketch.builder().setK(QUANTILES_SKETCH_SIZE).build();
+    final SketchIterator<DoubleSummary> it = sketch.iterator();
+    while (it.next()) {
+      qs.update(it.getSummary().getValue());
+    }
+    // getQuantile is given a fraction, so convert from percent.
+    return qs.getQuantile(percentile / 100);
+  }
+
+}
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketch.java
similarity index 78%
rename from src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketch.java
index aafb12a..91e3a56 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketch.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
import org.apache.pig.Algebraic;
@@ -11,6 +25,7 @@
* This is to union ArrayOfDoublesSketches.
* It supports all three ways: exec(), Accumulator and Algebraic
*/
+@SuppressWarnings("javadoc")
public class UnionArrayOfDoublesSketch extends UnionArrayOfDoublesSketchBase implements Algebraic {
/**
* Constructor with default sketch size and default number of values of 1.
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketchAlgebraicIntermediateFinal.java
similarity index 71%
rename from src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketchAlgebraicIntermediateFinal.java
rename to src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketchAlgebraicIntermediateFinal.java
index 2a55f1e..20df310 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketchAlgebraicIntermediateFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketchAlgebraicIntermediateFinal.java
@@ -1,25 +1,38 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSetOperationBuilder;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUnion;
-
/**
* This is to calculate the intermediate pass (combiner) or the final pass
* (reducer) of an Algebraic sketch operation. This may be called multiple times
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketchBase.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketchBase.java
similarity index 72%
rename from src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketchBase.java
rename to src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketchBase.java
index 4d772d3..2769b4f 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/UnionArrayOfDoublesSketchBase.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionArrayOfDoublesSketchBase.java
@@ -1,14 +1,33 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.ArrayOfDoublesSetOperationBuilder;
+import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
+import org.apache.datasketches.tuple.ArrayOfDoublesUnion;
+import org.apache.datasketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -17,12 +36,6 @@
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSetOperationBuilder;
-import com.yahoo.sketches.tuple.ArrayOfDoublesSketches;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUnion;
-import com.yahoo.sketches.tuple.ArrayOfDoublesUpdatableSketchBuilder;
-
abstract class UnionArrayOfDoublesSketchBase extends EvalFunc<Tuple> implements Accumulator<Tuple> {
private final int sketchSize_;
private final int numValues_;
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/UnionDoubleSummarySketch.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionDoubleSummarySketch.java
similarity index 67%
rename from src/main/java/com/yahoo/sketches/pig/tuple/UnionDoubleSummarySketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/UnionDoubleSummarySketch.java
index 6f6b389..61b03c5 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/UnionDoubleSummarySketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionDoubleSummarySketch.java
@@ -1,27 +1,42 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
+import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer;
+import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations;
import org.apache.pig.Algebraic;
-import com.yahoo.sketches.tuple.adouble.DoubleSummary;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryDeserializer;
-import com.yahoo.sketches.tuple.adouble.DoubleSummarySetOperations;
-
/**
* This is to union Sketch<DoubleSummary>.
* It supports all three ways: exec(), Accumulator and Algebraic
*/
+@SuppressWarnings("javadoc")
public class UnionDoubleSummarySketch extends UnionSketch<DoubleSummary> implements Algebraic {
/**
* Constructor with default sketch size and default mode (sum)
*/
public UnionDoubleSummarySketch() {
- super(new DoubleSummarySetOperations(), new DoubleSummaryDeserializer());
+ super(new DoubleSummarySetOperations(DoubleSummary.Mode.Sum),
+ new DoubleSummaryDeserializer());
}
/**
@@ -29,7 +44,8 @@
* @param sketchSize String representation of sketch size
*/
public UnionDoubleSummarySketch(final String sketchSize) {
- super(Integer.parseInt(sketchSize), new DoubleSummarySetOperations(), new DoubleSummaryDeserializer());
+ super(Integer.parseInt(sketchSize),
+ new DoubleSummarySetOperations(DoubleSummary.Mode.Sum), new DoubleSummaryDeserializer());
}
/**
@@ -87,7 +103,7 @@
* Default sketch size and default mode.
*/
public IntermediateFinal() {
- super(new DoubleSummarySetOperations(), new DoubleSummaryDeserializer());
+ super(new DoubleSummarySetOperations(DoubleSummary.Mode.Sum), new DoubleSummaryDeserializer());
}
/**
@@ -96,7 +112,8 @@
* @param sketchSize String representation of sketch size
*/
public IntermediateFinal(final String sketchSize) {
- super(Integer.parseInt(sketchSize), new DoubleSummarySetOperations(), new DoubleSummaryDeserializer());
+ super(Integer.parseInt(sketchSize),
+ new DoubleSummarySetOperations(DoubleSummary.Mode.Sum), new DoubleSummaryDeserializer());
}
/**
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/UnionSketch.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java
similarity index 76%
rename from src/main/java/com/yahoo/sketches/pig/tuple/UnionSketch.java
rename to src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java
index b136c9d..1bba6fe 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/UnionSketch.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketch.java
@@ -1,14 +1,34 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.Sketches;
+import org.apache.datasketches.tuple.Summary;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+import org.apache.datasketches.tuple.SummarySetOperations;
+import org.apache.datasketches.tuple.Union;
import org.apache.log4j.Logger;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -17,13 +37,6 @@
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.tuple.Sketch;
-import com.yahoo.sketches.tuple.Sketches;
-import com.yahoo.sketches.tuple.Summary;
-import com.yahoo.sketches.tuple.SummaryDeserializer;
-import com.yahoo.sketches.tuple.SummarySetOperations;
-import com.yahoo.sketches.tuple.Union;
-
/**
* This is a generic implementation to be specialized in concrete UDFs
* @param <S> Summary type
diff --git a/src/main/java/com/yahoo/sketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
similarity index 75%
rename from src/main/java/com/yahoo/sketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
rename to src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
index c545254..655dd71 100644
--- a/src/main/java/com/yahoo/sketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
+++ b/src/main/java/org/apache/datasketches/pig/tuple/UnionSketchAlgebraicIntermediateFinal.java
@@ -1,26 +1,39 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.tuple;
+package org.apache.datasketches.pig.tuple;
-import static com.yahoo.sketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import java.io.IOException;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.Summary;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+import org.apache.datasketches.tuple.SummarySetOperations;
+import org.apache.datasketches.tuple.Union;
import org.apache.log4j.Logger;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
-import com.yahoo.sketches.tuple.Sketch;
-import com.yahoo.sketches.tuple.Summary;
-import com.yahoo.sketches.tuple.SummaryDeserializer;
-import com.yahoo.sketches.tuple.SummarySetOperations;
-import com.yahoo.sketches.tuple.Union;
-
/**
* This is to calculate the intermediate pass (combiner) or the final pass
* (reducer) of an Algebraic sketch operation. This may be called multiple times
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/Util.java b/src/main/java/org/apache/datasketches/pig/tuple/Util.java
new file mode 100644
index 0000000..0717ce1
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/Util.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.tuple;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.Sketches;
+import org.apache.datasketches.tuple.Summary;
+import org.apache.datasketches.tuple.SummaryDeserializer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+
+final class Util {
+
+ static final TupleFactory tupleFactory = TupleFactory.getInstance();
+
+ static Tuple doubleArrayToTuple(final double[] array) throws ExecException {
+ final Tuple tuple = tupleFactory.newTuple(array.length);
+ for (int i = 0; i < array.length; i++) {
+ tuple.set(i, array[i]);
+ }
+ return tuple;
+ }
+
+ static <S extends Summary> Sketch<S> deserializeSketchFromTuple(final Tuple tuple,
+ final SummaryDeserializer<S> summaryDeserializer) throws ExecException {
+ final byte[] bytes = ((DataByteArray) tuple.get(0)).get();
+ return Sketches.heapifySketch(Memory.wrap(bytes), summaryDeserializer);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/pig/tuple/package-info.java b/src/main/java/org/apache/datasketches/pig/tuple/package-info.java
new file mode 100644
index 0000000..645912e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/pig/tuple/package-info.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Pig UDFs for Tuple sketches.
+ * Tuple sketches are based on the idea of Theta sketches with the addition of
+ * values associated with unique keys.
+ * Two sets of tuple sketch classes are available at the moment:
+ * generic Tuple sketches with user-defined Summary, and a faster specialized
+ * implementation with an array of double values.
+ *
+ * <p>There are two sets of Pig UDFs: one for generic Tuple sketch with an example
+ * implementation for DoubleSummary, and another one for a specialized ArrayOfDoublesSketch.
+ *
+ * <p> The generic implementation is in the form of abstract classes DataToSketch and
+ * UnionSketch to be specialized for particular types of Summary.
+ * An example implementation for DoubleSummary is provided: DataToDoubleSummarySketch and
+ * UnionDoubleSummarySketch, as well as UDFs to obtain the results from sketches:
+ * DoubleSummarySketchToEstimates and DoubleSummarySketchToPercentile.
+ *
+ * <p>UDFs for ArrayOfDoublesSketch: DataToArrayOfDoublesSketch, UnionArrayOfDoublesSketch,
+ * ArrayOfDoublesSketchToEstimates.
+ *
+ * @author Alexander Saydakov
+ */
+package org.apache.datasketches.pig.tuple;
diff --git a/src/main/java/org/apache/datasketches/sampling/SamplingPigUtil.java b/src/main/java/org/apache/datasketches/sampling/SamplingPigUtil.java
new file mode 100644
index 0000000..3f8e856
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/sampling/SamplingPigUtil.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.sampling;
+
+import java.util.ArrayList;
+
+/**
+ * @author Jon Malkin
+ */
+@SuppressWarnings("javadoc")
+public final class SamplingPigUtil {
+
+ public static <T> ArrayList<T> getRawSamplesAsList(final ReservoirItemsSketch<T> sketch) {
+ return sketch.getRawSamplesAsList();
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/sampling/package-info.java b/src/main/java/org/apache/datasketches/sampling/package-info.java
new file mode 100644
index 0000000..d60e87e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/sampling/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * The sampling package
+ */
+package org.apache.datasketches.sampling;
diff --git a/src/main/javadoc/overview.html b/src/main/javadoc/overview.html
index dbebd2c..0ed21b9 100644
--- a/src/main/javadoc/overview.html
+++ b/src/main/javadoc/overview.html
@@ -1,14 +1,32 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
<html>
<head>
</head>
<body>
-<h2>Sketching Pig Library</h2>
+<h2>Sketching Pig Adaptors</h2>
<h3>Overview</h3>
-<p>The Sketching Pig Library provides access to the theta sketches in sketches-core repository
+<p>The Sketching Pig Library provides access to the sketches in the datasketches-java repository
from Hadoop Pig.
</p>
diff --git a/src/main/javadoc/resources/dictionary.html b/src/main/javadoc/resources/dictionary.html
index 7de8514..18004cc 100644
--- a/src/main/javadoc/resources/dictionary.html
+++ b/src/main/javadoc/resources/dictionary.html
@@ -1,4 +1,24 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "https://www.w3.org/TR/html4/frameset.dtd">
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
<html>
<head>
<title>Sketch Library Dictionary</title>
@@ -7,7 +27,7 @@
<h2>Sketch Library Dictionary</h2>
<h3><a name="accuracy">Sketch Accuracy</a></h3>
-About sketch accuracy...
+Refers to sketch accuracy...
<h3><a name="alphaTCF">Alpha TCF</a></h3>
The Alpha Theta Choosing Function (TCF) and the theory behind it is fully described in the
@@ -57,19 +77,24 @@
For the Theta Alpha Sketch, a retained hash value is considered <i>dirty</i> if it is ≥ <a href="#thetaLong">Theta Long</a> or < 0.
See <a href="#validHash">Valid Hash</a>.
-<h3><a name="empty">Empty</a></h3>
-In Theta Sketches, the state <i>Empty</i> for a sketch means that the sketch cache has zero hash values and that none of the
-update methods have been called with valid data. The internal <i>empty flag</i> is one of the state variables
-within the sketch and is accessed with the <i>isEmpty()</i> method, which is useful for rejecting empty
-sketches from a set operation.
+<h3><a name="empty">isEmpty()</a></h3>
+In Theta Sketches, the state <i>isEmpty()</i> for a sketch means that the sketch cache has zero hash values and that none of the
+update methods have been called with valid data. In other words, the sketch has never seen any data.
+This state is equivalent to "null" in the sense that it is safe to exclude empty sketches from set operations.
-<p>Note that <i>Empty</i> does not mean that theta is 1.0 because if <i>p</i> < 1.0, theta will be set
+<p>Note that <i>isEmpty()</i> does not mean that theta is 1.0 because if <i>p</i> < 1.0, theta will be set
equal to <i>p</i> during construction.
-Also, a cache of zero values does not mean that the sketch is <i>Empty</i> since set intersection or difference
-operations can result in a sketch with zero values but with a valid theta and upper and lower bounds.
+Also, a cache of zero values (<i>getRetainedEntries(true) = 0</i>) does not mean that the sketch is <i>Empty</i> since
+set intersection or difference operations can result in a sketch with zero values.
+If the sourcing sketches had seen data then a resulting intersection or difference sketch will be <i>not Empty</i>
+and have valid upper and lower bounds even if the cache has zero values. In other words, the resulting sketch represents
+a valid distribution of data that just happens to have zero samples collected from it.
-<p>These are subtle distinctions and exist for mathematical correctness. Excluding sketches that are <i>not Empty</i>
-from set operations that would otherwise include them could result in impacting the accuracy of results.
+<p>Note also that a virgin Intersection object will return <i>isEmpty() == false</i>. This is because a virgin Intersection object represents
+the Universe Set, which is clearly not empty.</p>
+
+<p>These are subtle distinctions and exist for mathematical correctness. Excluding sketches that just have <i>getRetainedEntries(true) = 0</i>
+from set operations could impact the accuracy of results.
<h3><a name="estMode">Estimation Mode</a></h3>
Once a Theta Sketch exceeds the configured <a href="#nomEntries">Nominal Entries</a>, or <i>k</i>, number of retained hash values,
@@ -104,30 +129,33 @@
Alpha Sketches this value is 512. Specifying a value less than this minimum value just results in the minimum value being used.
<h3><a name="numStdDev">Number of Standard Deviations</a></h3>
-This is usually a positive integer (1, 2, or 3) used in the getUpperBounds(int numStdDev) and
-getLowerBounds(int numStdDev) methods and usually represents the +/- deviation from the center of the
-Standard Normal Gaussian Distribution.<br>
-A value of 1 will produce 68.3% confidence bounds.<br>
-A value of 2 will produce 95.4% confidence bounds.<br>
-A value of 3 will produce 99.7% confidence bounds.<br>
+This is a positive number, which may be either an integer (1, 2, or 3) or a double ≤ 3.0.
+This value is used in the getUpperBound(int numStdDev) and
+getLowerBound(int numStdDev) methods and represents (theoretically) the +/- standard deviation from the center of the
+Standard Normal Gaussian Distribution. For example:
-<p>However, for Theta Sketches with very small counts (< 100) and (theta < 1.0) the error distribution of the sketch is no longer Gaussian
-and becomes quite asymmetric as the counts approach one.
-Nonetheless, special mathematical approximation methods are used to
-estimate the value of the upper and lower bounds such that they are accurate to within one percent of the estimation of the mean.
-See <a href="#accuracy">Sketch Accuracy</a>.
+<p>getUpperBound(1) returns the estimated quantile(0.841) of the distribution.<br>
+getLowerBound(1) returns the estimated quantile(0.158) of the distribution.<br>
+getUpperBound(2) returns the estimated quantile(0.977) of the distribution.<br>
+getLowerBound(2) returns the estimated quantile(0.023) of the distribution.<br>
+getUpperBound(3) returns the estimated quantile(0.9986) of the distribution.<br>
+getLowerBound(3) returns the estimated quantile(0.0013) of the distribution.<br>
+</p>
-<h3><a name="p">Sampling Probability <i>p</i></a></h3>
-For Theta Sketches, the uniform random pre-sketching sampling probability.
-Depending on the specific sketch, the constructor data type is <i>float</i> or <i>String</i>.
-Incoming hashed data values are sampled by this probability factor before being submitted to
-the sketching algorithm. For example, if <i>p</i> were set to 0.25, then on average, only one
-forth of the incoming values, selected uniformly and at random, would be evaluated by the
-sketching algorithm to be retained by the sketch.
-Its default value is 1.0 (no sampling).
-Its value must be in the range: 0 < p ≤ 1.0.
-<p>This mode is particularly useful when merging large numbers of
-<a href="#degenerateSketch">degenerate sketches</a>.
+<p>However, for sketches with small configured values of <i>Nominal Entries < 4096</i> for Theta or <i>lgConfigK < 12</i> for HLL,
+the error distribution of the sketch becomes quite asymmetric and cannot be approximated with a Gaussian. In these cases the interpretation of
+<i>numStdDev</i> is that of an index that returns the quantile of the sketch error distribution that corresponds to fractional normalized rank
+of the standard normal distribution at the specified <i>numStdDev</i>.
+
+<p>Thus, getUpperBound(1) and getLowerBound(1) represent the 68.3% confidence bounds,
+getUpperBound(2) and getLowerBound(2) represent the 95.4% confidence bounds, and
+getUpperBound(3) and getLowerBound(3) represent the 99.7% confidence bounds.
+<br>
+
+<p>For some sketches where the error distribution is not Gaussian, special mathematical approximation methods are used.
+See <a href="#accuracy">Sketch Accuracy</a>.</p>
+
+
<h3><a name="quickSelectTCF">Quick Select TCF</a></h3>
The fundamental Theta Sketch QuickSelect algorithm is described in classic algorithm texts by Sedgewick and
@@ -168,6 +196,18 @@
"X2" means the internal cache will start very small and double in size until the target size is reached.<br>
Similarly, "X4" is a factor of 4 and "X8 is a factor of 8.
+<h3><a name="p">Sampling Probability <i>p</i></a></h3>
+For Theta Sketches, the uniform random pre-sketching sampling probability.
+Depending on the specific sketch, the constructor data type is <i>float</i> or <i>String</i>.
+Incoming hashed data values are sampled by this probability factor before being submitted to
+the sketching algorithm. For example, if <i>p</i> were set to 0.25, then on average, only one
+fourth of the incoming values, selected uniformly and at random, would be evaluated by the
+sketching algorithm to be retained by the sketch.
+Its default value is 1.0 (no sampling).
+Its value must be in the range: 0 < p ≤ 1.0.
+<p>This mode is particularly useful when merging large numbers of
+<a href="#degenerateSketch">degenerate sketches</a>.
+
<h3><a name="seed">Seed</a></h3>
For Theta Sketches, the long (64-bit) seed is required by the Update Hash Function.
This seed value is intentionally not serialized along with this sketch in order to provide
@@ -189,6 +229,13 @@
(1) that two sketches undergoing set operations were, in fact, created using matching <a href="#seed">Update Hash Seeds</a>;
or (2) that when deserializing or wrapping a sketch image that the caller has the correct seed.
+<h3><a name="SnowPlow">Snow Plow Effect</a></h3>
+When coordinated hash tables are merged and the merging process does not update the target sketch with sufficient randomness, clustering
+in the target hash table can be greatly exaggerated, causing poor speed performance for both updates and searches. This is called the
+"snowplow" effect because of the analogy of visualizing the clusters in a hash table as piles of snow that grow larger and larger. Since the
+size of the clusters is represented only by their width (not height like piles of snow), the clusters push themselves out horizontally and
+merge together as if they were pushed together with a snowplow.
+
<h3><a name="tcf">Theta Choosing Function (TCF)</a></h3>
For Theta Sketches, the Theta Choosing Function (TCF) and the theory behind it is fully described in the
<a href="https://github.com/DataSketches/DataSketches.github.io/blob/master/docs/pdf/ThetaSketchFramework.pdf">Theta Sketch Framework</a> paper.
diff --git a/src/main/javadoc/stylesheet.css b/src/main/javadoc/stylesheet.css
deleted file mode 100644
index 1892267..0000000
--- a/src/main/javadoc/stylesheet.css
+++ /dev/null
@@ -1,646 +0,0 @@
-/* Javadoc style sheet */
-/*
-Overall document style
-*/
-
-/*
-@import url('resources/fonts/dejavu.css');
-*/
-
-body {
- background-color:#ffffff;
- color:#353833;
- font-family:'DejaVu Sans', Arial, Helvetica, sans-serif;
- font-size:14px;
- margin:0;
-}
-a:link, a:visited {
- text-decoration:none;
- color:#4A6782;
-}
-a:hover, a:focus {
- text-decoration:none;
- color:#bb7a2a;
-}
-a:active {
- text-decoration:none;
- color:#4A6782;
-}
-a[name] {
- color:#353833;
-}
-a[name]:hover {
- text-decoration:none;
- color:#353833;
-}
-pre {
- font-family:'DejaVu Sans Mono', monospace;
- font-size:14px;
-}
-h1 {
- font-size:20px;
-}
-h2 {
- font-size:18px;
-}
-h3 {
- font-size:16px;
- font-style:italic;
-}
-h4 {
- font-size:13px;
-}
-h5 {
- font-size:12px;
-}
-h6 {
- font-size:11px;
-}
-ul {
- list-style-type:disc;
-}
-code, tt {
- font-family:'DejaVu Sans Mono', monospace;
- font-size:14px;
- padding-top:4px;
- margin-top:8px;
- line-height:1.4em;
-}
-dt code {
- font-family:'DejaVu Sans Mono', monospace;
- font-size:14px;
- padding-top:4px;
-}
-table tr td dt code {
- font-family:'DejaVu Sans Mono', monospace;
- font-size:14px;
- vertical-align:top;
- padding-top:4px;
-}
-sup {
- font-size:8px;
-}
-/*
-Document title and Copyright styles
-*/
-.clear {
- clear:both;
- height:0px;
- overflow:hidden;
-}
-.aboutLanguage {
- float:right;
- padding:0px 21px;
- font-size:11px;
- z-index:200;
- margin-top:-9px;
-}
-.legalCopy {
- margin-left:.5em;
-}
-.bar a, .bar a:link, .bar a:visited, .bar a:active {
- color:#FFFFFF;
- text-decoration:none;
-}
-.bar a:hover, .bar a:focus {
- color:#bb7a2a;
-}
-.tab {
- background-color:#0066FF;
- color:#ffffff;
- padding:8px;
- width:5em;
- font-weight:bold;
-}
-/*
-Navigation bar styles
-*/
-.bar {
- background-color:#4D7A97;
- color:#FFFFFF;
- padding:.8em .5em .4em .8em;
- height:auto;/*height:1.8em;*/
- font-size:11px;
- margin:0;
-}
-.topNav {
- background-color:#4D7A97;
- color:#FFFFFF;
- float:left;
- padding:0;
- width:100%;
- clear:right;
- height:2.8em;
- padding-top:10px;
- overflow:hidden;
- font-size:12px;
-}
-.bottomNav {
- margin-top:10px;
- background-color:#4D7A97;
- color:#FFFFFF;
- float:left;
- padding:0;
- width:100%;
- clear:right;
- height:2.8em;
- padding-top:10px;
- overflow:hidden;
- font-size:12px;
-}
-.subNav {
- background-color:#dee3e9;
- float:left;
- width:100%;
- overflow:hidden;
- font-size:12px;
-}
-.subNav div {
- clear:left;
- float:left;
- padding:0 0 5px 6px;
- text-transform:uppercase;
-}
-ul.navList, ul.subNavList {
- float:left;
- margin:0 25px 0 0;
- padding:0;
-}
-ul.navList li{
- list-style:none;
- float:left;
- padding: 5px 6px;
- text-transform:uppercase;
-}
-ul.subNavList li{
- list-style:none;
- float:left;
-}
-.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited {
- color:#FFFFFF;
- text-decoration:none;
- text-transform:uppercase;
-}
-.topNav a:hover, .bottomNav a:hover {
- text-decoration:none;
- color:#bb7a2a;
- text-transform:uppercase;
-}
-.navBarCell1Rev {
- background-color:#F8981D;
- color:#253441;
- margin: auto 5px;
-}
-.skipNav {
- position:absolute;
- top:auto;
- left:-9999px;
- overflow:hidden;
-}
-/*
-Page header and footer styles
-*/
-.header, .footer {
- clear:both;
- margin:0 20px;
- padding:5px 0 0 0;
-}
-.indexHeader {
- margin:10px;
- position:relative;
-}
-.indexHeader span{
- margin-right:15px;
-}
-.indexHeader h1 {
- font-size:13px;
-}
-.title {
- color:#2c4557;
- margin:10px 0;
-}
-.subTitle {
- margin:5px 0 0 0;
-}
-.header ul {
- margin:0 0 15px 0;
- padding:0;
-}
-.footer ul {
- margin:20px 0 5px 0;
-}
-.header ul li, .footer ul li {
- list-style:none;
- font-size:13px;
-}
-/*
-Heading styles
-*/
-div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 {
- background-color:#dee3e9;
- border:1px solid #d0d9e0;
- margin:0 0 6px -8px;
- padding:7px 5px;
-}
-ul.blockList ul.blockList ul.blockList li.blockList h3 {
- background-color:#dee3e9;
- border:1px solid #d0d9e0;
- margin:0 0 6px -8px;
- padding:7px 5px;
-}
-ul.blockList ul.blockList li.blockList h3 {
- padding:0;
- margin:15px 0;
-}
-ul.blockList li.blockList h2 {
- padding:0px 0 20px 0;
-}
-/*
-Page layout container styles
-*/
-.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer {
- clear:both;
- padding:10px 20px;
- position:relative;
-}
-.indexContainer {
- margin:10px;
- position:relative;
- font-size:12px;
-}
-.indexContainer h2 {
- font-size:13px;
- padding:0 0 3px 0;
-}
-.indexContainer ul {
- margin:0;
- padding:0;
-}
-.indexContainer ul li {
- list-style:none;
- padding-top:2px;
-}
-.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt {
- font-size:12px;
- font-weight:bold;
- margin:10px 0 0 0;
- color:#4E4E4E;
-}
-.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd {
- margin:5px 0 10px 0px;
- font-size:14px;
- font-family:'DejaVu Sans Mono',monospace;
-}
-.serializedFormContainer dl.nameValue dt {
- margin-left:1px;
- font-size:1.1em;
- display:inline;
- font-weight:bold;
-}
-.serializedFormContainer dl.nameValue dd {
- margin:0 0 0 1px;
- font-size:1.1em;
- display:inline;
-}
-/*
-List styles
-*/
-ul.horizontal li {
- display:inline;
- font-size:0.9em;
-}
-ul.inheritance {
- margin:0;
- padding:0;
-}
-ul.inheritance li {
- display:inline;
- list-style:none;
-}
-ul.inheritance li ul.inheritance {
- margin-left:15px;
- padding-left:15px;
- padding-top:1px;
-}
-ul.blockList, ul.blockListLast {
- margin:10px 0 10px 0;
- padding:0;
-}
-ul.blockList li.blockList, ul.blockListLast li.blockList {
- list-style:none;
- margin-bottom:15px;
- line-height:1.4;
-}
-ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList {
- padding:0px 20px 5px 10px;
- border:1px solid #ededed;
- background-color:#f8f8f8;
-}
-ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList {
- padding:0 0 5px 8px;
- background-color:#ffffff;
- border:none;
-}
-ul.blockList ul.blockList ul.blockList ul.blockList li.blockList {
- margin-left:0;
- padding-left:0;
- padding-bottom:15px;
- border:none;
-}
-ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast {
- list-style:none;
- border-bottom:none;
- padding-bottom:0;
-}
-table tr td dl, table tr td dl dt, table tr td dl dd {
- margin-top:0;
- margin-bottom:1px;
-}
-/*
-Table styles
-*/
-.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary {
- width:100%;
- border-left:1px solid #EEE;
- border-right:1px solid #EEE;
- border-bottom:1px solid #EEE;
-}
-.overviewSummary, .memberSummary {
- padding:0px;
-}
-.overviewSummary caption, .memberSummary caption, .typeSummary caption,
-.useSummary caption, .constantsSummary caption, .deprecatedSummary caption {
- position:relative;
- text-align:left;
- background-repeat:no-repeat;
- color:#253441;
- font-weight:bold;
- clear:none;
- overflow:hidden;
- padding:0px;
- padding-top:10px;
- padding-left:1px;
- margin:0px;
- white-space:pre;
-}
-.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link,
-.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link,
-.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover,
-.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover,
-.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active,
-.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active,
-.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited,
-.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited {
- color:#FFFFFF;
-}
-.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span,
-.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span {
- white-space:nowrap;
- padding-top:5px;
- padding-left:12px;
- padding-right:12px;
- padding-bottom:7px;
- display:inline-block;
- float:left;
- background-color:#F8981D;
- border: none;
- height:16px;
-}
-.memberSummary caption span.activeTableTab span {
- white-space:nowrap;
- padding-top:5px;
- padding-left:12px;
- padding-right:12px;
- margin-right:3px;
- display:inline-block;
- float:left;
- background-color:#F8981D;
- height:16px;
-}
-.memberSummary caption span.tableTab span {
- white-space:nowrap;
- padding-top:5px;
- padding-left:12px;
- padding-right:12px;
- margin-right:3px;
- display:inline-block;
- float:left;
- background-color:#4D7A97;
- height:16px;
-}
-.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab {
- padding-top:0px;
- padding-left:0px;
- padding-right:0px;
- background-image:none;
- float:none;
- display:inline;
-}
-.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd,
-.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd {
- display:none;
- width:5px;
- position:relative;
- float:left;
- background-color:#F8981D;
-}
-.memberSummary .activeTableTab .tabEnd {
- display:none;
- width:5px;
- margin-right:3px;
- position:relative;
- float:left;
- background-color:#F8981D;
-}
-.memberSummary .tableTab .tabEnd {
- display:none;
- width:5px;
- margin-right:3px;
- position:relative;
- background-color:#4D7A97;
- float:left;
-
-}
-.overviewSummary td, .memberSummary td, .typeSummary td,
-.useSummary td, .constantsSummary td, .deprecatedSummary td {
- text-align:left;
- padding:0px 0px 12px 10px;
-}
-th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th,
-td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{
- vertical-align:top;
- padding-right:0px;
- padding-top:8px;
- padding-bottom:3px;
-}
-th.colFirst, th.colLast, th.colOne, .constantsSummary th {
- background:#dee3e9;
- text-align:left;
- padding:8px 3px 3px 7px;
-}
-td.colFirst, th.colFirst {
- white-space:nowrap;
- font-size:13px;
-}
-td.colLast, th.colLast {
- font-size:13px;
-}
-td.colOne, th.colOne {
- font-size:13px;
-}
-.overviewSummary td.colFirst, .overviewSummary th.colFirst,
-.useSummary td.colFirst, .useSummary th.colFirst,
-.overviewSummary td.colOne, .overviewSummary th.colOne,
-.memberSummary td.colFirst, .memberSummary th.colFirst,
-.memberSummary td.colOne, .memberSummary th.colOne,
-.typeSummary td.colFirst{
- width:25%;
- vertical-align:top;
-}
-td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover {
- font-weight:bold;
-}
-.tableSubHeadingColor {
- background-color:#EEEEFF;
-}
-.altColor {
- background-color:#FFFFFF;
-}
-.rowColor {
- background-color:#EEEEEF;
-}
-/*
-Content styles
-*/
-.description pre {
- margin-top:0;
-}
-.deprecatedContent {
- margin:0;
- padding:10px 0;
-}
-.docSummary {
- padding:0;
-}
-
-ul.blockList ul.blockList ul.blockList li.blockList h3 {
- font-style:normal;
-}
-
-div.block {
- font-size:14px;
- font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif;
-}
-
-td.colLast div {
- padding-top:0px;
-}
-
-
-td.colLast a {
- padding-bottom:3px;
-}
-/*
-Formatting effect styles
-*/
-.sourceLineNo {
- color:green;
- padding:0 30px 0 0;
-}
-h1.hidden {
- visibility:hidden;
- overflow:hidden;
- font-size:10px;
-}
-.block {
- display:block;
- margin:3px 10px 2px 0px;
- color:#474747;
-}
-.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink,
-.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel,
-.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink {
- font-weight:bold;
-}
-.deprecationComment, .emphasizedPhrase, .interfaceName {
- font-style:italic;
-}
-
-div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase,
-div.block div.block span.interfaceName {
- font-style:normal;
-}
-
-div.contentContainer ul.blockList li.blockList h2{
- padding-bottom:0px;
-}
-
-/*
-Overrides
-@import "stylesheetOrig.css";
-*/
-
-
-
-body {
- font-family: Arial, Helvetica, sans-serif;
- font-size: 12px;
-}
-pre {
- font-family: monospace;
- font-size: 12px;
-}
-code, tt, dt code, table tr td dt code {
- font-family: monospace;
- font-size: 12px;
-}
-.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt {
- font-size: 13px;
-}
-.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd {
- margin-left: 20px;
- font-size: 12px;
- font-family: inherit;
-}
-div.block {
- font-size: 12px;
- font-family: inherit;
-}
-h4 {
- font-size: 15px;
-}
-.memberSummary caption {
- padding-top: 0;
-}
-div.summary th {
- border: 1px solid #9eadc0;
-}
-div.summary td {
- border-left: 1px solid #9eadc0;
- border-right: 1px solid #9eadc0;
-}
-div.summary th.colFirst,
-div.summary td.colFirst {
- border-right: none;
-}
-div.summary th.colLast,
-div.summary td.colLast {
- border-left: none;
-}
-div.summary table {
- border-bottom: 1px solid #9eadc0;
- margin-bottom: 15px;
-}
-div.summary ul.blockList ul.blockList ul.blockList {
- margin-top: 20px;
-}
-ul.blockList ul.blockList li.blockList,
-ul.blockList ul.blockList ul.blockList li.blockList,
-ul.blockList ul.blockList ul.blockListLast li.blockList {
- border: 1px solid #9eadc0;
-}
-div.summary ul.blockList ul.blockList ul.blockList li.blockList h3,
-div.details ul.blockList ul.blockList ul.blockList li.blockList h4,
-div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 {
- border-bottom: 1px solid #9eadc0;
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/hll/SketchToEstimateTest.java b/src/test/java/com/yahoo/sketches/pig/hll/SketchToEstimateTest.java
deleted file mode 100644
index 71b5d45..0000000
--- a/src/test/java/com/yahoo/sketches/pig/hll/SketchToEstimateTest.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.TupleFactory;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import com.yahoo.sketches.hll.HllSketch;
-
-public class SketchToEstimateTest {
-
- private static final TupleFactory tupleFactory = TupleFactory.getInstance();
-
- @Test
- public void nullInputTuple() throws Exception {
- EvalFunc<Double> func = new SketchToEstimate();
- Double result = func.exec(null);
- Assert.assertNull(result);
- }
-
- @Test
- public void emptyInputTuple() throws Exception {
- EvalFunc<Double> func = new SketchToEstimate();
- Double result = func.exec(tupleFactory.newTuple());
- Assert.assertNull(result);
- }
-
- @Test
- public void normalCase() throws Exception {
- EvalFunc<Double> func = new SketchToEstimate();
- HllSketch sketch = new HllSketch(12);
- sketch.update(1);
- sketch.update(2);
- Double result = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.toCompactByteArray())));
- Assert.assertNotNull(result);
- Assert.assertEquals(result, 2.0, 0.01);
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/hll/SketchToStringTest.java b/src/test/java/com/yahoo/sketches/pig/hll/SketchToStringTest.java
deleted file mode 100644
index 0a4db90..0000000
--- a/src/test/java/com/yahoo/sketches/pig/hll/SketchToStringTest.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.hll;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.TupleFactory;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import com.yahoo.sketches.hll.HllSketch;
-
-public class SketchToStringTest {
-
- private static final TupleFactory tupleFactory = TupleFactory.getInstance();
-
- @Test
- public void nullInputTuple() throws Exception {
- EvalFunc<String> func = new SketchToString();
- String result = func.exec(null);
- Assert.assertNull(result);
- }
-
- @Test
- public void emptyInputTuple() throws Exception {
- EvalFunc<String> func = new SketchToString();
- String result = func.exec(tupleFactory.newTuple());
- Assert.assertNull(result);
- }
-
- @Test
- public void normalCase() throws Exception {
- EvalFunc<String> func = new SketchToString();
- HllSketch sketch = new HllSketch(12);
- String result = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.toCompactByteArray())));
- Assert.assertNotNull(result);
- Assert.assertTrue(result.length() > 0);
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/SketchToStringTest.java b/src/test/java/com/yahoo/sketches/pig/kll/SketchToStringTest.java
deleted file mode 100644
index 62e1d09..0000000
--- a/src/test/java/com/yahoo/sketches/pig/kll/SketchToStringTest.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-
-package com.yahoo.sketches.pig.kll;
-
-import java.util.Arrays;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.TupleFactory;
-
-import org.testng.annotations.Test;
-
-import com.yahoo.sketches.kll.KllFloatsSketch;
-
-import org.testng.Assert;
-
-public class SketchToStringTest {
- private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
-
- @Test
- public void normalCase() throws Exception {
- final EvalFunc<String> func = new SketchToString();
- final KllFloatsSketch sketch = new KllFloatsSketch();
- final String result = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()))));
- Assert.assertNotNull(result);
- }
-
- @Test(expectedExceptions = IllegalArgumentException.class)
- public void noInputs() throws Exception {
- final EvalFunc<String> func = new SketchToString();
- func.exec(TUPLE_FACTORY.newTuple());
- }
-
- @Test(expectedExceptions = IllegalArgumentException.class)
- public void tooManyInputs() throws Exception {
- final EvalFunc<String> func = new SketchToString();
- func.exec(TUPLE_FACTORY.newTuple(2));
- }
-
- @Test(expectedExceptions = IllegalArgumentException.class)
- public void wrongTypeForSketch() throws Exception {
- final EvalFunc<String> func = new SketchToString();
- func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(1.0)));
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/theta/EstimateTest.java b/src/test/java/com/yahoo/sketches/pig/theta/EstimateTest.java
deleted file mode 100644
index 4991866..0000000
--- a/src/test/java/com/yahoo/sketches/pig/theta/EstimateTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.pig.theta;
-
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
-import static com.yahoo.sketches.pig.PigTestingUtil.createDbaFromQssRange;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertNotNull;
-import static org.testng.Assert.assertNull;
-
-import java.io.IOException;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.testng.annotations.Test;
-
-import com.yahoo.sketches.pig.theta.Estimate;
-
-/**
- * @author Lee Rhodes
- */
-public class EstimateTest {
-
- @Test
- public void testNullEmpty() throws IOException {
- EvalFunc<Double> func = new Estimate();
- Tuple inputTuple = null;
- Double returnValue = func.exec(inputTuple);
- assertNull(returnValue);
-
- inputTuple = TupleFactory.getInstance().newTuple(0);
- returnValue = func.exec(inputTuple);
- assertNull(returnValue);
- }
-
- @Test
- public void testExact() throws IOException {
- EvalFunc<Double> func = new Estimate();
-
- Tuple dataTuple = TupleFactory.getInstance().newTuple(1);
- dataTuple.set(0, createDbaFromQssRange(64, 0, 64));
-
- Double result = func.exec(dataTuple);
- assertNotNull(result);
- assertEquals(result, 64.0, 0.0);
- }
-
- @Test
- public void testExactWithSeed() throws IOException {
- EvalFunc<Double> func = new Estimate(Long.toString(DEFAULT_UPDATE_SEED));
-
- Tuple dataTuple = TupleFactory.getInstance().newTuple(1);
- dataTuple.set(0, createDbaFromQssRange(64, 0, 64));
-
- Double result = func.exec(dataTuple);
- assertNotNull(result);
- assertEquals(result, 64.0, 0.0);
- }
-
- @Test
- public void printlnTest() {
- println(this.getClass().getSimpleName());
- }
-
- /**
- * @param s value to print
- */
- static void println(String s) {
- //System.out.println(s); //disable here
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/theta/PigUtilTest.java b/src/test/java/com/yahoo/sketches/pig/theta/PigUtilTest.java
deleted file mode 100644
index 4e3a270..0000000
--- a/src/test/java/com/yahoo/sketches/pig/theta/PigUtilTest.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.pig.theta;
-
-import static com.yahoo.sketches.pig.theta.PigUtil.compactOrderedSketchToTuple;
-import static com.yahoo.sketches.pig.theta.PigUtil.extractTypeAtIndex;
-import static org.testng.Assert.assertNull;
-
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.testng.annotations.Test;
-
-import com.yahoo.sketches.theta.CompactSketch;
-import com.yahoo.sketches.theta.UpdateSketch;
-
-/**
- * @author Lee Rhodes
- */
-public class PigUtilTest {
-
- @Test(expectedExceptions = IllegalArgumentException.class)
- public void checkCompOrdSketchToTuple() {
- UpdateSketch usk = UpdateSketch.builder().setNominalEntries(16).build();
- for (int i=0; i<16; i++) usk.update(i);
- CompactSketch csk = usk.compact(false, null);
- compactOrderedSketchToTuple(csk);
- }
-
- @Test
- public void checkExtractTypeAtIndex() {
- Tuple tuple = TupleFactory.getInstance().newTuple(0);
- assertNull(extractTypeAtIndex(tuple, 0));
- }
-
- @Test
- public void printlnTest() {
- println(this.getClass().getSimpleName());
- }
-
- /**
- * @param s value to print
- */
- static void println(String s) {
- //System.out.println(s); //disable here
- }
-
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToEstimatesTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToEstimatesTest.java
deleted file mode 100644
index d8e9fb9..0000000
--- a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToEstimatesTest.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.pig.tuple;
-
-import org.testng.annotations.Test;
-import org.testng.Assert;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.sketches.tuple.UpdatableSketch;
-import com.yahoo.sketches.tuple.UpdatableSketchBuilder;
-import com.yahoo.sketches.tuple.adouble.DoubleSummary;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryFactory;
-
-public class DoubleSummarySketchToEstimatesTest {
- @Test
- public void nullInput() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToEstimates();
- Tuple resultTuple = func.exec(null);
- Assert.assertNull(resultTuple);
- }
-
- @Test
- public void emptyInput() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToEstimates();
- Tuple resultTuple = func.exec(TupleFactory.getInstance().newTuple());
- Assert.assertNull(resultTuple);
- }
-
- @Test
- public void emptySketch() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToEstimates();
- UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
- Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()));
- Tuple resultTuple = func.exec(inputTuple);
- Assert.assertNotNull(resultTuple);
- Assert.assertEquals(resultTuple.size(), 2);
- Assert.assertEquals(resultTuple.get(0), 0.0);
- Assert.assertEquals(resultTuple.get(0), 0.0);
- }
-
- @Test
- public void normalCase() throws Exception {
- EvalFunc<Tuple> func = new DoubleSummarySketchToEstimates();
- UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
- int iterations = 100000;
- for (int i = 0; i < iterations; i++) sketch.update(i, 1.0);
- for (int i = 0; i < iterations; i++) sketch.update(i, 1.0);
- Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()));
- Tuple resultTuple = func.exec(inputTuple);
- Assert.assertNotNull(resultTuple);
- Assert.assertEquals(resultTuple.size(), 2);
- Assert.assertEquals((double) resultTuple.get(0), iterations, iterations * 0.03);
- Assert.assertEquals((double) resultTuple.get(1), 2 * iterations, 2 * iterations * 0.03);
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java b/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
deleted file mode 100644
index 3738b22..0000000
--- a/src/test/java/com/yahoo/sketches/pig/tuple/DoubleSummarySketchToPercentileTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
- */
-package com.yahoo.sketches.pig.tuple;
-
-import org.testng.annotations.Test;
-import org.testng.Assert;
-
-import java.util.Arrays;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-
-import com.yahoo.sketches.tuple.UpdatableSketch;
-import com.yahoo.sketches.tuple.UpdatableSketchBuilder;
-import com.yahoo.sketches.tuple.adouble.DoubleSummary;
-import com.yahoo.sketches.tuple.adouble.DoubleSummaryFactory;
-
-public class DoubleSummarySketchToPercentileTest {
- @Test
- public void emptySketch() throws Exception {
- EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
- UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
- Tuple inputTuple = TupleFactory.getInstance().newTuple(Arrays.asList(new DataByteArray(sketch.compact().toByteArray()), 0.0));
- double result = func.exec(inputTuple);
- Assert.assertEquals(result, Double.NaN);
- }
-
- @Test
- public void normalCase() throws Exception {
- EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
- UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
- int iterations = 100000;
- for (int i = 0; i < iterations; i++) sketch.update(i, (double) i);
- for (int i = 0; i < iterations; i++) sketch.update(i, (double) i);
- Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 50.0);
- double result = func.exec(inputTuple);
- Assert.assertEquals(result, iterations, iterations * 0.02);
- }
-
- @Test(expectedExceptions = IllegalArgumentException.class)
- public void wrongNumberOfInputs() throws Exception {
- EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
- func.exec(PigUtil.objectsToTuple(1.0));
- }
-
- @Test(expectedExceptions = IllegalArgumentException.class)
- public void percentileOutOfRange() throws Exception {
- EvalFunc<Double> func = new DoubleSummarySketchToPercentile();
- UpdatableSketch<Double, DoubleSummary> sketch = new UpdatableSketchBuilder<Double, DoubleSummary>(new DoubleSummaryFactory()).build();
- func.exec(PigUtil.objectsToTuple(new DataByteArray(sketch.compact().toByteArray()), 200.0));
- }
-}
diff --git a/src/test/java/com/yahoo/sketches/pig/PigTestingUtil.java b/src/test/java/org/apache/datasketches/pig/PigTestingUtil.java
similarity index 66%
rename from src/test/java/com/yahoo/sketches/pig/PigTestingUtil.java
rename to src/test/java/org/apache/datasketches/pig/PigTestingUtil.java
index f17d6d4..5a3af0a 100644
--- a/src/test/java/com/yahoo/sketches/pig/PigTestingUtil.java
+++ b/src/test/java/org/apache/datasketches/pig/PigTestingUtil.java
@@ -1,27 +1,40 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig;
+
+package org.apache.datasketches.pig;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
-import com.yahoo.sketches.Family;
-import com.yahoo.sketches.theta.UpdateSketch;
+import org.apache.datasketches.Family;
+import org.apache.datasketches.theta.UpdateSketch;
-/**
- * @author Lee Rhodes
- */
+@SuppressWarnings("javadoc")
public class PigTestingUtil {
public static final String LS = System.getProperty("line.separator");
-
-
+
+
/**
* Returns a tuple constructed from the given array of objects.
- *
+ *
* @param in Array of objects.
* @throws ExecException this is thrown by Pig
* @return tuple
@@ -34,11 +47,11 @@
}
return tuple;
}
-
+
/**
* Returns a Pig DataByteArray constructed from a QuickSelectSketch.
- *
- * @param nomSize of the Sketch. Note, minimum size is 16.
+ *
+ * @param nomSize of the Sketch. Note, minimum size is 16.
* Cache size will autoscale from a minimum of 16.
* @param start start value
* @param numValues number of values in the range
@@ -52,11 +65,11 @@
byte[] byteArr = skA.compact(true, null).toByteArray();
return new DataByteArray(byteArr);
}
-
+
/**
* Returns a Pig DataByteArray constructed from a AlphaSketch.
- *
- * @param nomSize of the Sketch. Note, minimum nominal size is 512.
+ *
+ * @param nomSize of the Sketch. Note, minimum nominal size is 512.
* Cache size will autoscale from a minimum of 512.
* @param start start value
* @param numValues number of values in the range
@@ -71,12 +84,12 @@
byte[] byteArr = skA.compact(true, null).toByteArray();
return new DataByteArray(byteArr);
}
-
+
/**
- * @param s value to print
+ * @param s value to print
*/
static void println(String s) {
//System.out.println(s);
}
-
+
}
diff --git a/src/test/java/com/yahoo/sketches/pig/cpc/DataToSketchTest.java b/src/test/java/org/apache/datasketches/pig/cpc/DataToSketchTest.java
similarity index 92%
rename from src/test/java/com/yahoo/sketches/pig/cpc/DataToSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/cpc/DataToSketchTest.java
index 4ee2ac0..b18bbf6 100644
--- a/src/test/java/com/yahoo/sketches/pig/cpc/DataToSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/cpc/DataToSketchTest.java
@@ -1,11 +1,25 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
-import static com.yahoo.sketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -17,8 +31,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcSketch;
+@SuppressWarnings("javadoc")
public class DataToSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
@@ -320,7 +335,7 @@
static CpcSketch getSketch(final DataByteArray dba, final long seed) throws Exception {
Assert.assertNotNull(dba);
Assert.assertTrue(dba.size() > 0);
- return CpcSketch.heapify(dba.get(), seed);
+ return CpcSketch.heapify(dba.get(), seed);
}
}
diff --git a/src/test/java/com/yahoo/sketches/pig/cpc/GetEstimateAndErrorBoundsTest.java b/src/test/java/org/apache/datasketches/pig/cpc/GetEstimateAndErrorBoundsTest.java
similarity index 77%
rename from src/test/java/com/yahoo/sketches/pig/cpc/GetEstimateAndErrorBoundsTest.java
rename to src/test/java/org/apache/datasketches/pig/cpc/GetEstimateAndErrorBoundsTest.java
index de92ba3..66036b1 100644
--- a/src/test/java/com/yahoo/sketches/pig/cpc/GetEstimateAndErrorBoundsTest.java
+++ b/src/test/java/org/apache/datasketches/pig/cpc/GetEstimateAndErrorBoundsTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
@@ -15,8 +29,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcSketch;
+@SuppressWarnings("javadoc")
public class GetEstimateAndErrorBoundsTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/cpc/GetEstimateTest.java b/src/test/java/org/apache/datasketches/pig/cpc/GetEstimateTest.java
similarity index 61%
rename from src/test/java/com/yahoo/sketches/pig/cpc/GetEstimateTest.java
rename to src/test/java/org/apache/datasketches/pig/cpc/GetEstimateTest.java
index 2563b89..e889c4e 100644
--- a/src/test/java/com/yahoo/sketches/pig/cpc/GetEstimateTest.java
+++ b/src/test/java/org/apache/datasketches/pig/cpc/GetEstimateTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
@@ -11,8 +25,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcSketch;
+@SuppressWarnings("javadoc")
public class GetEstimateTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/cpc/SketchToStringTest.java b/src/test/java/org/apache/datasketches/pig/cpc/SketchToStringTest.java
similarity index 66%
rename from src/test/java/com/yahoo/sketches/pig/cpc/SketchToStringTest.java
rename to src/test/java/org/apache/datasketches/pig/cpc/SketchToStringTest.java
index 17f308b..eb0a5fe 100644
--- a/src/test/java/com/yahoo/sketches/pig/cpc/SketchToStringTest.java
+++ b/src/test/java/org/apache/datasketches/pig/cpc/SketchToStringTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
@@ -11,8 +25,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcSketch;
+@SuppressWarnings("javadoc")
public class SketchToStringTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/cpc/UnionSketchTest.java b/src/test/java/org/apache/datasketches/pig/cpc/UnionSketchTest.java
similarity index 92%
rename from src/test/java/com/yahoo/sketches/pig/cpc/UnionSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/cpc/UnionSketchTest.java
index 28cb96a..55e8b78 100644
--- a/src/test/java/com/yahoo/sketches/pig/cpc/UnionSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/cpc/UnionSketchTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.cpc;
+package org.apache.datasketches.pig.cpc;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -15,8 +29,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.cpc.CpcSketch;
+import org.apache.datasketches.cpc.CpcSketch;
+@SuppressWarnings("javadoc")
public class UnionSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/frequencies/DataToFrequentStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/frequencies/DataToFrequentStringsSketchTest.java
similarity index 86%
rename from src/test/java/com/yahoo/sketches/pig/frequencies/DataToFrequentStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/frequencies/DataToFrequentStringsSketchTest.java
index 14139c9..63e4052 100644
--- a/src/test/java/com/yahoo/sketches/pig/frequencies/DataToFrequentStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/frequencies/DataToFrequentStringsSketchTest.java
@@ -1,8 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+
+package org.apache.datasketches.pig.frequencies;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -14,12 +29,14 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-import com.yahoo.sketches.pig.tuple.PigUtil;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
+import org.apache.datasketches.pig.tuple.PigUtil;
+@SuppressWarnings("javadoc")
public class DataToFrequentStringsSketchTest {
+
@Test
public void execNullInputTuple() throws Exception {
EvalFunc<Tuple> func = new DataToFrequentStringsSketch("8");
@@ -174,7 +191,7 @@
bag.add(PigUtil.objectsToTuple(PigUtil.tuplesToBag(PigUtil.objectsToTuple("a"))));
// this is to simulate the output from a prior call of IntermediateFinal
- ItemsSketch<String> s = new ItemsSketch<String>(8);
+ ItemsSketch<String> s = new ItemsSketch<>(8);
s.update("b", 1L);
s.update("a", 2L);
s.update("b", 3L);
diff --git a/src/test/java/com/yahoo/sketches/pig/frequencies/FrequentStringsSketchToEstimatesTest.java b/src/test/java/org/apache/datasketches/pig/frequencies/FrequentStringsSketchToEstimatesTest.java
similarity index 77%
rename from src/test/java/com/yahoo/sketches/pig/frequencies/FrequentStringsSketchToEstimatesTest.java
rename to src/test/java/org/apache/datasketches/pig/frequencies/FrequentStringsSketchToEstimatesTest.java
index 2f43424..d981e83 100644
--- a/src/test/java/com/yahoo/sketches/pig/frequencies/FrequentStringsSketchToEstimatesTest.java
+++ b/src/test/java/org/apache/datasketches/pig/frequencies/FrequentStringsSketchToEstimatesTest.java
@@ -1,8 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+
+package org.apache.datasketches.pig.frequencies;
import java.util.Iterator;
@@ -17,11 +32,13 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-import com.yahoo.sketches.pig.tuple.PigUtil;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.frequencies.ItemsSketch;
+import org.apache.datasketches.pig.tuple.PigUtil;
+@SuppressWarnings("javadoc")
public class FrequentStringsSketchToEstimatesTest {
+
@Test
public void nullInput() throws Exception {
EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
@@ -39,7 +56,7 @@
@Test
public void emptySketch() throws Exception {
EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe())));
DataBag bag = func.exec(inputTuple);
Assert.assertNotNull(bag);
@@ -49,7 +66,7 @@
@Test
public void exact() throws Exception {
EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("a");
sketch.update("b");
@@ -76,7 +93,7 @@
@Test
public void estimation() throws Exception {
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("1", 1000);
sketch.update("2", 500);
sketch.update("3", 200);
diff --git a/src/test/java/com/yahoo/sketches/pig/frequencies/UnionFrequentStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/frequencies/UnionFrequentStringsSketchTest.java
similarity index 86%
rename from src/test/java/com/yahoo/sketches/pig/frequencies/UnionFrequentStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/frequencies/UnionFrequentStringsSketchTest.java
index 84e20fd..3645aba 100644
--- a/src/test/java/com/yahoo/sketches/pig/frequencies/UnionFrequentStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/frequencies/UnionFrequentStringsSketchTest.java
@@ -1,8 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.frequencies;
+
+package org.apache.datasketches.pig.frequencies;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -14,13 +29,15 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.frequencies.ErrorType;
-import com.yahoo.sketches.frequencies.ItemsSketch;
-import com.yahoo.sketches.pig.tuple.PigUtil;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.frequencies.ErrorType;
+import org.apache.datasketches.frequencies.ItemsSketch;
+import org.apache.datasketches.pig.tuple.PigUtil;
+@SuppressWarnings("javadoc")
public class UnionFrequentStringsSketchTest {
+
@Test
public void execNullInput() throws Exception {
EvalFunc<Tuple> func = new UnionFrequentStringsSketch("8");
@@ -40,13 +57,13 @@
EvalFunc<Tuple> func = new UnionFrequentStringsSketch("8");
DataBag bag = BagFactory.getInstance().newDefaultBag();
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("b");
bag.add(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
}
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("b");
bag.add(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
@@ -151,7 +168,7 @@
Accumulator<Tuple> func = new UnionFrequentStringsSketch("8");
DataBag bag = BagFactory.getInstance().newDefaultBag();
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
bag.add(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
}
func.accumulate(PigUtil.objectsToTuple(bag));
@@ -170,7 +187,7 @@
Accumulator<Tuple> func = new UnionFrequentStringsSketch("8");
DataBag bag = BagFactory.getInstance().newDefaultBag();
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("b");
bag.add(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
@@ -179,7 +196,7 @@
bag = BagFactory.getInstance().newDefaultBag();
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("b");
bag.add(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
@@ -220,7 +237,7 @@
// this is to simulate the output from Initial
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("b");
DataBag innerBag = PigUtil.tuplesToBag(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
@@ -229,7 +246,7 @@
// this is to simulate the output from a prior call of IntermediateFinal
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a", 2L);
sketch.update("b", 3L);
bag.add(PigUtil.objectsToTuple(new DataByteArray(sketch.toByteArray(new ArrayOfStringsSerDe()))));
@@ -254,7 +271,7 @@
// this is to simulate the output from Initial
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a", 10);
sketch.update("b");
sketch.update("c");
@@ -269,7 +286,7 @@
// this is to simulate the output from a prior call of IntermediateFinal
{
- ItemsSketch<String> sketch = new ItemsSketch<String>(8);
+ ItemsSketch<String> sketch = new ItemsSketch<>(8);
sketch.update("a");
sketch.update("a");
sketch.update("g", 5);
@@ -295,7 +312,9 @@
// only 2 items ("a" and "g") should have counts more than 1
int count = 0;
for (ItemsSketch.Row<String> item: items) {
- if (item.getLowerBound() > 1) count++;
+ if (item.getLowerBound() > 1) {
+ count++;
+ }
}
Assert.assertEquals(count, 2);
}
diff --git a/src/test/java/com/yahoo/sketches/pig/hash/MurmurHash3Test.java b/src/test/java/org/apache/datasketches/pig/hash/MurmurHash3Test.java
similarity index 89%
rename from src/test/java/com/yahoo/sketches/pig/hash/MurmurHash3Test.java
rename to src/test/java/org/apache/datasketches/pig/hash/MurmurHash3Test.java
index 43ff230..caf2c5f 100644
--- a/src/test/java/com/yahoo/sketches/pig/hash/MurmurHash3Test.java
+++ b/src/test/java/org/apache/datasketches/pig/hash/MurmurHash3Test.java
@@ -1,10 +1,25 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hash;
-import static com.yahoo.sketches.pig.PigTestingUtil.LS;
+package org.apache.datasketches.pig.hash;
+
+import static org.apache.datasketches.pig.PigTestingUtil.LS;
import java.io.IOException;
@@ -21,15 +36,13 @@
/**
* Tests the MurmurHash3 class.
- *
- * @author Lee Rhodes
*/
-@SuppressWarnings({ "unused", "unchecked" })
+@SuppressWarnings({ "unused", "unchecked", "javadoc" })
public class MurmurHash3Test {
private static final TupleFactory mTupleFactory = TupleFactory.getInstance();
-
- private String hashUdfName = "com.yahoo.sketches.pig.hash.MurmurHash3";
-
+
+ private String hashUdfName = "org.apache.datasketches.pig.hash.MurmurHash3";
+
@Test
public void checkExceptions1() throws IOException {
EvalFunc<Tuple> hashUdf =
@@ -40,7 +53,7 @@
out = hashUdf.exec(in);
Assert.assertNull(out);
}
-
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkExceptions2() throws IOException {
EvalFunc<Tuple> hashUdf =
@@ -52,7 +65,7 @@
in.set(1, new Double(9001));
out = hashUdf.exec(in);
}
-
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkExceptions3() throws IOException {
EvalFunc<Tuple> hashUdf =
@@ -63,7 +76,7 @@
in.set(0, in);
out = hashUdf.exec(in);
}
-
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkExceptions4() throws IOException {
EvalFunc<Tuple> hashUdf =
@@ -76,7 +89,7 @@
in.set(2, new Long(8));
out = hashUdf.exec(in);
}
-
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkExceptions5() throws IOException {
EvalFunc<Tuple> hashUdf =
@@ -89,62 +102,62 @@
in.set(2, new Integer(0));
out = hashUdf.exec(in);
}
-
+
@Test
public void check1ValidArg() throws IOException {
EvalFunc<Tuple> hashUdf =
(EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(hashUdfName));
Tuple in, out;
-
+
//test Integer, Long, Float, Double, DataByteArray, String
in = mTupleFactory.newTuple(1);
-
+
in.set(0, null);
out = hashUdf.exec(in);
Assert.assertNull(out.get(0));
Assert.assertNull(out.get(1));
Assert.assertNull(out.get(2));
-
+
in.set(0, new Integer(1));
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new Long(1));
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new Float(1));
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new Double(0.0));
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new Double( -0.0));
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, Double.NaN);
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new String("1"));
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new String("")); //empty
out = hashUdf.exec(in);
Assert.assertNull(out.get(0));
Assert.assertNull(out.get(1));
Assert.assertNull(out.get(2));
-
+
byte[] bArr = { 1, 2, 3, 4 };
DataByteArray dba = new DataByteArray(bArr);
in.set(0, dba);
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
bArr = new byte[0]; //empty
dba = new DataByteArray(bArr);
in.set(0, dba);
@@ -153,69 +166,69 @@
Assert.assertNull(out.get(1));
Assert.assertNull(out.get(2));
}
-
+
@Test
public void check2ValidArg() throws IOException {
EvalFunc<Tuple> hashUdf =
(EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(hashUdfName));
Tuple in, out;
-
+
//test String, seed
in = mTupleFactory.newTuple(2);
-
+
in.set(0, new String("1"));
//2nd is null
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new String("1"));
in.set(1, 9001);
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new String("1"));
in.set(1, 9001L);
out = hashUdf.exec(in);
checkOutput(out, false);
}
-
+
@Test
public void check3ValidArg() throws IOException {
EvalFunc<Tuple> hashUdf =
(EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(hashUdfName));
Tuple in, out;
-
+
//test String, seed
in = mTupleFactory.newTuple(3);
-
+
in.set(0, new String("1"));
//2nd is null
//3rd is null
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new String("1"));
in.set(1, 9001);
//3rd is null
out = hashUdf.exec(in);
checkOutput(out, false);
-
+
in.set(0, new String("1"));
in.set(1, 9001);
in.set(2, 7);
out = hashUdf.exec(in);
checkOutput(out, true);
}
-
+
@Test
public void check3ValidArgs() throws IOException {
EvalFunc<Tuple> hashUdf =
(EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(hashUdfName));
Tuple in, out;
-
+
//test multiple integers, seed
in = mTupleFactory.newTuple(3);
-
+
for (int i = 0; i < 10; i++ ) {
in.set(0, i);
in.set(1, 9001);
@@ -224,7 +237,7 @@
checkOutput(out, true);
}
}
-
+
private static void checkOutput(Tuple out, boolean checkMod) throws IOException {
long h0 = (Long) out.get(0);
long h1 = (Long) out.get(1);
@@ -235,7 +248,7 @@
Assert.assertTrue(r >= 0, "" + r);
}
}
-
+
/**
* Test the outputSchema method for MurmurHash3.
* @throws IOException thrown by Pig
@@ -244,54 +257,54 @@
public void outputSchemaTestMurmurHash3Udf() throws IOException {
EvalFunc<Tuple> hashUdf =
(EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(new FuncSpec(hashUdfName));
-
+
Schema inputSchema = null;
-
+
Schema nullOutputSchema = null;
-
+
Schema outputSchema = null;
Schema.FieldSchema outputOuterFs0 = null;
-
+
Schema outputInnerSchema = null;
Schema.FieldSchema outputInnerFs0 = null;
Schema.FieldSchema outputInnerFs1 = null;
Schema.FieldSchema outputInnerFs2 = null;
-
+
nullOutputSchema = hashUdf.outputSchema(null);
-
+
//CHARARRAY is one of many different input types
inputSchema = Schema.generateNestedSchema(DataType.BAG, DataType.CHARARRAY);
-
+
outputSchema = hashUdf.outputSchema(inputSchema);
outputOuterFs0 = outputSchema.getField(0);
-
+
outputInnerSchema = outputOuterFs0.schema;
outputInnerFs0 = outputInnerSchema.getField(0);
outputInnerFs1 = outputInnerSchema.getField(1);
outputInnerFs2 = outputInnerSchema.getField(2);
-
+
Assert.assertNull(nullOutputSchema, "Should be null");
Assert.assertNotNull(outputOuterFs0, "outputSchema.getField(0) may not be null");
-
+
String expected = "tuple";
String result = DataType.findTypeName(outputOuterFs0.type);
Assert.assertEquals(result, expected);
-
+
expected = "long";
Assert.assertNotNull(outputInnerFs0, "innerSchema.getField(0) may not be null");
result = DataType.findTypeName(outputInnerFs0.type);
Assert.assertEquals(result, expected);
-
+
expected = "long";
Assert.assertNotNull(outputInnerFs1, "innerSchema.getField(1) may not be null");
result = DataType.findTypeName(outputInnerFs1.type);
Assert.assertEquals(result, expected);
-
+
expected = "int";
Assert.assertNotNull(outputInnerFs2, "innerSchema.getField(2) may not be null");
result = DataType.findTypeName(outputInnerFs2.type);
Assert.assertEquals(result, expected);
-
+
//print schemas
//@formatter:off
StringBuilder sb = new StringBuilder();
@@ -310,17 +323,17 @@
//@formatter:on
//end print schemas
}
-
+
@Test
public void printlnTest() {
println(this.getClass().getSimpleName());
}
-
+
/**
- * @param s value to print
+ * @param s value to print
*/
static void println(String s) {
//System.out.println(s); //disable here
}
-
+
}
diff --git a/src/test/java/com/yahoo/sketches/pig/hll/DataToSketchTest.java b/src/test/java/org/apache/datasketches/pig/hll/DataToSketchTest.java
similarity index 91%
rename from src/test/java/com/yahoo/sketches/pig/hll/DataToSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/hll/DataToSketchTest.java
index 1e16cee..407d780 100644
--- a/src/test/java/com/yahoo/sketches/pig/hll/DataToSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/hll/DataToSketchTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -15,9 +29,10 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.TgtHllType;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+@SuppressWarnings("javadoc")
public class DataToSketchTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
@@ -293,7 +308,7 @@
static HllSketch getSketch(DataByteArray dba) throws Exception {
Assert.assertNotNull(dba);
Assert.assertTrue(dba.size() > 0);
- return HllSketch.heapify(dba.get());
+ return HllSketch.heapify(dba.get());
}
}
diff --git a/src/test/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBoundsTest.java b/src/test/java/org/apache/datasketches/pig/hll/SketchToEstimateAndErrorBoundsTest.java
similarity index 70%
rename from src/test/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBoundsTest.java
rename to src/test/java/org/apache/datasketches/pig/hll/SketchToEstimateAndErrorBoundsTest.java
index a59f6bb..745b7ef 100644
--- a/src/test/java/com/yahoo/sketches/pig/hll/SketchToEstimateAndErrorBoundsTest.java
+++ b/src/test/java/org/apache/datasketches/pig/hll/SketchToEstimateAndErrorBoundsTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
@@ -15,8 +29,9 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.hll.HllSketch;
+import org.apache.datasketches.hll.HllSketch;
+@SuppressWarnings("javadoc")
public class SketchToEstimateAndErrorBoundsTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
diff --git a/src/test/java/org/apache/datasketches/pig/hll/SketchToEstimateTest.java b/src/test/java/org/apache/datasketches/pig/hll/SketchToEstimateTest.java
new file mode 100644
index 0000000..4c99a27
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/pig/hll/SketchToEstimateTest.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.TupleFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import org.apache.datasketches.hll.HllSketch;
+
+@SuppressWarnings("javadoc")
+public class SketchToEstimateTest { // TestNG tests that drive SketchToEstimate through the Pig EvalFunc.exec API
+
+ private static final TupleFactory tupleFactory = TupleFactory.getInstance(); // shared factory for input tuples
+
+ @Test // a null input tuple must produce a null result
+ public void nullInputTuple() throws Exception {
+ EvalFunc<Double> func = new SketchToEstimate();
+ Double result = func.exec(null);
+ Assert.assertNull(result);
+ }
+
+ @Test // a tuple created without any fields supplied must also produce a null result
+ public void emptyInputTuple() throws Exception {
+ EvalFunc<Double> func = new SketchToEstimate();
+ Double result = func.exec(tupleFactory.newTuple());
+ Assert.assertNull(result);
+ }
+
+ @Test // a serialized sketch updated with two different values must give an estimate of about 2
+ public void normalCase() throws Exception {
+ EvalFunc<Double> func = new SketchToEstimate();
+ HllSketch sketch = new HllSketch(12); // NOTE(review): 12 is presumably lgConfigK - confirm against HllSketch
+ sketch.update(1);
+ sketch.update(2);
+ Double result = func.exec(tupleFactory.newTuple(new DataByteArray(sketch.toCompactByteArray()))); // compact bytes in
+ Assert.assertNotNull(result);
+ Assert.assertEquals(result, 2.0, 0.01); // TestNG order: actual, expected, allowed delta
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/pig/hll/SketchToStringTest.java b/src/test/java/org/apache/datasketches/pig/hll/SketchToStringTest.java
new file mode 100644
index 0000000..f977be7
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/pig/hll/SketchToStringTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.hll;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.TupleFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import org.apache.datasketches.hll.HllSketch;
+
+@SuppressWarnings("javadoc")
+public class SketchToStringTest {
+  // Exercises the HLL SketchToString.exec with null, empty and populated input tuples.
+  private static final TupleFactory tupleFactory = TupleFactory.getInstance();
+
+  @Test
+  public void nullInputTuple() throws Exception {
+    // A null tuple is expected to produce a null string.
+    final EvalFunc<String> udf = new SketchToString();
+    Assert.assertNull(udf.exec(null));
+  }
+
+  @Test
+  public void emptyInputTuple() throws Exception {
+    // A tuple without fields is expected to produce a null string.
+    final EvalFunc<String> udf = new SketchToString();
+    Assert.assertNull(udf.exec(tupleFactory.newTuple()));
+  }
+
+  @Test
+  public void normalCase() throws Exception {
+    // Even a sketch with no updates must render to a non-null, non-empty string.
+    final DataByteArray serialized = new DataByteArray(new HllSketch(12).toCompactByteArray());
+    final String summary = new SketchToString().exec(tupleFactory.newTuple(serialized));
+    Assert.assertNotNull(summary);
+    Assert.assertTrue(summary.length() > 0);
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/hll/UnionSketchTest.java b/src/test/java/org/apache/datasketches/pig/hll/UnionSketchTest.java
similarity index 90%
rename from src/test/java/com/yahoo/sketches/pig/hll/UnionSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/hll/UnionSketchTest.java
index 9a0e58b..36cc0b6 100644
--- a/src/test/java/com/yahoo/sketches/pig/hll/UnionSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/hll/UnionSketchTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2017, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.hll;
+package org.apache.datasketches.pig.hll;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -15,9 +29,10 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.sketches.hll.HllSketch;
-import com.yahoo.sketches.hll.TgtHllType;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+@SuppressWarnings("javadoc")
public class UnionSketchTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/DataToSketchTest.java b/src/test/java/org/apache/datasketches/pig/kll/DataToSketchTest.java
similarity index 92%
rename from src/test/java/com/yahoo/sketches/pig/kll/DataToSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/DataToSketchTest.java
index dc342d3..18f3115 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/DataToSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/DataToSketchTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -15,9 +29,10 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.kll.KllFloatsSketch;
+@SuppressWarnings("javadoc")
public class DataToSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
private static final BagFactory BAG_FACTORY = BagFactory.getInstance();
@@ -64,7 +79,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 1);
}
-
+
@Test
public void execMixedNullCase() throws Exception {
final EvalFunc<DataByteArray> func = new DataToSketch();
@@ -113,7 +128,7 @@
sketch = getSketch(result);
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
-
+
// mixed null case
bag = BAG_FACTORY.newDefaultBag();
bag.add(TUPLE_FACTORY.newTuple(Float.valueOf(1)));
@@ -194,7 +209,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
}
-
+
@Test
public void algebraicIntermediateMixedNullCase() throws Exception {
@SuppressWarnings("unchecked")
@@ -274,7 +289,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
}
-
+
@Test
public void algebraicFinalMixedNullCase() throws Exception {
@SuppressWarnings("unchecked")
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/GetCdfTest.java b/src/test/java/org/apache/datasketches/pig/kll/GetCdfTest.java
similarity index 67%
rename from src/test/java/com/yahoo/sketches/pig/kll/GetCdfTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/GetCdfTest.java
index fce1f3f..ab00c32 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/GetCdfTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/GetCdfTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.util.Arrays;
@@ -14,10 +28,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.kll.KllFloatsSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetCdfTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
@@ -33,7 +48,9 @@
public void normalCase() throws Exception {
final EvalFunc<Tuple> func = new GetCdf();
final KllFloatsSketch sketch = new KllFloatsSketch();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
final Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 2f, 7f)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/GetKTest.java b/src/test/java/org/apache/datasketches/pig/kll/GetKTest.java
similarity index 65%
rename from src/test/java/com/yahoo/sketches/pig/kll/GetKTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/GetKTest.java
index e5d65a3..a3cd730 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/GetKTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/GetKTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.util.Arrays;
@@ -13,10 +27,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.kll.KllFloatsSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetKTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/GetPmfTest.java b/src/test/java/org/apache/datasketches/pig/kll/GetPmfTest.java
similarity index 67%
rename from src/test/java/com/yahoo/sketches/pig/kll/GetPmfTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/GetPmfTest.java
index 4693e84..6d6dd4e 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/GetPmfTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/GetPmfTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.util.Arrays;
@@ -14,10 +28,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.kll.KllFloatsSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetPmfTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
@@ -33,7 +48,9 @@
public void normalCase() throws Exception {
final EvalFunc<Tuple> func = new GetPmf();
final KllFloatsSketch sketch = new KllFloatsSketch();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
final Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 2f, 7f)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/GetQuantileTest.java b/src/test/java/org/apache/datasketches/pig/kll/GetQuantileTest.java
similarity index 66%
rename from src/test/java/com/yahoo/sketches/pig/kll/GetQuantileTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/GetQuantileTest.java
index 9b650e3..bde55d2 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/GetQuantileTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/GetQuantileTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.util.Arrays;
@@ -13,10 +27,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.kll.KllFloatsSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetQuantileTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/GetQuantilesTest.java b/src/test/java/org/apache/datasketches/pig/kll/GetQuantilesTest.java
similarity index 75%
rename from src/test/java/com/yahoo/sketches/pig/kll/GetQuantilesTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/GetQuantilesTest.java
index 5dbc463..8e1827f 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/GetQuantilesTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/GetQuantilesTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.util.Arrays;
@@ -14,10 +28,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.kll.KllFloatsSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetQuantilesTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
@@ -61,7 +76,9 @@
public void oneFraction() throws Exception {
final EvalFunc<Tuple> func = new GetQuantiles();
final KllFloatsSketch sketch = new KllFloatsSketch();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
final Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 0.5)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 1);
@@ -72,7 +89,9 @@
public void severalFractions() throws Exception {
final EvalFunc<Tuple> func = new GetQuantiles();
final KllFloatsSketch sketch = new KllFloatsSketch();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
final Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 0.0, 0.5, 1.0)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
@@ -85,7 +104,9 @@
public void numberOfEvenlySpacedIntervals() throws Exception {
final EvalFunc<Tuple> func = new GetQuantiles();
final KllFloatsSketch sketch = new KllFloatsSketch();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
final Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 3)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/GetRankTest.java b/src/test/java/org/apache/datasketches/pig/kll/GetRankTest.java
similarity index 66%
rename from src/test/java/com/yahoo/sketches/pig/kll/GetRankTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/GetRankTest.java
index 83d42df..88fc9d7 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/GetRankTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/GetRankTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import java.util.Arrays;
@@ -13,10 +27,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.kll.KllFloatsSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetRankTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/org/apache/datasketches/pig/kll/SketchToStringTest.java b/src/test/java/org/apache/datasketches/pig/kll/SketchToStringTest.java
new file mode 100644
index 0000000..742336e
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/pig/kll/SketchToStringTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.pig.kll;
+
+import java.util.Arrays;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.TupleFactory;
+
+import org.testng.annotations.Test;
+
+import org.apache.datasketches.kll.KllFloatsSketch;
+
+import org.testng.Assert;
+
+@SuppressWarnings("javadoc")
+public class SketchToStringTest {
+  private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
+
+  @Test
+  public void normalCase() throws Exception {
+    // A serialized KLL sketch (no updates) in a one-field tuple must yield a non-null string.
+    final DataByteArray serialized = new DataByteArray(new KllFloatsSketch().toByteArray());
+    final String summary = new SketchToString().exec(TUPLE_FACTORY.newTuple(Arrays.asList(serialized)));
+    Assert.assertNotNull(summary);
+  }
+
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void noInputs() throws Exception {
+    // A tuple with no fields is expected to be rejected.
+    new SketchToString().exec(TUPLE_FACTORY.newTuple());
+  }
+
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void tooManyInputs() throws Exception {
+    // newTuple(int) takes a field count, so this passes a two-field tuple.
+    new SketchToString().exec(TUPLE_FACTORY.newTuple(2));
+  }
+
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void wrongTypeForSketch() throws Exception {
+    // A Double in the position of the serialized sketch is expected to be rejected.
+    new SketchToString().exec(TUPLE_FACTORY.newTuple(Arrays.asList(1.0)));
+  }
+
+}
diff --git a/src/test/java/com/yahoo/sketches/pig/kll/UnionSketchTest.java b/src/test/java/org/apache/datasketches/pig/kll/UnionSketchTest.java
similarity index 90%
rename from src/test/java/com/yahoo/sketches/pig/kll/UnionSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/kll/UnionSketchTest.java
index dd87b5a..b5c7448 100644
--- a/src/test/java/com/yahoo/sketches/pig/kll/UnionSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/kll/UnionSketchTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.kll;
+package org.apache.datasketches.pig.kll;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -15,9 +29,10 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.kll.KllFloatsSketch;
+@SuppressWarnings("javadoc")
public class UnionSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
private static final BagFactory BAG_FACTORY = BagFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/DataToDoublesSketchTest.java
similarity index 88%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/DataToDoublesSketchTest.java
index f08d4a2..b3b160d 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/DataToDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/DataToDoublesSketchTest.java
@@ -1,8 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+
+package org.apache.datasketches.pig.quantiles;
import org.apache.pig.Accumulator;
import org.apache.pig.EvalFunc;
@@ -16,10 +31,11 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
+@SuppressWarnings("javadoc")
public class DataToDoublesSketchTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
private static final BagFactory bagFactory = BagFactory.getInstance();
@@ -66,7 +82,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 1);
}
-
+
@Test
public void execMixedNullCase() throws Exception {
EvalFunc<Tuple> func = new DataToDoublesSketch();
@@ -115,7 +131,7 @@
sketch = getSketch(resultTuple);
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
-
+
// mixed null case
bag = bagFactory.newDefaultBag();
bag.add(tupleFactory.newTuple(1.0));
@@ -184,7 +200,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
}
-
+
@Test
public void algebraicIntermediateFinalMixedNullCase() throws Exception {
EvalFunc<Tuple> func = new DataToDoublesSketch.IntermediateFinal();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/DataToStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/DataToStringsSketchTest.java
similarity index 89%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/DataToStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/DataToStringsSketchTest.java
index 1f986c5..af1fbd2 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/DataToStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/DataToStringsSketchTest.java
@@ -1,8 +1,23 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+
+package org.apache.datasketches.pig.quantiles;
import java.util.Comparator;
@@ -18,11 +33,12 @@
import org.testng.Assert;
import org.testng.annotations.Test;
-import com.yahoo.memory.Memory;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.quantiles.ItemsSketch;
+@SuppressWarnings("javadoc")
public class DataToStringsSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
@@ -73,7 +89,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 1);
}
-
+
@Test
public void execMixedNormalCase() throws Exception {
EvalFunc<Tuple> func = new DataToStringsSketch();
@@ -122,7 +138,7 @@
sketch = getSketch(resultTuple);
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
-
+
// mixed null case
bag = BAG_FACTORY.newDefaultBag();
bag.add(TUPLE_FACTORY.newTuple("a"));
@@ -200,7 +216,7 @@
Assert.assertFalse(sketch.isEmpty());
Assert.assertEquals(sketch.getN(), 2);
}
-
+
@Test
public void algebraicIntermediateFinalMixedNullCase() throws Exception {
@SuppressWarnings("unchecked")
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/DoublesSketchToStringTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/DoublesSketchToStringTest.java
similarity index 63%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/DoublesSketchToStringTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/DoublesSketchToStringTest.java
index a6549d4..794368b 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/DoublesSketchToStringTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/DoublesSketchToStringTest.java
@@ -1,9 +1,23 @@
/*
- * Copyright 2019, Verizon Media.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
+package org.apache.datasketches.pig.quantiles;
import java.util.Arrays;
@@ -13,10 +27,11 @@
import org.testng.annotations.Test;
-import com.yahoo.sketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.DoublesSketch;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class DoublesSketchToStringTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetKFromDoublesSketchTest.java
similarity index 65%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetKFromDoublesSketchTest.java
index 6cd3965..3a884f6 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetKFromDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetKFromDoublesSketchTest.java
@@ -1,10 +1,25 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.quantiles.DoublesSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.quantiles.DoublesSketch;
import java.util.Arrays;
@@ -15,6 +30,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetKFromDoublesSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetKFromStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetKFromStringsSketchTest.java
similarity index 65%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetKFromStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetKFromStringsSketchTest.java
index 2ab568f..1ab5155 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetKFromStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetKFromStringsSketchTest.java
@@ -1,12 +1,27 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.quantiles.ItemsSketch;
import java.util.Arrays;
import java.util.Comparator;
@@ -18,6 +33,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetKFromStringsSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetPmfFromDoublesSketchTest.java
similarity index 66%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetPmfFromDoublesSketchTest.java
index 44d468b..9e1e499 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetPmfFromDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetPmfFromDoublesSketchTest.java
@@ -1,11 +1,26 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
import java.util.Arrays;
@@ -17,6 +32,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetPmfFromDoublesSketchTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
@@ -32,7 +48,9 @@
public void normalCase() throws Exception {
EvalFunc<Tuple> func = new GetPmfFromDoublesSketch();
UpdateDoublesSketch sketch = DoublesSketch.builder().build();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
Tuple resultTuple = func.exec(tupleFactory.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 2.0, 7.0)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetPmfFromStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetPmfFromStringsSketchTest.java
similarity index 67%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetPmfFromStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetPmfFromStringsSketchTest.java
index ff87598..3c7cd6c 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetPmfFromStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetPmfFromStringsSketchTest.java
@@ -1,12 +1,27 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.quantiles.ItemsSketch;
import java.util.Arrays;
import java.util.Comparator;
@@ -19,6 +34,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetPmfFromStringsSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
@@ -37,7 +53,9 @@
public void normalCase() throws Exception {
EvalFunc<Tuple> func = new GetPmfFromStringsSketch();
ItemsSketch<String> sketch = ItemsSketch.getInstance(COMPARATOR);
- for (int i = 1; i <= 10; i++) sketch.update(String.format("%02d", i));
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(String.format("%02d", i));
+ }
Tuple resultTuple = func.exec(TUPLE_FACTORY.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray(SER_DE)), "02", "07")));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantileFromDoublesSketchTest.java
similarity index 65%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetQuantileFromDoublesSketchTest.java
index 4179703..afa026d 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantileFromDoublesSketchTest.java
@@ -1,11 +1,26 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
import java.util.Arrays;
@@ -16,6 +31,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetQuantileFromDoublesSketchTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantileFromStringsSketchTest.java
similarity index 67%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetQuantileFromStringsSketchTest.java
index e57099e..68417a2 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantileFromStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantileFromStringsSketchTest.java
@@ -1,12 +1,27 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.ArrayOfItemsSerDe;
-import com.yahoo.sketches.ArrayOfStringsSerDe;
-import com.yahoo.sketches.quantiles.ItemsSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.ArrayOfItemsSerDe;
+import org.apache.datasketches.ArrayOfStringsSerDe;
+import org.apache.datasketches.quantiles.ItemsSketch;
import java.util.Arrays;
import java.util.Comparator;
@@ -18,6 +33,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetQuantileFromStringsSketchTest {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromDoublesSketchTest.java
similarity index 75%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromDoublesSketchTest.java
index 805c4e2..9cdb18d 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromDoublesSketchTest.java
@@ -1,11 +1,26 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package com.yahoo.sketches.pig.quantiles;
-import com.yahoo.sketches.quantiles.DoublesSketch;
-import com.yahoo.sketches.quantiles.UpdateDoublesSketch;
+package org.apache.datasketches.pig.quantiles;
+
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
import java.util.Arrays;
@@ -17,6 +32,7 @@
import org.testng.annotations.Test;
import org.testng.Assert;
+@SuppressWarnings("javadoc")
public class GetQuantilesFromDoublesSketchTest {
private static final TupleFactory tupleFactory = TupleFactory.getInstance();
@@ -60,7 +76,9 @@
public void oneFraction() throws Exception {
EvalFunc<Tuple> func = new GetQuantilesFromDoublesSketch();
UpdateDoublesSketch sketch = DoublesSketch.builder().build();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
Tuple resultTuple = func.exec(tupleFactory.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 0.5)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 1);
@@ -71,7 +89,9 @@
public void severalFractions() throws Exception {
EvalFunc<Tuple> func = new GetQuantilesFromDoublesSketch();
UpdateDoublesSketch sketch = DoublesSketch.builder().build();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
Tuple resultTuple = func.exec(tupleFactory.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 0.0, 0.5, 1.0)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
@@ -84,7 +104,9 @@
public void numberOfEvenlySpacedIntervals() throws Exception {
EvalFunc<Tuple> func = new GetQuantilesFromDoublesSketch();
UpdateDoublesSketch sketch = DoublesSketch.builder().build();
- for (int i = 1; i <= 10; i++) sketch.update(i);
+ for (int i = 1; i <= 10; i++) {
+ sketch.update(i);
+ }
Tuple resultTuple = func.exec(tupleFactory.newTuple(Arrays.asList(new DataByteArray(sketch.toByteArray()), 3)));
Assert.assertNotNull(resultTuple);
Assert.assertEquals(resultTuple.size(), 3);
diff --git a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromStringsSketchTest.java b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromStringsSketchTest.java
similarity index 74%
rename from src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromStringsSketchTest.java
rename to src/test/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromStringsSketchTest.java
index 3855f23..c3f3976 100644
--- a/src/test/java/com/yahoo/sketches/pig/quantiles/GetQuantilesFromStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/pig/quantiles/GetQuantilesFromStringsSketchTest.java
@@ -1,12 +1,27 @@
/*
- * Copyright 2016, Yahoo! Inc.
- * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing per