Restructure and redesign the preparator and algorithm.
diff --git a/README.md b/README.md
index 75efa05..95a10b8 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,11 @@
 
 # Release Information
 
+## Version 4.0
+
+Restructure and redesign the preparator and algorithm for lower memory usage and faster run time.
+Temporarily move the BIDMach, VW & SPPMI algorithm changes to the `bidmach` branch.
+
 ## Version 3.1
 
 Fix DataSource to read "content", "e-mail", and use label "spam" for tutorial data.
diff --git a/build.sbt b/build.sbt
index 31188ae..62021b9 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,22 +1,12 @@
-
 name := "org.template.textclassification"
 
 organization := "io.prediction"
 
 scalaVersion := "2.10.5"
 
-libraryDependencies ++= Seq(
-  "io.prediction"    % "core_2.10"        % pioVersion.value % "provided",
-  "org.apache.spark" %% "spark-core" % "1.4.1" % "provided",
-  "org.apache.spark" %% "spark-mllib" % "1.4.1" % "provided",
-  "com.github.fommil.netlib" % "all" % "1.1.2" pomOnly(),
-  "com.github.johnlangford" % "vw-jni" % "8.0.0",
-  "org.xerial.snappy" % "snappy-java" % "1.1.1.7"
-)
+
 
-mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) =>
-  {
-    case y if y.startsWith("doc")     => MergeStrategy.discard
-    case x => old(x)
-  }
-}
+libraryDependencies ++= Seq(
+  "io.prediction"    %% "core"     % pioVersion.value % "provided",
+  "org.apache.spark" %% "spark-core"    % "1.4.1" % "provided",
+  "org.apache.spark" %% "spark-mllib"   % "1.4.1" % "provided")
diff --git a/data/.DS_Store b/data/.DS_Store
deleted file mode 100644
index 5008ddf..0000000
--- a/data/.DS_Store
+++ /dev/null
Binary files differ
diff --git a/getnativepath.java b/getnativepath.java
deleted file mode 100644
index 9a7f2c0..0000000
--- a/getnativepath.java
+++ /dev/null
@@ -1,7 +0,0 @@
-public class getnativepath {
-    public static void main(String [] args) 
-    {
-        String v = System.getProperty("java.library.path");
-        System.out.print(v);
-    }
-}
\ No newline at end of file
diff --git a/lib/Apache_Commons_Math_LICENSE.txt b/lib/Apache_Commons_Math_LICENSE.txt
deleted file mode 100755
index 0333373..0000000
--- a/lib/Apache_Commons_Math_LICENSE.txt
+++ /dev/null
@@ -1,387 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
-
-APACHE COMMONS MATH DERIVATIVE WORKS: 
-
-The Apache commons-math library includes a number of subcomponents
-whose implementation is derived from original sources written
-in C or Fortran.  License terms of the original sources
-are reproduced below.
-
-===============================================================================
-For the lmder, lmpar and qrsolv Fortran routine from minpack and translated in
-the LevenbergMarquardtOptimizer class in package
-org.apache.commons.math3.optimization.general 
-Original source copyright and license statement:
-
-Minpack Copyright Notice (1999) University of Chicago.  All rights reserved
-
-Redistribution and use in source and binary forms, with or
-without modification, are permitted provided that the
-following conditions are met:
-
-1. Redistributions of source code must retain the above
-copyright notice, this list of conditions and the following
-disclaimer.
-
-2. Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following
-disclaimer in the documentation and/or other materials
-provided with the distribution.
-
-3. The end-user documentation included with the
-redistribution, if any, must include the following
-acknowledgment:
-
-   "This product includes software developed by the
-   University of Chicago, as Operator of Argonne National
-   Laboratory.
-
-Alternately, this acknowledgment may appear in the software
-itself, if and wherever such third-party acknowledgments
-normally appear.
-
-4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS"
-WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE
-UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND
-THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE
-OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY
-OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR
-USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF
-THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4)
-DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION
-UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL
-BE CORRECTED.
-
-5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT
-HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF
-ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT,
-INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF
-ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF
-PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER
-SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT
-(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
-EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
-POSSIBILITY OF SUCH LOSS OR DAMAGES.
-===============================================================================
-
-Copyright and license statement for the odex Fortran routine developed by
-E. Hairer and G. Wanner and translated in GraggBulirschStoerIntegrator class
-in package org.apache.commons.math3.ode.nonstiff:
-
-
-Copyright (c) 2004, Ernst Hairer
-
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are 
-met:
-
-- Redistributions of source code must retain the above copyright 
-notice, this list of conditions and the following disclaimer.
-
-- Redistributions in binary form must reproduce the above copyright 
-notice, this list of conditions and the following disclaimer in the 
-documentation and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-===============================================================================
-
-Copyright and license statement for the original lapack fortran routines
-translated in EigenDecompositionImpl class in package
-org.apache.commons.math3.linear:
-
-Copyright (c) 1992-2008 The University of Tennessee.  All rights reserved.
-
-$COPYRIGHT$
-
-Additional copyrights may follow
-
-$HEADER$
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-- Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer. 
-  
-- Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer listed
-  in this license in the documentation and/or other materials
-  provided with the distribution.
-  
-- Neither the name of the copyright holders nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-  
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT  
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-===============================================================================
-
-Copyright and license statement for the original Mersenne twister C
-routines translated in MersenneTwister class in package 
-org.apache.commons.math3.random:
-
-   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   All rights reserved.                          
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-     1. Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-
-     2. Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in the
-        documentation and/or other materials provided with the distribution.
-
-     3. The names of its contributors may not be used to endorse or promote 
-        products derived from this software without specific prior written 
-        permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-===============================================================================
-
-The class "org.apache.commons.math3.exception.util.LocalizedFormatsTest" is
-an adapted version of "OrekitMessagesTest" test class for the Orekit library
-The "org.apache.commons.math3.analysis.interpolation.HermiteInterpolator"
-has been imported from the Orekit space flight dynamics library.
-
-Th Orekit library is described at:
-  https://www.orekit.org/forge/projects/orekit
-The original files are distributed under the terms of the Apache 2 license
-which is: Copyright 2010 CS Communication & Systèmes
diff --git a/lib/Apache_License.txt b/lib/Apache_License.txt
deleted file mode 100644
index d645695..0000000
--- a/lib/Apache_License.txt
+++ /dev/null
@@ -1,202 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/lib/BIDMach.jar b/lib/BIDMach.jar
deleted file mode 100644
index 26cc39e..0000000
--- a/lib/BIDMach.jar
+++ /dev/null
Binary files differ
diff --git a/lib/BIDMat.jar b/lib/BIDMat.jar
deleted file mode 100755
index 8d46f5c..0000000
--- a/lib/BIDMat.jar
+++ /dev/null
Binary files differ
diff --git a/lib/HDF5_Copyright.html b/lib/HDF5_Copyright.html
deleted file mode 100644
index 07a71f4..0000000
--- a/lib/HDF5_Copyright.html
+++ /dev/null
@@ -1,160 +0,0 @@
-<html><head>

-<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">

-    <title>

-      HDF5 Copyright Notice and License Terms

-    </title>

-  </head>

-

-<body bgcolor="#FFFFFF">

-<!-- NEW PAGE -->

-

-

-<hr>               

-

-<h3>Copyright Notice and License Terms for

-<br>

-HDF5 (Hierarchical Data Format 5) Software Library and Utilities</h3>

-<hr>               

-<p>

-

-

-HDF5 (Hierarchical Data Format 5) Software Library and Utilities

-<br>

-Copyright 2006-2012 by The HDF Group.

-</p><p>

-NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities

-<br>

-Copyright 1998-2006 by the Board of Trustees of the University of Illinois.

-</p><p>

-<strong>All rights reserved.</strong>

-</p><p>

-

-</p><p>

-Redistribution and use in source and binary forms, with or without 

-modification, are permitted for any purpose (including commercial purposes) 

-provided that the following conditions are met:

-

-</p><p>

-</p><ol>

-<li>

-Redistributions of source code must retain the above copyright notice, 

-this list of conditions, and the following disclaimer.

-

-</li><li>

-Redistributions in binary form must reproduce the above copyright notice, 

-this list of conditions, and the following disclaimer in the documentation 

-and/or materials provided with the distribution.

-

-</li><li>

-In addition, redistributions of modified forms of the source or binary code 

-must carry prominent notices stating that the original code was changed and 

-the date of the change.

-

-</li><li>

-All publications or advertising materials mentioning features or use of this 

-software are asked, but not required, to acknowledge that it was developed 

-by The HDF Group and by the National Center for Supercomputing Applications 

-at the University of Illinois at Urbana-Champaign and credit the contributors.

-

-</li><li>

-Neither the name of The HDF Group, the name of the University, nor the name 

-of any Contributor may be used to endorse or promote products derived from 

-this software without specific prior written permission from The HDF Group, 

-the University, or the Contributor, respectively.

-</li></ol>

-

-<p>

-<b>DISCLAIMER:</b>

-THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS 

-"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED.  

-In no event shall The HDF Group or the Contributors be liable for any damages 

-suffered by the users arising out of the use of this software, even if advised 

-of the possibility of such damage. 

-

-

-</p><hr>

-<hr>

-

-<p>

-Contributors:   National Center for Supercomputing Applications  (NCSA) at 

-the University of Illinois, Fortner Software, Unidata Program Center (netCDF), 

-The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip), 

-and Digital Equipment Corporation (DEC).

-

-</p><hr>

-

-<p>

-Portions of HDF5 were developed with support from the Lawrence Berkeley 

-National Laboratory (LBNL) and the United States Department of Energy 

-under Prime Contract No. DE-AC02-05CH11231.

-

-</p><hr>

-

-<p>

-Portions of HDF5 were developed with support from the University of 

-California, Lawrence Livermore National Laboratory (UC LLNL). 

-The following statement applies to those portions of the product and must 

-be retained in any redistribution of source code, binaries, documentation, 

-and/or accompanying materials:

-</p><dir>

-        This work was partially produced at the University of California, 

-        Lawrence Livermore National Laboratory (UC LLNL) under contract 

-        no. W-7405-ENG-48 (Contract 48) between the U.S. Department of 

-        Energy (DOE) and The Regents of the University of California 

-        (University) for the operation of UC LLNL.

-	<p>

-	<b>DISCLAIMER:</b>

-        This work was prepared as an account of work sponsored by an agency 

-        of the United States Government. Neither the United States Government 

-        nor the University of California nor any of their employees, makes 

-        any warranty, express or implied, or assumes any liability or 

-        responsibility for the accuracy, completeness, or usefulness of any 

-        information, apparatus, product, or process disclosed, or represents 

-        that its use would not infringe privately- owned rights. Reference 

-        herein to any specific commercial products, process, or service by 

-        trade name, trademark, manufacturer, or otherwise, does not 

-        necessarily constitute or imply its endorsement, recommendation, or 

-        favoring by the United States Government or the University of 

-        California. The views and opinions of authors expressed herein do not 

-        necessarily state or reflect those of the United States Government or 

-        the University of California, and shall not be used for advertising 

-        or product endorsement purposes.

-</p></dir>

-

-<hr>

-

-<!--    DO NOT EDIT THE FOLLOWING 8 LINES;                           -->

-<!--    THEY ARE AUTOMATICALLY UPDATED BY DOCUMENTATION SOFTWARE.    -->

-

-<!-- #BeginLibraryItem "/ed_libs/Footer.lbi" -->

-<address>

-<table border="0" width="100%">

-  <tbody><tr valign="top">

-      <td align="left">

-          <address>

-          The HDF Group Help Desk: <img src="HDF5_help.png" align="top" height="16">

-          <br>

-          Describes HDF5 Release 1.8.9, May 2012.

-          </address>

-      </td><td width="5%">&nbsp;</td>

-      <td align="right">

-          <a href="http://www.hdfgroup.org/HDF5/doc/Copyright.html">Copyright</a> by

-          <a href="http://www.hdfgroup.org/">The HDF Group</a>

-          <br>

-          and the Board of Trustees of the University of Illinois

-      </td>   

-  </tr>   

-</tbody></table>

-</address>

-<!-- #EndLibraryItem --><script language="JAVASCRIPT">

-<!--

-document.writeln("Last modified: 5 March 2012")

--->

-</script>Last modified: 5 March 2012

-

-

-

-

-

-

-</body></html>

diff --git a/lib/IScala_license.txt b/lib/IScala_license.txt
deleted file mode 100755
index 0f0a0d0..0000000
--- a/lib/IScala_license.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-Copyright (c) 2013-2014 by Mateusz Paprocki and contributors.

-

-Permission is hereby granted, free of charge, to any person obtaining

-a copy of this software and associated documentation files (the

-"Software"), to deal in the Software without restriction, including

-without limitation the rights to use, copy, modify, merge, publish,

-distribute, sublicense, and/or sell copies of the Software, and to

-permit persons to whom the Software is furnished to do so, subject to

-the following conditions:

-

-The above copyright notice and this permission notice shall be

-included in all copies or substantial portions of the Software.

-

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

diff --git a/lib/JCUDA_Copyright.txt b/lib/JCUDA_Copyright.txt
deleted file mode 100644
index a47ba68..0000000
--- a/lib/JCUDA_Copyright.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-JCuda - Java bindings for NVIDIA CUDA

-

-Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org

-

-Permission is hereby granted, free of charge, to any person

-obtaining a copy of this software and associated documentation

-files (the "Software"), to deal in the Software without

-restriction, including without limitation the rights to use,

-copy, modify, merge, publish, distribute, sublicense, and/or sell

-copies of the Software, and to permit persons to whom the

-Software is furnished to do so, subject to the following

-conditions:

-

-The above copyright notice and this permission notice shall be

-included in all copies or substantial portions of the Software.

-

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES

-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT

-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

-OTHER DEALINGS IN THE SOFTWARE.

diff --git a/lib/License.txt b/lib/License.txt
deleted file mode 100644
index 5faa1fb..0000000
--- a/lib/License.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-

-JCuda - Java bindings for NVIDIA CUDA

-

-Copyright (c) 2008-2015 Marco Hutter - http://www.jcuda.org

-

-Permission is hereby granted, free of charge, to any person

-obtaining a copy of this software and associated documentation

-files (the "Software"), to deal in the Software without

-restriction, including without limitation the rights to use,

-copy, modify, merge, publish, distribute, sublicense, and/or sell

-copies of the Software, and to permit persons to whom the

-Software is furnished to do so, subject to the following

-conditions:

-

-The above copyright notice and this permission notice shall be

-included in all copies or substantial portions of the Software.

-

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES

-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT

-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,

-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

-OTHER DEALINGS IN THE SOFTWARE.

-

diff --git a/lib/PtPlot_Copyright.txt b/lib/PtPlot_Copyright.txt
deleted file mode 100755
index 7da2f50..0000000
--- a/lib/PtPlot_Copyright.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-Below is the copyright agreement for the Ptolemy II system.
-Version: $Id: copyright.txt 57469 2010-03-10 22:04:46Z cxh $
-
-Copyright (c) 1995-2010 The Regents of the University of California.
-All rights reserved.
-
-Permission is hereby granted, without written agreement and without
-license or royalty fees, to use, copy, modify, and distribute this
-software and its documentation for any purpose, provided that the above
-copyright notice and the following two paragraphs appear in all copies
-of this software.
-
-IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
-FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
-ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
-THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
-THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
-INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
-PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
-CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
-ENHANCEMENTS, OR MODIFICATIONS.
-
-Ptolemy II includes the work of others, to see those copyrights, follow
-the copyright link on the splash page or see copyright.htm.
diff --git a/lib/Scala_License.txt b/lib/Scala_License.txt
deleted file mode 100755
index ba8cd07..0000000
--- a/lib/Scala_License.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-SCALA LICENSE

-

-Copyright (c) 2002-2013 EPFL, Lausanne, unless otherwise specified.

-All rights reserved.

-

-This software was developed by the Programming Methods Laboratory of the

-Swiss Federal Institute of Technology (EPFL), Lausanne, Switzerland.

-

-Permission to use, copy, modify, and distribute this software in source

-or binary form for any purpose with or without fee is hereby granted,

-provided that the following conditions are met:

-

-   1. Redistributions of source code must retain the above copyright

-      notice, this list of conditions and the following disclaimer.

-

-   2. Redistributions in binary form must reproduce the above copyright

-      notice, this list of conditions and the following disclaimer in the

-      documentation and/or other materials provided with the distribution.

-

-   3. Neither the name of the EPFL nor the names of its contributors

-      may be used to endorse or promote products derived from this

-      software without specific prior written permission.

-

-

-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

-ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR

-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

-SUCH DAMAGE.
\ No newline at end of file
diff --git a/lib/bidmach_init.scala b/lib/bidmach_init.scala
deleted file mode 100755
index ea021f7..0000000
--- a/lib/bidmach_init.scala
+++ /dev/null
@@ -1,16 +0,0 @@
-import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import BIDMat.Solvers._
-import BIDMat.Plotting._
-import BIDMach.Learner
-import BIDMach.models.{FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest}
-import BIDMach.networks.{DNN}
-import BIDMach.datasources.{DataSource,MatDS,FilesDS,SFilesDS}
-import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer}
-import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,IncMult,IncNorm,Telescoping}
-import BIDMach.causal.{IPTW}
-
-Mat.checkMKL
-Mat.checkCUDA
-
diff --git a/lib/commons-math3-3.2.jar b/lib/commons-math3-3.2.jar
deleted file mode 100644
index f8b7db2..0000000
--- a/lib/commons-math3-3.2.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jcublas-0.7.0a.jar b/lib/jcublas-0.7.0a.jar
deleted file mode 100644
index 9613fb0..0000000
--- a/lib/jcublas-0.7.0a.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jcuda-0.7.0a.jar b/lib/jcuda-0.7.0a.jar
deleted file mode 100644
index 5ce4c19..0000000
--- a/lib/jcuda-0.7.0a.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jcufft-0.7.0a.jar b/lib/jcufft-0.7.0a.jar
deleted file mode 100644
index 72827d4..0000000
--- a/lib/jcufft-0.7.0a.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jcurand-0.7.0a.jar b/lib/jcurand-0.7.0a.jar
deleted file mode 100644
index f51034a..0000000
--- a/lib/jcurand-0.7.0a.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jcusolver-0.7.0a.jar b/lib/jcusolver-0.7.0a.jar
deleted file mode 100644
index 090f0b4..0000000
--- a/lib/jcusolver-0.7.0a.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jcusparse-0.7.0a.jar b/lib/jcusparse-0.7.0a.jar
deleted file mode 100644
index 8fbd9f9..0000000
--- a/lib/jcusparse-0.7.0a.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jhdf5.jar b/lib/jhdf5.jar
deleted file mode 100644
index 9d15b7d..0000000
--- a/lib/jhdf5.jar
+++ /dev/null
Binary files differ
diff --git a/lib/jline-2.11.jar b/lib/jline-2.11.jar
deleted file mode 100755
index 9604bd2..0000000
--- a/lib/jline-2.11.jar
+++ /dev/null
Binary files differ
diff --git a/lib/junit-4.11.jar b/lib/junit-4.11.jar
deleted file mode 100755
index aaf7444..0000000
--- a/lib/junit-4.11.jar
+++ /dev/null
Binary files differ
diff --git a/lib/libJCublas-linux-x86_64.so b/lib/libJCublas-linux-x86_64.so
deleted file mode 100644
index eb729eb..0000000
--- a/lib/libJCublas-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCublas2-linux-x86_64.so b/lib/libJCublas2-linux-x86_64.so
deleted file mode 100644
index d15af99..0000000
--- a/lib/libJCublas2-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCudaDriver-linux-x86_64.so b/lib/libJCudaDriver-linux-x86_64.so
deleted file mode 100644
index 23c5bec..0000000
--- a/lib/libJCudaDriver-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCudaRuntime-linux-x86_64.so b/lib/libJCudaRuntime-linux-x86_64.so
deleted file mode 100644
index 34d276f..0000000
--- a/lib/libJCudaRuntime-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCufft-linux-x86_64.so b/lib/libJCufft-linux-x86_64.so
deleted file mode 100644
index 387ef92..0000000
--- a/lib/libJCufft-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCurand-linux-x86_64.so b/lib/libJCurand-linux-x86_64.so
deleted file mode 100644
index 15079cc..0000000
--- a/lib/libJCurand-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCusolver-linux-x86_64.so b/lib/libJCusolver-linux-x86_64.so
deleted file mode 100644
index 0a15dc4..0000000
--- a/lib/libJCusolver-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libJCusparse-linux-x86_64.so b/lib/libJCusparse-linux-x86_64.so
deleted file mode 100644
index 71a47f8..0000000
--- a/lib/libJCusparse-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libbidmachcpu-linux-x86_64.so b/lib/libbidmachcpu-linux-x86_64.so
deleted file mode 100755
index fb86860..0000000
--- a/lib/libbidmachcpu-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libbidmachcuda-linux-x86_64.so b/lib/libbidmachcuda-linux-x86_64.so
deleted file mode 100755
index 68c31ba..0000000
--- a/lib/libbidmachcuda-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libbidmatcuda-linux-x86_64.so b/lib/libbidmatcuda-linux-x86_64.so
deleted file mode 100755
index 3ceacfd..0000000
--- a/lib/libbidmatcuda-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libbidmatmkl-linux-x86_64.so b/lib/libbidmatmkl-linux-x86_64.so
deleted file mode 100755
index af19147..0000000
--- a/lib/libbidmatmkl-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libcaffe-linux-x86_64.so b/lib/libcaffe-linux-x86_64.so
deleted file mode 100755
index 0aae3ec..0000000
--- a/lib/libcaffe-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libcaffe.so b/lib/libcaffe.so
deleted file mode 100755
index e6d83d8..0000000
--- a/lib/libcaffe.so
+++ /dev/null
Binary files differ
diff --git a/lib/libiomp5.so b/lib/libiomp5.so
deleted file mode 100755
index 3b9e725..0000000
--- a/lib/libiomp5.so
+++ /dev/null
Binary files differ
diff --git a/lib/libjhdf.so b/lib/libjhdf.so
deleted file mode 100755
index ff6304c..0000000
--- a/lib/libjhdf.so
+++ /dev/null
Binary files differ
diff --git a/lib/libjhdf5-linux-x86_64.so b/lib/libjhdf5-linux-x86_64.so
deleted file mode 100755
index c3dcb2d..0000000
--- a/lib/libjhdf5-linux-x86_64.so
+++ /dev/null
Binary files differ
diff --git a/lib/libjhdf5.so b/lib/libjhdf5.so
deleted file mode 100755
index c3dcb2d..0000000
--- a/lib/libjhdf5.so
+++ /dev/null
Binary files differ
diff --git a/lib/lz4-1.3.jar b/lib/lz4-1.3.jar
deleted file mode 100755
index ffa8eaf..0000000
--- a/lib/lz4-1.3.jar
+++ /dev/null
Binary files differ
diff --git a/lib/ptplot.jar b/lib/ptplot.jar
deleted file mode 100644
index 9582f1c..0000000
--- a/lib/ptplot.jar
+++ /dev/null
Binary files differ
diff --git a/lib/ptplotapplication.jar b/lib/ptplotapplication.jar
deleted file mode 100755
index cc32dd0..0000000
--- a/lib/ptplotapplication.jar
+++ /dev/null
Binary files differ
diff --git a/src/main/scala/org/template/textclassification/DataSource.scala b/src/main/scala/DataSource.scala
similarity index 65%
rename from src/main/scala/org/template/textclassification/DataSource.scala
rename to src/main/scala/DataSource.scala
index 6a29909..be81932 100644
--- a/src/main/scala/org/template/textclassification/DataSource.scala
+++ b/src/main/scala/DataSource.scala
@@ -1,40 +1,40 @@
 package org.template.textclassification
 
-import grizzled.slf4j.Logger
-import io.prediction.controller.EmptyEvaluationInfo
-import io.prediction.controller.Params
 import io.prediction.controller.PDataSource
+import io.prediction.controller.EmptyEvaluationInfo
+import io.prediction.controller.EmptyActualResult
+import io.prediction.controller.Params
 import io.prediction.controller.SanityCheck
+import io.prediction.data.storage.Event
 import io.prediction.data.store.PEventStore
+
 import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
 import org.apache.spark.rdd.RDD
 
+import grizzled.slf4j.Logger
 
-
-// 1. Initialize your Data Source parameters. This is
-// where you specify your application name, MyTextApp,
-// and the number of folds that are to be used for
-// cross validation.
-
+/** Define Data Source parameters.
+  * appName is the application name.
+  * evalK is the the number of folds that are to be used for cross validation (optional)
+  */
 case class DataSourceParams(
-                             appName: String,
-                             evalK: Option[Int]
-                             ) extends Params
+    appName: String,
+    evalK: Option[Int]
+  ) extends Params
 
 
-
-// 2. Define your DataSource component. Remember, you must
-// implement a readTraining method, and, optionally, a
-// readEval method.
-
+/** Define your DataSource component. Remember, you must
+  * implement a readTraining method, and, optionally, a
+  * readEval method.
+  */
 class DataSource (
-                   val dsp : DataSourceParams
-                   ) extends PDataSource[TrainingData, EmptyEvaluationInfo, Query, ActualResult] {
+  val dsp : DataSourceParams
+) extends PDataSource[TrainingData, EmptyEvaluationInfo, Query, ActualResult] {
 
   @transient lazy val logger = Logger[this.type]
 
-  // Helper function used to store data given
-  // a SparkContext.
+  /** Helper function used to store data given a SparkContext. */
   private def readEventData(sc: SparkContext) : RDD[Observation] = {
     //Get RDD of Events.
     PEventStore.find(
@@ -54,8 +54,7 @@
     }).cache
   }
 
-  // Helper function used to store stop words from
-  // event server.
+  /** Helper function used to store stop words from event server. */
   private def readStopWords(sc : SparkContext) : Set[String] = {
     PEventStore.find(
       appName = dsp.appName,
@@ -69,16 +68,15 @@
       .toSet
   }
 
-
-  // Read in data and stop words from event server
-  // and store them in a TrainingData instance.
+  /** Read in data and stop words from event server
+    * and store them in a TrainingData instance.
+    */
   override
   def readTraining(sc: SparkContext): TrainingData = {
     new TrainingData(readEventData(sc), readStopWords(sc))
   }
 
-  // Used for evaluation: reads in event data and creates
-  // cross-validation folds.
+  /** Used for evaluation: reads in event data and creates cross-validation folds. */
   override
   def readEval(sc: SparkContext):
   Seq[(TrainingData, EmptyEvaluationInfo, RDD[(Query, ActualResult)])] = {
@@ -102,27 +100,28 @@
       (train, new EmptyEvaluationInfo, test)
     }
   }
+
 }
 
-
-// 3. Observation class serving as a wrapper for both our
-// data's class label and document string.
+/** Observation class serving as a wrapper for both our
+  * data's class label and document string.
+  */
 case class Observation(
-                        label : Double,
-                        text : String,
-                        category :String
-                        ) extends Serializable
+  label: Double,
+  text: String,
+  category: String
+) extends Serializable
 
-// 4. TrainingData class serving as a wrapper for all
-// read in from the Event Server.
+/** TrainingData class serving as a wrapper for all
+  * read in from the Event Server.
+  */
 class TrainingData(
-                    val data : RDD[Observation],
-                    val stopWords : Set[String]
-                    ) extends Serializable with SanityCheck {
+  val data : RDD[Observation],
+  val stopWords : Set[String]
+) extends Serializable with SanityCheck {
 
-  // Sanity check to make sure your data is being fed in correctly.
-
-  def sanityCheck {
+  /** Sanity check to make sure your data is being fed in correctly. */
+  def sanityCheck(): Unit = {
     try {
       val obs : Array[Double] = data.takeSample(false, 5).map(_.label)
 
diff --git a/src/main/scala/Engine.scala b/src/main/scala/Engine.scala
new file mode 100644
index 0000000..5cd5420
--- /dev/null
+++ b/src/main/scala/Engine.scala
@@ -0,0 +1,36 @@
+package org.template.textclassification
+
+import io.prediction.controller.IEngineFactory
+import io.prediction.controller.Engine
+
+/** Define Query class which serves as a wrapper for
+  * new text data.
+  */
+case class Query(text: String) extends Serializable
+
+/** Define PredictedResult class which serves as a
+  * wrapper for a predicted class label and the associated
+  * prediction confidence.
+  */
+case class PredictedResult(
+  category: String,
+  confidence: Double) extends Serializable
+
+/** Define ActualResult class which serves as a wrapper
+  * for an observation's true class label.
+  */
+case class ActualResult(category: String) extends Serializable
+
+/** Define Engine */
+object TextClassificationEngine extends IEngineFactory {
+  def apply() = {
+    new Engine(
+      classOf[DataSource],
+      classOf[Preparator],
+      Map(
+        "nb" -> classOf[NBAlgorithm],
+        "lr" -> classOf[LRAlgorithm]
+      ),
+      classOf[Serving])
+  }
+}
diff --git a/src/main/scala/org/template/textclassification/Evaluation.scala b/src/main/scala/Evaluation.scala
similarity index 65%
rename from src/main/scala/org/template/textclassification/Evaluation.scala
rename to src/main/scala/Evaluation.scala
index d42f5e1..3f3a070 100644
--- a/src/main/scala/org/template/textclassification/Evaluation.scala
+++ b/src/main/scala/Evaluation.scala
@@ -1,14 +1,16 @@
 package org.template.textclassification
 
-import io.prediction.controller._
+import io.prediction.controller.AverageMetric
+import io.prediction.controller.Evaluation
+import io.prediction.controller.EmptyEvaluationInfo
+import io.prediction.controller.EngineParamsGenerator
+import io.prediction.controller.EngineParams
 
-
-
-// 1. Create an accuracy metric for evaluating our supervised learning model.
+/** Create an accuracy metric for evaluating our supervised learning model. */
 case class Accuracy()
   extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] {
 
-  // Method for calculating prediction accuracy.
+  /** Method for calculating prediction accuracy. */
   def calculate(
     query: Query,
     predicted: PredictedResult,
@@ -17,9 +19,9 @@
 }
 
 
-
-// 2. Define your evaluation object implementing the accuracy metric defined
-// above.
+/** Define your evaluation object implementing the accuracy metric defined
+  * above.
+  */
 object AccuracyEvaluation extends Evaluation {
 
   // Define Engine and Metric used in Evaluation.
@@ -29,15 +31,13 @@
   )
 }
 
-
-
-// 3. Set your engine parameters for evaluation procedure.
+/** Set your engine parameters for evaluation procedure.*/
 object EngineParamsList extends EngineParamsGenerator {
 
   // Set data source and preparator parameters.
   private[this] val baseEP = EngineParams(
     dataSourceParams = DataSourceParams(appName = "MyTextApp", evalK = Some(3)),
-    preparatorParams = PreparatorParams(nGram = 2, 5000, true) 
+    preparatorParams = PreparatorParams(nGram = 2, numFeatures = 500)
   )
 
   // Set the algorithm params for which we will assess an accuracy score.
diff --git a/src/main/scala/LRAlgorithm.scala b/src/main/scala/LRAlgorithm.scala
new file mode 100644
index 0000000..1f0fcd3
--- /dev/null
+++ b/src/main/scala/LRAlgorithm.scala
@@ -0,0 +1,111 @@
+package org.template.textclassification
+
+import io.prediction.controller.P2LAlgorithm
+import io.prediction.controller.Params
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.ml.classification.LogisticRegression
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.functions
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.UserDefinedFunction
+
+import grizzled.slf4j.Logger
+
+case class LRAlgorithmParams(regParam: Double) extends Params
+
+class LRAlgorithm(val ap: LRAlgorithmParams)
+  extends P2LAlgorithm[PreparedData, LRModel, Query, PredictedResult] {
+
+  @transient lazy val logger = Logger[this.type]
+
+  def train(sc: SparkContext, pd: PreparedData): LRModel = {
+
+    // Import SQLContext for creating DataFrame.
+    val sql: SQLContext = new SQLContext(sc)
+    import sql.implicits._
+
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setThreshold(0.5)
+      .setRegParam(ap.regParam)
+
+    val labels: Seq[Double] = pd.categoryMap.keys.toSeq
+
+    val data = labels.foldLeft(pd.transformedData.toDF)( //transform to Spark DataFrame
+      // Add the different binary columns for each label.
+      (data: DataFrame, label: Double) => {
+        // function: multiclass labels --> binary labels
+        val f: UserDefinedFunction = functions.udf((e : Double) => if (e == label) 1.0 else 0.0)
+
+        data.withColumn(label.toInt.toString, f(data("label")))
+      }
+    )
+
+    // Create a logistic regression model for each class.
+    val lrModels : Seq[(Double, LREstimate)] = labels.map(
+      label => {
+        val lab = label.toInt.toString
+
+        val fit = lr.setLabelCol(lab).fit(
+          data.select(lab, "features")
+        )
+
+        // Return (label, feature coefficients, and intercept term.
+        (label, LREstimate(fit.weights.toArray, fit.intercept))
+
+      }
+    )
+
+    new LRModel(
+      tfIdf = pd.tfIdf,
+      categoryMap = pd.categoryMap,
+      lrModels = lrModels
+    )
+  }
+
+  def predict(model: LRModel, query: Query): PredictedResult = {
+    model.predict(query.text)
+  }
+}
+
+case class LREstimate (
+  coefficients : Array[Double],
+  intercept : Double
+) extends Serializable
+
+class LRModel(
+  val tfIdf: TFIDFModel,
+  val categoryMap: Map[Double, String],
+  val lrModels: Seq[(Double, LREstimate)]) extends Serializable {
+
+  /** Enable vector inner product for prediction. */
+  private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {
+    x.zip(y).map(e => e._1 * e._2).sum
+  }
+
+  /** Define prediction rule. */
+  def predict(text: String): PredictedResult = {
+    val x: Array[Double] = tfIdf.transform(text).toArray
+
+    // Logistic Regression binary formula for positive probability.
+    // According to MLLib documentation, class labeled 0 is used as pivot.
+    // Thus, we are using:
+    // log(p1/p0) = log(p1/(1 - p1)) = b0 + xTb =: z
+    // p1 = exp(z) * (1 - p1)
+    // p1 * (1 + exp(z)) = exp(z)
+    // p1 = exp(z)/(1 + exp(z))
+    val pred = lrModels.map(
+      e => {
+        val z = scala.math.exp(innerProduct(e._2.coefficients, x) + e._2.intercept)
+        (e._1, z / (1 + z))
+      }
+    ).maxBy(_._2)
+
+    PredictedResult(categoryMap(pred._1), pred._2)
+  }
+
+  override def toString = s"LR model"
+}
diff --git a/src/main/scala/NBAlgorithm.scala b/src/main/scala/NBAlgorithm.scala
new file mode 100644
index 0000000..b3f6d08
--- /dev/null
+++ b/src/main/scala/NBAlgorithm.scala
@@ -0,0 +1,82 @@
+package org.template.textclassification
+
+import io.prediction.controller.P2LAlgorithm
+import io.prediction.controller.Params
+import org.apache.spark.SparkContext
+import org.apache.spark.mllib.classification.NaiveBayes
+import org.apache.spark.mllib.classification.NaiveBayesModel
+import org.apache.spark.mllib.linalg.Vector
+import com.github.fommil.netlib.F2jBLAS
+
+import scala.math._
+
+/** Define parameters for Supervised Learning Model. We are
+ * using a Naive Bayes classifier, which gives us only one
+ * hyperparameter in this stage.
+ */
+case class NBAlgorithmParams(lambda: Double) extends Params
+
+/** Define SupervisedAlgorithm class. */
+class NBAlgorithm(
+  val ap: NBAlgorithmParams
+) extends P2LAlgorithm[PreparedData, NBModel, Query, PredictedResult] {
+
+  /** Train your model. */
+  def train(sc: SparkContext, pd: PreparedData): NBModel = {
+    // Fit a Naive Bayes model using the prepared data.
+    val nb: NaiveBayesModel = NaiveBayes.train(pd.transformedData, ap.lambda)
+
+    new NBModel(
+      tfIdf = pd.tfIdf,
+      categoryMap = pd.categoryMap,
+      nb = nb)
+  }
+
+  /** Prediction method for trained model. */
+  def predict(model: NBModel, query: Query): PredictedResult = {
+    model.predict(query.text)
+  }
+}
+
+class NBModel(
+  val tfIdf: TFIDFModel,
+  val categoryMap: Map[Double, String],
+  val nb: NaiveBayesModel
+) extends Serializable {
+
+  private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {
+    x.zip(y).map(e => e._1 * e._2).sum
+  }
+
+  val normalize = (u: Array[Double]) => {
+    val uSum = u.sum
+
+    u.map(e => e / uSum)
+  }
+
+  private val scoreArray = nb.pi.zip(nb.theta)
+
+  /** Given a document string, return a vector of corresponding
+    * class membership probabilities, rescaled with the normalize
+    * helper so that the scores sum to one.
+    * Returns an object of type Array[Double].
+    */
+  private def getScores(doc: String): Array[Double] = {
+    // Vectorize query
+    val x: Vector = tfIdf.transform(doc)
+
+    val z = scoreArray
+      .map(e => innerProduct(e._2, x.toArray) + e._1)
+
+    normalize((0 until z.size).map(k => exp(z(k) - z.max)).toArray)
+  }
+
+  /** Implement predict method for our model using
+    * the prediction rule given in tutorial.
+    */
+  def predict(doc : String) : PredictedResult = {
+    val x: Array[Double] = getScores(doc)
+    val y: (Double, Double) = (nb.labels zip x).maxBy(_._2)
+    new PredictedResult(categoryMap.getOrElse(y._1, ""), y._2)
+  }
+}
diff --git a/src/main/scala/Preparator.scala b/src/main/scala/Preparator.scala
new file mode 100644
index 0000000..c990944
--- /dev/null
+++ b/src/main/scala/Preparator.scala
@@ -0,0 +1,97 @@
+package org.template.textclassification
+
+import io.prediction.controller.PPreparator
+import io.prediction.controller.Params
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+import org.apache.spark.mllib.feature.{IDF, IDFModel, HashingTF}
+import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
+
+/** Define Preparator parameters. Recall that for our data
+  * representation we are only required to input the n-gram window
+  * components.
+  */
+case class PreparatorParams(
+  nGram: Int,
+  numFeatures: Int = 15000
+) extends Params
+
+/** define your Preparator class */
+class Preparator(pp: PreparatorParams)
+  extends PPreparator[TrainingData, PreparedData] {
+
+  def prepare(sc: SparkContext, td: TrainingData): PreparedData = {
+
+    val tfHasher = new TFHasher(pp.numFeatures, pp.nGram)
+
+    // Convert the training data's observation text into TF vectors
+    // and then fit an IDF model.
+    val idf: IDFModel = new IDF().fit(td.data.map(e => tfHasher.hashTF(e.text)))
+
+    val tfIdfModel = new TFIDFModel(
+      hasher = tfHasher,
+      idf = idf
+    )
+
+    // Transform RDD[Observation] to RDD[(Label, text)]
+    val doc: RDD[(Double, String)] = td.data.map (obs => (obs.label, obs.text))
+
+    // transform RDD[(Label, text)] to RDD[LabeledPoint]
+    val transformedData: RDD[(LabeledPoint)] = tfIdfModel.transform(doc)
+
+    // Finally extract category map, associating label to category.
+    val categoryMap = td.data.map(obs => (obs.label, obs.category)).collectAsMap.toMap
+
+    new PreparedData(
+      tfIdf = tfIdfModel,
+      transformedData = transformedData,
+      categoryMap = categoryMap
+    )
+  }
+
+}
+
+class TFHasher(
+  val numFeatures: Int,
+  val nGram: Int
+) extends Serializable {
+
+  private val hasher = new HashingTF(numFeatures = numFeatures)
+
+  /** Hashing function: Text -> term frequency vector. */
+  def hashTF(text: String): Vector = {
+    val newList : Array[String] = text.split(" ")
+    .sliding(nGram)
+    .map(_.mkString)
+    .toArray
+
+    hasher.transform(newList)
+  }
+}
+
+class TFIDFModel(
+  val hasher: TFHasher,
+  val idf: IDFModel
+) extends Serializable {
+  
+  /** Transform text to a tf-idf vector. */
+  def transform(text: String): Vector = {
+    // Map(n-gram -> document tf)
+    idf.transform(hasher.hashTF(text))
+  }
+
+  /** transform RDD of (label, text) to RDD of LabeledPoint */
+  def transform(doc: RDD[(Double, String)]): RDD[LabeledPoint] = {
+    doc.map{ case (label, text) => LabeledPoint(label, transform(text)) }
+  }
+}
+
+class PreparedData(
+  val tfIdf: TFIDFModel,
+  val transformedData: RDD[LabeledPoint],
+  val categoryMap: Map[Double, String]
+) extends Serializable
diff --git a/src/main/scala/Serving.scala b/src/main/scala/Serving.scala
new file mode 100644
index 0000000..7dd2573
--- /dev/null
+++ b/src/main/scala/Serving.scala
@@ -0,0 +1,13 @@
+package org.template.textclassification
+
+import io.prediction.controller.LServing
+
+class Serving
+  extends LServing[Query, PredictedResult] {
+
+  override
+  def serve(query: Query,
+      predictedResults: Seq[PredictedResult]): PredictedResult = {
+    predictedResults.maxBy(e => e.confidence)
+  }
+}
diff --git a/src/main/scala/org/template/textclassification/BIDMachLRAlgorithm.scala b/src/main/scala/org/template/textclassification/BIDMachLRAlgorithm.scala
deleted file mode 100644
index 4dea636..0000000
--- a/src/main/scala/org/template/textclassification/BIDMachLRAlgorithm.scala
+++ /dev/null
@@ -1,178 +0,0 @@
-package org.template.textclassification
-
-import java.io.{InputStreamReader, BufferedReader, ByteArrayInputStream, Serializable}
-
-import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat}
-import BIDMat.MatFunctions._
-import BIDMat.SciFunctions._
-import BIDMat.Solvers._
-import BIDMat.Plotting._
-import BIDMach.Learner
-import BIDMach.models.{FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest}
-import BIDMach.networks.{DNN}
-import BIDMach.datasources.{DataSource,MatDS,FilesDS,SFilesDS}
-import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer}
-import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,IncMult,IncNorm,Telescoping}
-import BIDMach.causal.{IPTW}
-
-import io.prediction.controller.{P2LAlgorithm, Params}
-import org.apache.spark.SparkContext
-import org.apache.spark.ml.classification.LogisticRegression
-import org.apache.spark.mllib.linalg.{DenseVector, SparseVector}
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.DataFrame
-
-case class BIDMachLRAlgorithmParams (
-                               regParam  : Double
-                               ) extends Params
-
-
-class BIDMachLRAlgorithm(
-                           val sap: BIDMachLRAlgorithmParams
-                           ) extends P2LAlgorithm[PreparedData, NativeLRModel, Query, PredictedResult] {
-  // Train your model.
-  def train(sc: SparkContext, pd: PreparedData): NativeLRModel = {
-    new BIDMachLRModel(sc, pd, sap.regParam)
-  }
-
-  // Prediction method for trained model.
-  def predict(model: NativeLRModel, query: Query): PredictedResult = {
-    model.predict(query.text)
-  }
-
-}
-
-  class BIDMachLRModel (
-                  sc : SparkContext,
-                  pd : PreparedData,
-                  regParam : Double
-                  ) extends Serializable with NativeLRModel {
-
-    private val labels: Seq[Double] = pd.categoryMap.keys.toSeq
-
-    val data = prepareDataFrame(sc, pd, labels)
-
-    private val lrModels = fitLRModels
-
-    def fitLRModels:Seq[(Double, LREstimate)] = {
-
-      Mat.checkMKL
-      Mat.checkCUDA
-      if (Mat.hasCUDA > 0) GPUmem
-
-      // 3. Create a logistic regression model for each class.
-      val lrModels: Seq[(Double, LREstimate)] = labels.map(
-        label => {
-          val lab = label.toInt.toString
-
-          val (categories, features) = getFMatsFromData(lab, data)
-
-          val mm: Learner = trainGLM(features, FMat(categories))
-
-          test(categories, features, mm)
-          val modelmat = FMat(mm.modelmat)
-          val weightSize = size(modelmat)._2 -1
-
-          val weights = modelmat(1,0 to weightSize)
-
-          val weightArray = (for(i <- 0 to weightSize -1) yield weights(0,i).toDouble).toArray
-
-          // Return (label, feature coefficients, and intercept term.
-          (label, LREstimate(weightArray, weights(0,weightSize)))
-        }
-      )
-      lrModels
-    }
-
-    def predict(text : String): PredictedResult = {
-      predict(text, pd, lrModels)
-    }
-
-    def trainGLM(traindata:SMat, traincats: FMat): Learner = {
-      //min(traindata, 1, traindata) // the first "traindata" argument is the input, the other is output
-
-      val (mm, mopts) = GLM.learner(traindata, traincats, GLM.logistic)
-      mopts.what
-
-      mopts.lrate = 0.1
-      mopts.reg1weight = regParam
-      mopts.batchSize = 1000
-      mopts.npasses = 250
-      mopts.autoReset = false
-      mopts.addConstFeat = true
-      mm.train
-      mm
-    }
-
-    def getFMatsFromData(lab: String, data:DataFrame): (FMat, SMat) = {
-      val features = data.select(lab, "features")
-
-      val sparseVectorsWithRowIndices = (for (r <- features) yield (r.getAs[SparseVector](1), r.getAs[Double](0))).zipWithIndex 
-
-      val triples = for {
-        ((vector, innerLabel), rowIndex) <- sparseVectorsWithRowIndices
-        (index, value) <- vector.indices zip vector.values
-      }  yield ((rowIndex.toInt,index,value), innerLabel)
-
-      val catTriples = for {
-        ((vector, innerLabel), rowIndex) <- sparseVectorsWithRowIndices
-      } yield (rowIndex.toInt,innerLabel.toInt,1.0)
-
-      val cats = catTriples
-      val feats = triples.map(x => x._1)
-
-      val numRows = cats.count().toInt
-
-      val catsMat = loadFMatTxt(cats,numRows)
-
-      val featsMat = loadFMatTxt(feats,numRows)
-
-      println(featsMat)
-
-      (full(catsMat), featsMat)
-    }
-
-    //See https://github.com/BIDData/BIDMat/blob/master/src/main/scala/BIDMat/HMat.scala , method loadDMatTxt
-    def loadFMatTxt(cats:RDD[(Int,Int,Double)], nrows: Int):SMat = {
-
-      val rows = cats.map(x=> x._1).collect()
-      val cols = cats.map(x=> x._2).collect()
-      val vals = cats.map(x=> x._3).collect()
-
-
-      println("LOADING")
-
-      sparse(icol(cols.toList),icol(rows.toList),col(vals.toList))
-    }
-
-    def test(categories: DMat, features: SMat, mm: Learner): Unit = {
-      val testdata = features
-      val testcats = categories
-
-      //min(testdata, 1, testdata)
-
-      val predcats = zeros(testcats.nrows, testcats.ncols)
-
-
-
-      val (nn, nopts) = GLM.predictor(mm.model, testdata, predcats)
-
-
-
-      nopts.addConstFeat = true
-      nn.predict
-
-
-      computeAccuracy(FMat(testcats), predcats)
-    }
-
-    def computeAccuracy(testcats: FMat, predcats: FMat): Unit = {
-      //println(testcats)
-      //println(predcats)
-
-      val lacc = (predcats ∙→ testcats + (1 - predcats) ∙→ (1 - testcats)) / predcats.ncols
-      lacc.t
-      println(mean(lacc))
-    }
-
-}
diff --git a/src/main/scala/org/template/textclassification/Engine.scala b/src/main/scala/org/template/textclassification/Engine.scala
deleted file mode 100644
index 5002928..0000000
--- a/src/main/scala/org/template/textclassification/Engine.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-package org.template.textclassification
-
-import io.prediction.controller._
-
-
-
-// 1. Define Query class which serves as a wrapper for
-// new text data.
-class Query(
-  val text: String
-) extends Serializable
-
-
-
-// 2. Define PredictedResult class which serves as a
-// wrapper for a predicted class label and the associated
-// prediction confidence.
-case class PredictedResult (
-  val category: String,
-  val confidence: Double
-) extends Serializable
-
-
-
-
-
-// 3. Define ActualResult class which serves as a wrapper
-// for an observation's true class label.
-class ActualResult(
-  val category: String
-) extends Serializable
-
-
-
-// 4. Initialize the engine.
-object TextClassificationEngine extends EngineFactory {
-  override
-  def apply() = {
-    new Engine(
-      classOf[DataSource],
-      classOf[Preparator],
-      Map(
-        "VWlogisticSGD" -> classOf[VowpalLogisticRegressionWithSGDAlgorithm],
-        "nb" -> classOf[NBAlgorithm],
-        "lr" -> classOf[LRAlgorithm],
-        "bid-lr" -> classOf[BIDMachLRAlgorithm]
-      ), classOf[Serving]
-    )
-  }
-}
-
diff --git a/src/main/scala/org/template/textclassification/LRAlgorithm.scala b/src/main/scala/org/template/textclassification/LRAlgorithm.scala
deleted file mode 100644
index f8cae74..0000000
--- a/src/main/scala/org/template/textclassification/LRAlgorithm.scala
+++ /dev/null
@@ -1,89 +0,0 @@
-package org.template.textclassification
-
-import java.io._
-
-import BIDMat.{DMat, Mat}
-import io.prediction.controller.Params
-import io.prediction.controller.P2LAlgorithm
-import io.prediction.workflow.FakeRun
-import org.apache.spark.SparkContext
-import org.apache.spark.ml.classification.LogisticRegression
-import org.apache.spark.mllib.linalg.SparseVector
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.DataFrame
-import org.apache.spark.sql.functions
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.UserDefinedFunction
-import com.github.fommil.netlib.F2jBLAS
-import org.template.textclassification.NativeLRModel
-
-
-import scala.math._
-
-
-case class LRAlgorithmParams (
-  regParam  : Double
-) extends Params
-
-
-class LRAlgorithm(
-  val sap: LRAlgorithmParams
-) extends P2LAlgorithm[PreparedData, LRModel, Query, PredictedResult] {
-
-  // Train your model.
-  def train(sc: SparkContext, pd: PreparedData): LRModel = {
-    new LRModel(sc, pd, sap.regParam)
-  }
-
-  // Prediction method for trained model.
-  def predict(model: LRModel, query: Query): PredictedResult = {
-    model.predict(query.text)
-  }
-}
-
-class LRModel (
-  sc : SparkContext,
-  pd : PreparedData,
-  regParam : Double
-) extends Serializable with NativeLRModel {
-  private val labels: Seq[Double] = pd.categoryMap.keys.toSeq
-
-  val data = prepareDataFrame(sc, pd, labels)
-
-  private val lrModels = fitLRModels
-
-  def fitLRModels:Seq[(Double, LREstimate)] = {
-    val lr = new LogisticRegression()
-      .setMaxIter(10)
-      .setThreshold(0.5)
-      .setRegParam(regParam)
-
-    // 3. Create a logistic regression model for each class.
-    val lrModels: Seq[(Double, LREstimate)] = labels.map(
-      label => {
-        val lab = label.toInt.toString
-
-        //val (categories, features) = getDMatsFromData(lab)
-
-
-        val fit = lr.setLabelCol(lab).fit(
-          data.select(lab, "features")
-        )
-
-
-        // Return (label, feature coefficients, and intercept term.
-        (label, LREstimate(fit.weights.toArray, fit.intercept))
-
-      }
-    )
-    lrModels
-  }
-
-  def predict(text : String): PredictedResult = {
-    predict(text, pd, lrModels)
-  }
-
-
-}
-
-
diff --git a/src/main/scala/org/template/textclassification/NBAlgorithm.scala b/src/main/scala/org/template/textclassification/NBAlgorithm.scala
deleted file mode 100644
index a89c013..0000000
--- a/src/main/scala/org/template/textclassification/NBAlgorithm.scala
+++ /dev/null
@@ -1,94 +0,0 @@
-package org.template.textclassification
-
-import io.prediction.controller.P2LAlgorithm
-import io.prediction.controller.Params
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.classification.NaiveBayes
-import org.apache.spark.mllib.classification.NaiveBayesModel
-import org.apache.spark.mllib.linalg.Vector
-import com.github.fommil.netlib.F2jBLAS
-
-import scala.math._
-
-// 1. Define parameters for Supervised Learning Model. We are
-// using a Naive Bayes classifier, which gives us only one
-// hyperparameter in this stage.
-
-case class  NBAlgorithmParams(
-  lambda: Double
-) extends Params
-
-
-
-// 2. Define SupervisedAlgorithm class.
-
-class NBAlgorithm(
-  val sap: NBAlgorithmParams
-) extends P2LAlgorithm[PreparedData, NBModel, Query, PredictedResult] {
-
-  // Train your model.
-  def train(sc: SparkContext, pd: PreparedData): NBModel = {
-    new NBModel(pd, sap.lambda)
-  }
-
-  // Prediction method for trained model.
-  def predict(model: NBModel, query: Query): PredictedResult = {
-    model.predict(query.text)
-  }
-}
-
-class NBModel(
-val pd: PreparedData,
-lambda: Double
-) extends Serializable {
-
-
-
-  // 1. Fit a Naive Bayes model using the prepared data.
-
-  private val nb : NaiveBayesModel = NaiveBayes.train(
-    pd.transformedData.map(x=>x.point), lambda)
-
-
-
-  // 2. Set up linear algebra framework.
-
-  private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {
-    x.zip(y).map(e => e._1 * e._2).sum
-  }
-
-  val normalize = (u: Array[Double]) => {
-    val uSum = u.sum
-
-    u.map(e => e / uSum)
-  }
-
-
-
-  private val scoreArray = nb.pi.zip(nb.theta)
-
-  // 3. Given a document string, return a vector of corresponding
-  // class membership probabilities.
-
-  private def getScores(doc: String): Array[Double] = {
-    // Helper function used to normalize probability scores.
-    // Returns an object of type Array[Double]
-
-    // Vectorize query,
-    val x: Vector = pd.transform(doc).vector
-
-    val z = scoreArray
-      .map(e => innerProduct(e._2, x.toArray) + e._1)
-
-    normalize((0 until z.size).map(k => exp(z(k) - z.max)).toArray)
-  }
-
-  // 4. Implement predict method for our model using
-  // the prediction rule given in tutorial.
-
-  def predict(doc : String) : PredictedResult = {
-    val x: Array[Double] = getScores(doc)
-    val y: (Double, Double) = (nb.labels zip x).maxBy(_._2)
-    new PredictedResult(pd.categoryMap.getOrElse(y._1, ""), y._2)
-  }
-}
\ No newline at end of file
diff --git a/src/main/scala/org/template/textclassification/NativeLRModel.scala b/src/main/scala/org/template/textclassification/NativeLRModel.scala
deleted file mode 100644
index feb2e53..0000000
--- a/src/main/scala/org/template/textclassification/NativeLRModel.scala
+++ /dev/null
@@ -1,69 +0,0 @@
-package org.template.textclassification
-
-import java.io.Serializable
-
-import org.apache.spark.SparkContext
-import org.apache.spark.sql.{functions, UserDefinedFunction, SQLContext, DataFrame}
-
-import scala.math._
-
-/**
- * Created by burtn on 15/07/15.
- */
-trait NativeLRModel {
-  case class LREstimate (
-                          coefficients : Array[Double],
-                          intercept : Double
-                          ) extends Serializable
-
-
-  def fitLRModels:Seq[(Double, LREstimate)]
-
-  def predict(text: String) : PredictedResult
-
-  def prepareDataFrame(sc : SparkContext, pd : PreparedData, labels: Seq[Double]): DataFrame = {
-    // 1. Import SQLContext for creating DataFrame.
-    val sql: SQLContext = new SQLContext(sc)
-    import sql.implicits._
-
-    // 2. Initialize logistic regression model with regularization parameter.
-
-    labels.foldLeft(pd.transformedData.map(x => x.point).toDF)(//transform to Spark DataFrame
-
-      // Add the different binary columns for each label.
-      (data: DataFrame, label: Double) => {
-        // function: multiclass labels --> binary labels
-        val f: UserDefinedFunction = functions.udf((e: Double) => if (e == label) 1.0 else 0.0)
-
-        data.withColumn(label.toInt.toString, f(data("label")))
-      }
-    )
-  }
-
-  // 4. Enable vector inner product for prediction.
-
-  private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {
-    x.zip(y).map(e => e._1 * e._2).sum
-  }
-
-  // 5. Define prediction rule.
-  def predict(text : String,  pd : PreparedData,lrModels:Seq[(Double, LREstimate)]): PredictedResult = {
-    val x : Array[Double] = pd.transform(text).vector.toArray
-
-    // Logistic Regression binary formula for positive probability.
-    // According to MLLib documentation, class labeled 0 is used as pivot.
-    // Thus, we are using:
-    // log(p1/p0) = log(p1/(1 - p1)) = b0 + xTb =: z
-    // p1 = exp(z) * (1 - p1)
-    // p1 * (1 + exp(z)) = exp(z)
-    // p1 = exp(z)/(1 + exp(z))
-    val pred = lrModels.map(
-      e => {
-        val z = exp(innerProduct(e._2.coefficients, x) + e._2.intercept)
-        (e._1, z / (1 + z))
-      }
-    ).maxBy(_._2)
-
-    PredictedResult(pd.categoryMap(pred._1), pred._2)
-  }
-}
\ No newline at end of file
diff --git a/src/main/scala/org/template/textclassification/Preparator.scala b/src/main/scala/org/template/textclassification/Preparator.scala
deleted file mode 100644
index d55fd64..0000000
--- a/src/main/scala/org/template/textclassification/Preparator.scala
+++ /dev/null
@@ -1,198 +0,0 @@
-package org.template.textclassification
-
-
-import io.prediction.controller.PPreparator
-import io.prediction.controller.Params
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.feature.{IDF, IDFModel, HashingTF}
-import org.apache.spark.mllib.linalg._
-import org.apache.spark.mllib.linalg.distributed._
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.Row
-
-import scala.collection.Map
-import scala.collection.immutable.HashMap
-import scala.collection.JavaConversions._
-import scala.math._
-
-
-// 1. Initialize Preparator parameters. Recall that for our data
-// representation we are only required to input the n-gram window
-// components.
-
-case class PreparatorParams(
-  nGram: Int,
-  numFeatures: Int = 5000,
-  SPPMI: Boolean
-) extends Params
-
-case class VectorAndTextExample(
-                        vector: SparseVector,
-                        text : String
-                        ) extends Serializable
-
-case class LabeledPointAndTextExample(
-                                 point: LabeledPoint,
-                                 text : String
-                                 ) extends Serializable
-
-
-// 2. Initialize your Preparator class.
-
-class Preparator(pp: PreparatorParams) extends PPreparator[TrainingData, PreparedData] {
-
-  // Prepare your training data.
-  def prepare(sc : SparkContext, td: TrainingData): PreparedData = {
-    new PreparedData(td, pp.nGram, pp.numFeatures, pp.SPPMI, sc)
-  }
-}
-
-//------PreparedData------------------------
-
-class PreparedData(
-  val td: TrainingData,
-  val nGram: Int,
-  val numFeatures: Int,
-  val SPPMI: Boolean,
-  @transient val sc: SparkContext
-) extends Serializable {
-
-  // 1. Hashing function: Text -> term frequency vector.
-
-  private val hasher = new HashingTF(numFeatures = numFeatures)
-
-
-  def transform(text: String): VectorAndTextExample ={
-    return if(SPPMI) transformSPPMI(text) else transformTFIDF(text)
-  }
-
-  val idf : IDFModel = new IDF().fit(td.data.map(e => hashTF(e.text)))
-
-
-  //3. Document Transformer: text => tf-idf vector.
-
-  private def transformTFIDF(text : String): VectorAndTextExample = {
-    // Map(n-gram -> document tf)
-    val result = VectorAndTextExample(idf.transform(hashTF(text)).toSparse, text)
-    //println(result)
-    result
-  }
-
-  val ppmiMap = generateSPPMIMatrix(td,sc).collectAsMap()
-  println(ppmiMap.head._2.size)
-  println(ppmiMap.head)
-
-
-  private def hashTF(text: String): Vector = {
-    val newList: Array[String] = text.split(" ")
-      .sliding(nGram)
-      .map(_.mkString)
-      .toArray
-
-    hasher.transform(newList)
-  }
-
-  private def transformSPPMI(text : String): VectorAndTextExample = {
-    // Map(n-gram -> document tf)
-
-    val result = VectorAndTextExample(ppmiMap(text), text)
-    //println(result)
-    result
-  }
-
-
-  private def calculateSPPMI(localMat: Matrix, N: Long, k: Int): IndexedSeq[MatrixEntry] = {
-    //println(localMat)
-    val pmiMatrixEntries = for (i <- 0 until localMat.numCols; j <- 0 until localMat.numRows)
-      yield {
-        new MatrixEntry(j, i, math.max(0, math.log(localMat(j, i) * N / (localMat(i, i) * localMat(j, j))) / math.log(2.0) - math.log(k) / math.log(2.0)))
-      }
-    return pmiMatrixEntries
-  }
-
-  private def generateSPPMIMatrix(trainData: TrainingData, sc:SparkContext) : RDD[(String,SparseVector)] = {
-    val (hashedFeats: RDD[Vector], mat: IndexedRowMatrix, cooccurrences: Matrix) = computeCooccurrences(trainData)
-
-    val k = 10
-    val pmiEntries = calculateSPPMI(cooccurrences , mat.numRows, k)
-    val pmiMat: CoordinateMatrix = new CoordinateMatrix(sc.parallelize(pmiEntries))
-    val indexedPMIMat = pmiMat.toIndexedRowMatrix()
-
-    //val principalComponents = indexedPMIMat.toRowMatrix().computePrincipalComponents(500)
-    //val pcPMImat = indexedPMIMat.multiply(principalComponents)
-
-    println(trainData.data.count())
-    println(indexedPMIMat.numCols())
-//    println(pcPMImat.numCols())
-
-    val pmiMatRows = indexedPMIMat.rows.map(e=> e.index -> e.vector).collectAsMap()
-
-    return generateTextToSPPMIVectorMap(trainData, hashedFeats, pmiMatRows)
-  }
-  private def generateTextToSPPMIVectorMap(trainData: TrainingData, hashedFeats: RDD[Vector], pmiMatRows: Map[Long, Vector]): RDD[(String, SparseVector)] = {
-    //TODO: take into account feature counts, currently it's on/off
-    //also not use var
-    val composedWordVectors = for (v <- hashedFeats)
-      yield {
-        var ar = Array.fill[Double](pmiMatRows.head._2.size)(0)
-        for (i <- 0 until v.size; if v(i) > 0) {
-          //Additive
-          //ar = (ar,pmiMatRows(i).toArray).zipped.map(_ + _)
-
-          //Appending
-          ar = ar ++ pmiMatRows(i).toArray
-        }
-
-        Vectors.dense(ar.map(x => x)).toSparse
-      }
-
-    val textToSPPMIVectorMap = (trainData.data.map(x => x.text) zip composedWordVectors)
-    textToSPPMIVectorMap
-  }
-
-  private def computeCooccurrences(trainData: TrainingData): (RDD[Vector], IndexedRowMatrix, Matrix) = {
-    val hashedFeats = trainData.data.map(e => hashTF(e.text))
-
-    val rows = hashedFeats.map( x => 
-      x.toArray.map( value => if (value > 0) 1.0 else 0.0)).map( y => Vectors.dense(y).toSparse)
-
-    val indexedRows = rows.zipWithIndex.map(x => new IndexedRow(x._2, x._1))
-
-    val mat = new IndexedRowMatrix(indexedRows)
-
-
-    //println(mat.toBlockMatrix().toLocalMatrix())
-
-    //println(blockMat.numCols())
-    //println(blockMat.numRows())
-
-    val cooccurrences = mat.computeGramianMatrix()
-    //Alternatively:
-    //val cooccurrences = blockMat.transpose.multiply(blockMat)
-    (hashedFeats, mat, cooccurrences)
-  }
-
-
-
-
-
-
-
-
-  // 4. Data Transformer: RDD[documents] => RDD[LabeledPoints]
-
-  val transformedData: RDD[LabeledPointAndTextExample] = {
-    td.data.map(e =>  LabeledPointAndTextExample(LabeledPoint(e.label, transform(e.text).vector), e.text))
-  }
-
-
-  // 5. Finally extract category map, associating label to category.
-  val categoryMap = td.data.map(e => (e.label, e.category)).collectAsMap
-
-
-}
-
-
-
-
diff --git a/src/main/scala/org/template/textclassification/Serving.scala b/src/main/scala/org/template/textclassification/Serving.scala
deleted file mode 100644
index b41389e..0000000
--- a/src/main/scala/org/template/textclassification/Serving.scala
+++ /dev/null
@@ -1,13 +0,0 @@
-package org.template.textclassification
-
-import io.prediction.controller.LServing
-
-// 1. Define serving component.
-class Serving extends LServing[Query, PredictedResult] {
-
-  override
-  def serve(query: Query, predictedResults: Seq[PredictedResult]):
-  PredictedResult = predictedResults.maxBy(e => e.confidence)
-}
-
-
diff --git a/src/main/scala/org/template/textclassification/VowpalLogisticRegressionWithSGDAlgorithm.scala b/src/main/scala/org/template/textclassification/VowpalLogisticRegressionWithSGDAlgorithm.scala
deleted file mode 100644
index c40b187..0000000
--- a/src/main/scala/org/template/textclassification/VowpalLogisticRegressionWithSGDAlgorithm.scala
+++ /dev/null
@@ -1,91 +0,0 @@
-package org.template.textclassification
-
-import io.prediction.controller.P2LAlgorithm
-import io.prediction.controller.Params
-
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.SparkContext
-import org.apache.spark.mllib.linalg.Vector
-import grizzled.slf4j.Logger
-
-import java.nio.file.{Files, Paths}
-
-import vw.VW
-
-case class AlgorithmParams(
-  maxIter: Int,
-  regParam: Double,
-  stepSize: Double,
-  bitPrecision: Int,
-  modelName: String,
-  namespace: String,
-  ngram: Int
-) extends Params
-
-// extends P2LAlgorithm because VW doesn't contain RDD.
-class VowpalLogisticRegressionWithSGDAlgorithm(val ap: AlgorithmParams)
-  extends P2LAlgorithm[PreparedData, Array[Byte], Query, PredictedResult] {
-
-  @transient lazy val logger = Logger[this.type]
-
-  def train(sc: SparkContext, data: PreparedData): Array[Byte] = {
-   
-    require(!data.td.data.take(1).isEmpty,
-      s"RDD[labeldPoints] in PreparedData cannot be empty." +
-      " Please check if DataSource generates TrainingData" +
-      " and Preprator generates PreparedData correctly.")
-  
-    val reg = "--l2 " + ap.regParam
-    //val iters = "-c -k --passes " + ap.maxIter
-    val lrate = "-l " + ap.stepSize
-    val ngram = "--ngram " + ap.ngram 
-  
-    val vw = new VW("--loss_function logistic --invert_hash readable.model -b " + ap.bitPrecision + " " + "-f " + ap.modelName + " " + reg + " " + lrate + " " + ngram)
-    
-    val inputs = for (point <- data.transformedData.collect) yield (if (point.point.label.toDouble == 0.0) "-1.0" else "1.0") + " |" + ap.namespace + " " + rawTextToVWFormattedString(point.text) + " "  + vectorToVWFormattedString(point.point.features)
-
-    //val inputs = for (point <- data.transformedData) yield (if (point.label.toDouble == 0.0) "-1.0" else "1.0") + " |" + ap.namespace + " "  + rawTextToVWFormattedString(point.)
-
-     //Regressing    
-    //val inputs = for (point <- data.td.data) yield point.category.toDouble.toString + " |" + ap.namespace + " "  + rawTextToVWFormattedString(point.text)
-
-
-    //for (item <- inputsCollected) logger.info(item)
-
-    val results = for (item <- inputs) yield vw.learn(item)
-
-    val matchOnTrainSet = for (item <- inputs) yield  item.startsWith(if(vw.predict(item).toDouble  > 0.5) "1" else "-1")
-
-
-    val acc = (for (x <- matchOnTrainSet) yield if(x) 1 else 0).sum.toDouble / matchOnTrainSet.size
-    println("Accuracy on Training set: " + acc)
-
-    vw.close()
-     
-    Files.readAllBytes(Paths.get(ap.modelName))
-  }
-
-  def predict(byteArray: Array[Byte], query: Query): PredictedResult = {
-    Files.write(Paths.get(ap.modelName), byteArray)
-
-    val vw = new VW("--link logistic -i " + ap.modelName)
-    val pred = vw.predict("|" + ap.namespace + " " + rawTextToVWFormattedString(query.text)).toDouble 
-    vw.close()
-
-    val category = (if(pred > 0.5) 1 else 0).toString
-    val prob = (if(pred > 0.5) pred else 1.0 - pred)
-    val result = new PredictedResult(category, prob)
-   
-    result
-  }
-
-  def rawTextToVWFormattedString(str: String) : String = {
-     //VW input cannot contain these characters 
-     str.replaceAll("[|:]", " ")
-  }
-
-  def vectorToVWFormattedString(vec: Vector): String = {
-     vec.toArray.zipWithIndex.map{ case (dbl, int) => s"$int:$dbl"} mkString " "
-  }
-
-}
diff --git a/train.sh b/train.sh
deleted file mode 100755
index cc47b8b..0000000
--- a/train.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-# export JAVA_HOME="" # Set here if not set in environment
-# export CUDA_PATH="" # Set here if not set in environment
-JCUDA_VERSION="0.7.0a" # Fix if needed
-MEMSIZE="-Xmx14G"
-export JAVA_OPTS="${MEMSIZE} -Xms128M -Dfile.encoding=UTF-8" # Set as much memory as possible
-BIDMACH_ROOT="${BASH_SOURCE[0]}"
-if [ ! `uname` = "Darwin" ]; then
-  BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"`
-else 
-  while [ -L "${BIDMACH_ROOT}" ]; do
-    BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"`
-  done
-fi
-BIDMACH_ROOT=`dirname "$BIDMACH_ROOT"`
-pushd "${BIDMACH_ROOT}"  > /dev/null
-BIDMACH_ROOT=`pwd`
-BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed s+/cygdrive/c+c:+ )" 
-JCUDA_LIBDIR="${BIDMACH_ROOT}/lib"
-LIBDIR="${BIDMACH_ROOT}/lib"
-if [ -e java_native_path.txt ]; then
-  JAVA_NATIVE=`cat java_native_path.txt`
-else 
-  JAVA_NATIVE=`java getnativepath`
-  echo ${JAVA_NATIVE} > java_native_path.txt
-fi
-if [ `uname` = "Darwin" ]; then
-    export DYLD_LIBRARY_PATH="${LIBDIR}:/usr/local/cuda/lib:${DYLD_LIBRARY_PATH}"
-else
-    export LD_LIBRARY_PATH="${LIBDIR}:${LIBDIR}/cuda:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" 
-fi
-export JAVA_NATIVE=${JAVA_NATIVE}:${LD_LIBRARY_PATH}:${DYLD_LIBRARY_PATH}:
-popd > /dev/null
-if [ "$OS" = "Windows_NT" ]; then
-    if [ ! "${JAVA_HOME}" = "" ]; then
-        JAVA_HOME=`${BIDMACH_ROOT}/shortpath.bat "${JAVA_HOME}"`
-	    export JAVA_HOME=`echo ${JAVA_HOME} | sed 's_\\\\_/_g'`/bin
-    fi
-fi
-
-BIDMACH_LIBS="${LIBDIR}/BIDMat.jar;${LIBDIR}/ptplot.jar;${LIBDIR}/ptplotapplication.jar;${LIBDIR}/jhdf5.jar;${LIBDIR}/commons-math3-3.2.jar;${LIBDIR}/lz4-1.3.jar"
-
-JCUDA_LIBS="${JCUDA_LIBDIR}/jcuda-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcublas-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcufft-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcurand-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcusparse-${JCUDA_VERSION}.jar"
-
-ALL_LIBS=";${LIBDIR}/IScala.jar;${BIDMACH_ROOT}/BIDMach.jar;${BIDMACH_LIBS};${JCUDA_LIBS};${JAVA_HOME}/lib/tools.jar"
-
-if [ "$OS" = "Windows_NT" ]; then
-    if [ ! "${CUDA_PATH}" = "" ]; then
-	    NEWPATH=`${BIDMACH_ROOT}/shortpath.bat "${CUDA_PATH}"`
-	    NEWPATH=`echo ${NEWPATH} | sed 's_\\\\_/_g'`/bin
-    fi
-    DJAVA_NATIVE="-Djava.library.path=${LIBDIR};${NEWPATH}"
-else
-    ALL_LIBS=`echo "${ALL_LIBS}" | sed 's/;/:/g'`
-    DJAVA_NATIVE="-Djava.library.path=${JAVA_NATIVE}"
-fi
-if [ ! `uname` = "Darwin" ]; then
-    export JAVA_OPTS="${DJAVA_NATIVE} ${JAVA_OPTS}"
-fi
-
-pio train -- --driver-memory 16g --executor-memory 8g --conf spark.driver.maxResultSize=3g --conf spark.akka.frameSize=2047