[RNG-129:130] Improve performance of UnitSphereSampler

RNG-129:

- Add a benchmark for specialisations for low dimensions.
- Implement specialistions for 1D, 2D and 3D. - Use a delegate in a
publicly constructed UnitSphereSampler.

RNG-130:

- Fix 1 dimension sampling to only return vectors containing 1 or -1.
diff --git a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/UnitSphereSamplerBenchmark.java b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/UnitSphereSamplerBenchmark.java
new file mode 100644
index 0000000..7aae519
--- /dev/null
+++ b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/UnitSphereSamplerBenchmark.java
@@ -0,0 +1,446 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.examples.jmh.sampling;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.distribution.NormalizedGaussianSampler;
+import org.apache.commons.rng.sampling.distribution.ZigguratNormalizedGaussianSampler;
+import org.apache.commons.rng.simple.RandomSource;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Executes benchmark to compare the speed of generating samples on the surface of an
+ * N-dimension unit sphere.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = { "-server", "-Xms128M", "-Xmx128M" })
+public class UnitSphereSamplerBenchmark {
+    /** Name for the baseline method. */
+    private static final String BASELINE = "Baseline";
+    /** Name for the non-array based method. */
+    private static final String NON_ARRAY = "NonArray";
+    /** Name for the array based method. */
+    private static final String ARRAY = "Array";
+    /** Error message for an unknown sampler type. */
+    private static final String UNKNOWN_SAMPLER = "Unknown sampler type: ";
+
+    /**
+     * The sampler.
+     */
+    private interface Sampler {
+        /**
+         * Gets the next sample.
+         *
+         * @return the sample
+         */
+        double[] sample();
+    }
+
+    /**
+     * Base class for the sampler data.
+     * Contains the source of randomness and the number of samples.
+     * The sampler should be created by a sub-class of the data.
+     */
+    @State(Scope.Benchmark)
+    public abstract static class SamplerData {
+        /** The sampler. */
+        private Sampler sampler;
+
+        /** The number of samples. */
+        @Param({"100"})
+        private int size;
+
+        /**
+         * Gets the size.
+         *
+         * @return the size
+         */
+        public int getSize() {
+            return size;
+        }
+
+        /**
+         * Gets the sampler.
+         *
+         * @return the sampler
+         */
+        public Sampler getSampler() {
+            return sampler;
+        }
+
+        /**
+         * Create the source of randomness.
+         */
+        @Setup
+        public void setup() {
+            // This could be configured using @Param
+            final UniformRandomProvider rng = RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PP);
+            sampler = createSampler(rng);
+        }
+
+        /**
+         * Creates the sampler.
+         *
+         * @param rng the source of randomness
+         * @return the sampler
+         */
+        protected abstract Sampler createSampler(UniformRandomProvider rng);
+    }
+
+    /**
+     * The 1D unit line sampler.
+     */
+    @State(Scope.Benchmark)
+    public static class Sampler1D extends SamplerData {
+        /** Name for the signed double method. */
+        private static final String SIGNED_DOUBLE = "signedDouble";
+        /** Name for the boolean method. */
+        private static final String BOOLEAN = "boolean";
+
+        /** The sampler type. */
+        @Param({BASELINE, SIGNED_DOUBLE, BOOLEAN, ARRAY})
+        private String type;
+
+        /** {@inheritDoc} */
+        @Override
+        protected Sampler createSampler(final UniformRandomProvider rng) {
+            if (BASELINE.equals(type)) {
+                return new Sampler() {
+                    @Override
+                    public double[] sample() {
+                        return new double[] {1.0};
+                    }
+                };
+            } else if (SIGNED_DOUBLE.equals(type)) {
+                return new Sampler() {
+                    @Override
+                    public double[] sample() {
+                        // (1 - 0) or (1 - 2)
+                        // Use the sign bit
+                        return new double[] {1.0 - ((rng.nextInt() >>> 30) & 0x2)};
+                    }
+                };
+            } else if (BOOLEAN.equals(type)) {
+                return new Sampler() {
+                    @Override
+                    public double[] sample() {
+                        return new double[] {rng.nextBoolean() ? -1.0 : 1.0};
+                    }
+                };
+            } else if (ARRAY.equals(type)) {
+                return new ArrayBasedUnitSphereSampler(1, rng);
+            }
+            throw new IllegalStateException(UNKNOWN_SAMPLER + type);
+        }
+    }
+
+    /**
+     * The 2D unit circle sampler.
+     */
+    @State(Scope.Benchmark)
+    public static class Sampler2D extends SamplerData {
+        /** The sampler type. */
+        @Param({BASELINE, ARRAY, NON_ARRAY})
+        private String type;
+
+        /** {@inheritDoc} */
+        @Override
+        protected Sampler createSampler(final UniformRandomProvider rng) {
+            if (BASELINE.equals(type)) {
+                return new Sampler() {
+                    @Override
+                    public double[] sample() {
+                        return new double[] {1.0, 0.0};
+                    }
+                };
+            } else if (ARRAY.equals(type)) {
+                return new ArrayBasedUnitSphereSampler(2, rng);
+            } else if (NON_ARRAY.equals(type)) {
+                return new UnitSphereSampler2D(rng);
+            }
+            throw new IllegalStateException(UNKNOWN_SAMPLER + type);
+        }
+
+        /**
+         * Sample from a 2D unit sphere.
+         */
+        private static class UnitSphereSampler2D implements Sampler {
+            /** Sampler used for generating the individual components of the vectors. */
+            private final NormalizedGaussianSampler sampler;
+
+            /**
+             * @param rng the source of randomness
+             */
+            UnitSphereSampler2D(UniformRandomProvider rng) {
+                sampler = new ZigguratNormalizedGaussianSampler(rng);
+            }
+
+            @Override
+            public double[] sample() {
+                final double x = sampler.sample();
+                final double y = sampler.sample();
+                final double sum = x * x + y * y;
+
+                if (sum == 0) {
+                    // Zero-norm vector is discarded.
+                    return sample();
+                }
+
+                final double f = 1.0 / Math.sqrt(sum);
+                return new double[] {x * f, y * f};
+            }
+        }
+    }
+
+    /**
+     * The 3D unit sphere sampler.
+     */
+    @State(Scope.Benchmark)
+    public static class Sampler3D extends SamplerData {
+        /** The sampler type. */
+        @Param({BASELINE, ARRAY, NON_ARRAY})
+        private String type;
+
+        /** {@inheritDoc} */
+        @Override
+        protected Sampler createSampler(final UniformRandomProvider rng) {
+            if (BASELINE.equals(type)) {
+                return new Sampler() {
+                    @Override
+                    public double[] sample() {
+                        return new double[] {1.0, 0.0, 0.0};
+                    }
+                };
+            } else if (ARRAY.equals(type)) {
+                return new ArrayBasedUnitSphereSampler(3, rng);
+            } else if (NON_ARRAY.equals(type)) {
+                return new UnitSphereSampler3D(rng);
+            }
+            throw new IllegalStateException(UNKNOWN_SAMPLER + type);
+        }
+
+        /**
+         * Sample from a 3D unit sphere.
+         */
+        private static class UnitSphereSampler3D implements Sampler {
+            /** Sampler used for generating the individual components of the vectors. */
+            private final NormalizedGaussianSampler sampler;
+
+            /**
+             * @param rng the source of randomness
+             */
+            UnitSphereSampler3D(UniformRandomProvider rng) {
+                sampler = new ZigguratNormalizedGaussianSampler(rng);
+            }
+
+            @Override
+            public double[] sample() {
+                final double x = sampler.sample();
+                final double y = sampler.sample();
+                final double z = sampler.sample();
+                final double sum = x * x + y * y + z * z;
+
+                if (sum == 0) {
+                    // Zero-norm vector is discarded.
+                    return sample();
+                }
+
+                final double f = 1.0 / Math.sqrt(sum);
+                return new double[] {x * f, y * f, z * f};
+            }
+        }
+    }
+
+    /**
+     * The 4D unit hypersphere sampler.
+     */
+    @State(Scope.Benchmark)
+    public static class Sampler4D extends SamplerData {
+        /** The sampler type. */
+        @Param({BASELINE, ARRAY, NON_ARRAY})
+        private String type;
+
+        /** {@inheritDoc} */
+        @Override
+        protected Sampler createSampler(final UniformRandomProvider rng) {
+            if (BASELINE.equals(type)) {
+                return new Sampler() {
+                    @Override
+                    public double[] sample() {
+                        return new double[] {1.0, 0.0, 0.0, 0.0};
+                    }
+                };
+            } else if (ARRAY.equals(type)) {
+                return new ArrayBasedUnitSphereSampler(4, rng);
+            } else if (NON_ARRAY.equals(type)) {
+                return new UnitSphereSampler4D(rng);
+            }
+            throw new IllegalStateException(UNKNOWN_SAMPLER + type);
+        }
+
+        /**
+         * Sample from a 4D unit hypersphere.
+         */
+        private static class UnitSphereSampler4D implements Sampler {
+            /** Sampler used for generating the individual components of the vectors. */
+            private final NormalizedGaussianSampler sampler;
+
+            /**
+             * @param rng the source of randomness
+             */
+            UnitSphereSampler4D(UniformRandomProvider rng) {
+                sampler = new ZigguratNormalizedGaussianSampler(rng);
+            }
+
+            @Override
+            public double[] sample() {
+                final double x = sampler.sample();
+                final double y = sampler.sample();
+                final double z = sampler.sample();
+                final double a = sampler.sample();
+                final double sum = x * x + y * y + z * z + a * a;
+
+                if (sum == 0) {
+                    // Zero-norm vector is discarded.
+                    return sample();
+                }
+
+                final double f = 1.0 / Math.sqrt(sum);
+                return new double[] {x * f, y * f, z * f, a * f};
+            }
+        }
+    }
+
+    /**
+     * Sample from a unit sphere using an array based method.
+     */
+    private static class ArrayBasedUnitSphereSampler implements Sampler {
+        /** Space dimension. */
+        private final int dimension;
+        /** Sampler used for generating the individual components of the vectors. */
+        private final NormalizedGaussianSampler sampler;
+
+        /**
+         * @param dimension space dimension
+         * @param rng the source of randomness
+         */
+        ArrayBasedUnitSphereSampler(int dimension, UniformRandomProvider rng) {
+            this.dimension = dimension;
+            sampler = new ZigguratNormalizedGaussianSampler(rng);
+        }
+
+        @Override
+        public double[] sample() {
+            final double[] v = new double[dimension];
+
+            // Pick a point by choosing a standard Gaussian for each element,
+            // and then normalize to unit length.
+            double sum = 0;
+            for (int i = 0; i < dimension; i++) {
+                final double x = sampler.sample();
+                v[i] = x;
+                sum += x * x;
+            }
+
+            if (sum == 0) {
+                // Zero-norm vector is discarded.
+                return sample();
+            }
+
+            final double f = 1 / Math.sqrt(sum);
+            for (int i = 0; i < dimension; i++) {
+                v[i] *= f;
+            }
+
+            return v;
+        }
+    }
+
+    /**
+     * Run the sampler for the configured number of samples.
+     *
+     * @param bh Data sink
+     * @param data Input data.
+     */
+    private static void runSampler(Blackhole bh, SamplerData data) {
+        final Sampler sampler = data.getSampler();
+        for (int i = data.getSize() - 1; i >= 0; i--) {
+            bh.consume(sampler.sample());
+        }
+    }
+
+    /**
+     * Generation of uniform samples on a 1D unit line.
+     *
+     * @param bh Data sink
+     * @param data Input data.
+     */
+    @Benchmark
+    public void create1D(Blackhole bh, Sampler1D data) {
+        runSampler(bh, data);
+    }
+
+    /**
+     * Generation of uniform samples from a 2D unit circle.
+     *
+     * @param bh Data sink
+     * @param data Input data.
+     */
+    @Benchmark
+    public void create2D(Blackhole bh, Sampler2D data) {
+        runSampler(bh, data);
+    }
+
+    /**
+     * Generation of uniform samples from a 3D unit sphere.
+     *
+     * @param bh Data sink
+     * @param data Input data.
+     */
+    @Benchmark
+    public void create3D(Blackhole bh, Sampler3D data) {
+        runSampler(bh, data);
+    }
+
+    /**
+     * Generation of uniform samples from a 4D unit sphere.
+     *
+     * @param bh Data sink
+     * @param data Input data.
+     */
+    @Benchmark
+    public void create4D(Blackhole bh, Sampler4D data) {
+        runSampler(bh, data);
+    }
+}
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java
index 6ce8b74..eadaf9b 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java
@@ -25,22 +25,195 @@
  * Generate vectors <a href="http://mathworld.wolfram.com/SpherePointPicking.html">
  * isotropically located on the surface of a sphere</a>.
  *
- * <p>Sampling uses:</p>
+ * <p>Sampling in 2 or more dimensions uses:</p>
  *
  * <ul>
  *   <li>{@link UniformRandomProvider#nextLong()}
  *   <li>{@link UniformRandomProvider#nextDouble()}
  * </ul>
  *
+ * <p>Sampling in 1D uses the sign bit from {@link UniformRandomProvider#nextInt()} to set
+ * the direction of the vector.
+ *
  * @since 1.1
  */
 public class UnitSphereSampler implements SharedStateSampler<UnitSphereSampler> {
-    /** Sampler used for generating the individual components of the vectors. */
-    private final NormalizedGaussianSampler sampler;
-    /** Space dimension. */
-    private final int dimension;
+    /** The dimension for 1D sampling. */
+    private static final int ONE_D = 1;
+    /** The dimension for 2D sampling. */
+    private static final int TWO_D = 2;
+    /** The dimension for 3D sampling. */
+    private static final int THREE_D = 3;
+    /**
+     * The mask to extract the second bit from an integer
+     * (naming starts at bit 1 for the least significant bit).
+     * The masked integer will have a value 0 or 2.
+     */
+    private static final int MASK_BIT_2 = 0x2;
+
+    /** The internal sampler optimised for the dimension. */
+    private final UnitSphereSampler delegate;
 
     /**
+     * Sample uniformly from the ends of a 1D unit line.
+     */
+    private static class UnitSphereSampler1D extends UnitSphereSampler {
+        /** The source of randomness. */
+        private final UniformRandomProvider rng;
+
+        /**
+         * @param rng Source of randomness.
+         */
+        UnitSphereSampler1D(UniformRandomProvider rng) {
+            this.rng = rng;
+        }
+
+        @Override
+        public double[] nextVector() {
+            // Either:
+            // 1 - 0 = 1
+            // 1 - 2 = -1
+            // Use the sign bit
+            return new double[] {1.0 - ((rng.nextInt() >>> 30) & MASK_BIT_2)};
+        }
+
+        @Override
+        public UnitSphereSampler withUniformRandomProvider(UniformRandomProvider rng) {
+            return new UnitSphereSampler1D(rng);
+        }
+    }
+
+    /**
+     * Sample uniformly from a 2D unit circle.
+     * This is a 2D specialisation of the UnitSphereSamplerND.
+     */
+    private static class UnitSphereSampler2D extends UnitSphereSampler {
+        /** Sampler used for generating the individual components of the vectors. */
+        private final NormalizedGaussianSampler sampler;
+
+        /**
+         * @param rng Source of randomness.
+         */
+        UnitSphereSampler2D(UniformRandomProvider rng) {
+            sampler = new ZigguratNormalizedGaussianSampler(rng);
+        }
+
+        @Override
+        public double[] nextVector() {
+            final double x = sampler.sample();
+            final double y = sampler.sample();
+            final double sum = x * x + y * y;
+
+            if (sum == 0) {
+                // Zero-norm vector is discarded.
+                return nextVector();
+            }
+
+            final double f = 1.0 / Math.sqrt(sum);
+            return new double[] {x * f, y * f};
+        }
+
+        @Override
+        public UnitSphereSampler withUniformRandomProvider(UniformRandomProvider rng) {
+            return new UnitSphereSampler2D(rng);
+        }
+    }
+
+    /**
+     * Sample uniformly from a 3D unit sphere.
+     * This is a 3D specialisation of the UnitSphereSamplerND.
+     */
+    private static class UnitSphereSampler3D extends UnitSphereSampler {
+        /** Sampler used for generating the individual components of the vectors. */
+        private final NormalizedGaussianSampler sampler;
+
+        /**
+         * @param rng Source of randomness.
+         */
+        UnitSphereSampler3D(UniformRandomProvider rng) {
+            sampler = new ZigguratNormalizedGaussianSampler(rng);
+        }
+
+        @Override
+        public double[] nextVector() {
+            final double x = sampler.sample();
+            final double y = sampler.sample();
+            final double z = sampler.sample();
+            final double sum = x * x + y * y + z * z;
+
+            if (sum == 0) {
+                // Zero-norm vector is discarded.
+                return nextVector();
+            }
+
+            final double f = 1.0 / Math.sqrt(sum);
+            return new double[] {x * f, y * f, z * f};
+        }
+
+        @Override
+        public UnitSphereSampler withUniformRandomProvider(UniformRandomProvider rng) {
+            return new UnitSphereSampler3D(rng);
+        }
+    }
+
+    /**
+     * Sample uniformly from a ND unit sphere.
+     */
+    private static class UnitSphereSamplerND extends UnitSphereSampler {
+        /** Space dimension. */
+        private final int dimension;
+        /** Sampler used for generating the individual components of the vectors. */
+        private final NormalizedGaussianSampler sampler;
+
+        /**
+         * @param dimension Space dimension.
+         * @param rng Source of randomness.
+         */
+        UnitSphereSamplerND(int dimension, UniformRandomProvider rng) {
+            this.dimension = dimension;
+            sampler = new ZigguratNormalizedGaussianSampler(rng);
+        }
+
+        @Override
+        public double[] nextVector() {
+            final double[] v = new double[dimension];
+
+            // Pick a point by choosing a standard Gaussian for each element,
+            // and then normalize to unit length.
+            double sum = 0;
+            for (int i = 0; i < dimension; i++) {
+                final double x = sampler.sample();
+                v[i] = x;
+                sum += x * x;
+            }
+
+            if (sum == 0) {
+                // Zero-norm vector is discarded.
+                // Using recursion as it is highly unlikely to generate more
+                // than a few such vectors. It also protects against infinite
+                // loop (in case a buggy generator is used), by eventually
+                // raising a "StackOverflowError".
+                return nextVector();
+            }
+
+            final double f = 1 / Math.sqrt(sum);
+            for (int i = 0; i < dimension; i++) {
+                v[i] *= f;
+            }
+
+            return v;
+        }
+
+        @Override
+        public UnitSphereSampler withUniformRandomProvider(UniformRandomProvider rng) {
+            return new UnitSphereSamplerND(dimension, rng);
+        }
+    }
+
+    /**
+     * This instance delegates sampling. Use the factory method
+     * {@link #of(int, UniformRandomProvider)} to create an optimal sampler.
+     *
      * @param dimension Space dimension.
      * @param rng Generator for the individual components of the vectors.
      * A shallow copy will be stored in this instance.
@@ -48,55 +221,22 @@
      */
     public UnitSphereSampler(int dimension,
                              UniformRandomProvider rng) {
-        if (dimension <= 0) {
-            throw new IllegalArgumentException("Dimension must be strictly positive");
-        }
-
-        this.dimension = dimension;
-        sampler = new ZigguratNormalizedGaussianSampler(rng);
+        delegate = of(dimension, rng);
     }
 
     /**
-     * @param rng Generator for the individual components of the vectors.
-     * @param source Source to copy.
+     * Private constructor used by sub-class specialisations.
+     * In future versions the public constructor should be removed and the class made abstract.
      */
-    private UnitSphereSampler(UniformRandomProvider rng,
-                              UnitSphereSampler source) {
-        // The Gaussian sampler has no shared state so create a new instance
-        sampler = new ZigguratNormalizedGaussianSampler(rng);
-        dimension = source.dimension;
+    private UnitSphereSampler() {
+        delegate = null;
     }
 
     /**
      * @return a random normalized Cartesian vector.
      */
     public double[] nextVector() {
-        final double[] v = new double[dimension];
-
-        // Pick a point by choosing a standard Gaussian for each element,
-        // and then normalize to unit length.
-        double normSq = 0;
-        for (int i = 0; i < dimension; i++) {
-            final double comp = sampler.sample();
-            v[i] = comp;
-            normSq += comp * comp;
-        }
-
-        if (normSq == 0) {
-            // Zero-norm vector is discarded.
-            // Using recursion as it is highly unlikely to generate more
-            // than a few such vectors. It also protects against infinite
-            // loop (in case a buggy generator is used), by eventually
-            // raising a "StackOverflowError".
-            return nextVector();
-        }
-
-        final double f = 1 / Math.sqrt(normSq);
-        for (int i = 0; i < dimension; i++) {
-            v[i] *= f;
-        }
-
-        return v;
+        return delegate.nextVector();
     }
 
     /**
@@ -106,6 +246,31 @@
      */
     @Override
     public UnitSphereSampler withUniformRandomProvider(UniformRandomProvider rng) {
-        return new UnitSphereSampler(rng, this);
+        return delegate.withUniformRandomProvider(rng);
+    }
+
+    /**
+     * Create a unit sphere sampler for the given dimension.
+     *
+     * @param dimension Space dimension.
+     * @param rng Generator for the individual components of the vectors. A shallow
+     * copy will be stored in the sampler.
+     * @return the sampler
+     * @throws IllegalArgumentException If {@code dimension <= 0}
+     *
+     * @since 1.4
+     */
+    public static UnitSphereSampler of(int dimension,
+                                       UniformRandomProvider rng) {
+        if (dimension <= 0) {
+            throw new IllegalArgumentException("Dimension must be strictly positive");
+        } else if (dimension == ONE_D) {
+            return new UnitSphereSampler1D(rng);
+        } else if (dimension == TWO_D) {
+            return new UnitSphereSampler2D(rng);
+        } else if (dimension == THREE_D) {
+            return new UnitSphereSampler3D(rng);
+        }
+        return new UnitSphereSamplerND(dimension, rng);
     }
 }
diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java
index 3c9df0b..efd1a6d 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java
@@ -19,6 +19,8 @@
 import org.junit.Assert;
 import org.junit.Test;
 import org.apache.commons.rng.simple.RandomSource;
+import java.util.Arrays;
+import org.apache.commons.math3.stat.inference.ChiSquareTest;
 import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.rng.core.source64.SplitMix64;
 
@@ -26,77 +28,408 @@
  * Test for {@link UnitSphereSampler}.
  */
 public class UnitSphereSamplerTest {
+    /** 2 pi */
+    private static final double TWO_PI = 2 * Math.PI;
+
+    /**
+     * Test a non-positive dimension.
+     */
     @Test(expected = IllegalArgumentException.class)
-    public void testPrecondition() {
+    public void testInvalidDimensionThrows() {
         new UnitSphereSampler(0, null);
     }
 
     /**
+     * Test a non-positive dimension.
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidDimensionThrowsWithFactoryConstructor() {
+        UnitSphereSampler.of(0, null);
+    }
+
+    /**
+     * Test the distribution of points in one dimension.
+     */
+    @Test
+    public void testDistribution1D() {
+        testDistribution1D(false);
+    }
+
+    /**
+     * Test the distribution of points in one dimension with the factory constructor.
+     */
+    @Test
+    public void testDistribution1DWithFactoryConstructor() {
+        testDistribution1D(true);
+    }
+
+    /**
+     * Test the distribution of points in one dimension.
+     * RNG-130: All samples should be 1 or -1.
+     */
+    private static void testDistribution1D(boolean factoryConstructor) {
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PP, 0x1a2b3cL);
+        final UnitSphereSampler generator = createUnitSphereSampler(1, rng, factoryConstructor);
+        final int samples = 10000;
+        // Count the negatives.
+        int count = 0;
+        for (int i = 0; i < samples; i++) {
+            final double[] v = generator.nextVector();
+            Assert.assertEquals(1, v.length);
+            final double d = v[0];
+            if (d == -1.0) {
+                count++;
+            } else if (d != 1.0) {
+                // RNG-130: All samples should be 1 or -1.
+                Assert.fail("Invalid unit length: " + d);
+            }
+        }
+        // Test the number of negatives is approximately 50%
+        assertMonobit(count, samples);
+    }
+
+    /**
+     * Assert that the number of 1 bits is approximately 50%. This is based upon a fixed-step "random
+     * walk" of +1/-1 from zero.
+     *
+     * <p>The test is equivalent to the NIST Monobit test with a fixed p-value of 0.01. The number of
+     * bits is recommended to be above 100.</p>
+     *
+     * @see <a href="https://csrc.nist.gov/publications/detail/sp/800-22/rev-1a/final">Bassham, et al
+     *      (2010) NIST SP 800-22: A Statistical Test Suite for Random and Pseudorandom Number
+     *      Generators for Cryptographic Applications. Section 2.1.</a>
+     *
+     * @param bitCount The bit count.
+     * @param numberOfBits Number of bits.
+     */
+    private static void assertMonobit(int bitCount, int numberOfBits) {
+        // Convert the bit count into a number of +1/-1 steps.
+        final double sum = 2.0 * bitCount - numberOfBits;
+        // The reference distribution is Normal with a standard deviation of sqrt(n).
+        // Check the absolute position is not too far from the mean of 0 with a fixed
+        // p-value of 0.01 taken from a 2-tailed Normal distribution. Computation of
+        // the p-value requires the complimentary error function.
+        final double absSum = Math.abs(sum);
+        final double max = Math.sqrt(numberOfBits) * 2.576;
+        Assert.assertTrue(
+                "Walked too far astray: " + absSum + " > " + max +
+                " (test will fail randomly about 1 in 100 times)",
+                absSum <= max);
+    }
+
+    /**
      * Test the distribution of points in two dimensions.
      */
     @Test
     public void testDistribution2D() {
-        UniformRandomProvider rng = RandomSource.create(RandomSource.XOR_SHIFT_1024_S, 17399225432L);
-        UnitSphereSampler generator = new UnitSphereSampler(2, rng);
+        testDistribution2D(false);
+    }
+
+    /**
+     * Test the distribution of points in two dimensions with the factory constructor.
+     */
+    @Test
+    public void testDistribution2DWithFactoryConstructor() {
+        testDistribution2D(true);
+    }
+
+    /**
+     * Test the distribution of points in two dimensions.
+     * Obtains polar coordinates and checks the angle distribution is uniform.
+     */
+    private static void testDistribution2D(boolean factoryConstructor) {
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.XOR_SHIFT_1024_S, 17399225432L);
+        final UnitSphereSampler generator = createUnitSphereSampler(2, rng, factoryConstructor);
 
         // In 2D, angles with a given vector should be uniformly distributed.
-        final int[] angleBuckets = new int[100];
-        final int steps = 1000000;
+        final int angleBins = 200;
+        final long[] observed = new long[angleBins];
+        final int steps = 100000;
         for (int i = 0; i < steps; ++i) {
             final double[] v = generator.nextVector();
             Assert.assertEquals(2, v.length);
-            Assert.assertEquals(1, length(v), 1e-10);
-            // Compute angle formed with vector (1, 0)?
-            // Cosine of angle is their dot product, because both are unit length.
-            // Dot product here is just the first element of the vector by construction.
-            final double angle = Math.acos(v[0]);
-            final int bucket = (int) (angle / Math.PI * angleBuckets.length);
-            ++angleBuckets[bucket];
+            Assert.assertEquals(1.0, length(v), 1e-10);
+            // Get the polar angle bin from xy
+            final int angleBin = angleBin(angleBins, v[0], v[1]);
+            observed[angleBin]++;
         }
 
-        // Simplistic test for roughly even distribution.
-        final int expectedBucketSize = steps / angleBuckets.length;
-        for (int bucket : angleBuckets) {
-            Assert.assertTrue("Bucket count " + bucket + " vs expected " + expectedBucketSize,
-                              Math.abs(expectedBucketSize - bucket) < 350);
-        }
+        final double[] expected = new double[observed.length];
+        Arrays.fill(expected, (double) steps / observed.length);
+        final double p = new ChiSquareTest().chiSquareTest(expected, observed);
+        Assert.assertFalse("p-value too small: " + p, p < 0.01);
     }
 
-    /** Cf. RNG-55. */
-    @Test(expected = StackOverflowError.class)
-    public void testBadProvider1() {
-        final UniformRandomProvider bad = new UniformRandomProvider() {
-                // CHECKSTYLE: stop all
-                public long nextLong(long n) { return 0; }
-                public long nextLong() { return 0; }
-                public int nextInt(int n) { return 0; }
-                public int nextInt() { return 0; }
-                public float nextFloat() { return 0; }
-                public double nextDouble() { return 0;}
-                public void nextBytes(byte[] bytes, int start, int len) {}
-                public void nextBytes(byte[] bytes) {}
-                public boolean nextBoolean() { return false; }
-                // CHECKSTYLE: resume all
-            };
-
-        new UnitSphereSampler(1, bad).nextVector();
-    }
-
-    /** Cf. RNG-55. */
+    /**
+     * Test the distribution of points in three dimensions.
+     */
     @Test
-    public void testBadProvider1ThenGoodProvider() {
+    public void testDistribution3D() {
+        testDistribution3D(false);
+    }
+
+    /**
+     * Test the distribution of points in three dimensions with the factory constructor.
+     */
+    @Test
+    public void testDistribution3DWithFactoryConstructor() {
+        testDistribution3D(true);
+    }
+
+    /**
+     * Test the distribution of points in three dimensions.
+     * Obtains spherical coordinates and checks the distribution is uniform.
+     */
+    private static void testDistribution3D(boolean factoryConstructor) {
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.XO_SHI_RO_256_PP, 0xabcdefL);
+        final UnitSphereSampler generator = createUnitSphereSampler(3, rng, factoryConstructor);
+
+        // Get 3D spherical coordinates. Assign to a bin.
+        //
+        // polar angle (theta) in [0, 2pi)
+        // azimuthal angle (phi) in [0, pi)
+        //
+        // theta = arctan(y/x) and is uniformly distributed
+        // phi = arccos(z); and cos(phi) is uniformly distributed
+        final int angleBins = 20;
+        final int depthBins = 10;
+        final long[] observed = new long[angleBins * depthBins];
+        final int steps = 1000000;
+        for (int i = 0; i < steps; ++i) {
+            final double[] v = generator.nextVector();
+            Assert.assertEquals(3, v.length);
+            Assert.assertEquals(1.0, length(v), 1e-10);
+            // Get the polar angle bin from xy
+            final int angleBin = angleBin(angleBins, v[0], v[1]);
+            // Map cos(phi) = z from [-1, 1) to [0, 1) then assign a bin
+            final int depthBin = (int) (depthBins * (v[2] + 1) / 2);
+            observed[depthBin * angleBins + angleBin]++;
+        }
+
+        final double[] expected = new double[observed.length];
+        Arrays.fill(expected, (double) steps / observed.length);
+        final double p = new ChiSquareTest().chiSquareTest(expected, observed);
+        Assert.assertFalse("p-value too small: " + p, p < 0.01);
+    }
+
+    /**
+     * Test the distribution of points in four dimensions.
+     */
+    @Test
+    public void testDistribution4D() {
+        testDistribution4D(false);
+    }
+
+    /**
+     * Test the distribution of points in four dimensions with the factory constructor.
+     */
+    @Test
+    public void testDistribution4DWithFactoryConstructor() {
+        testDistribution4D(true);
+    }
+
+    /**
+     * Test the distribution of points in four dimensions.
+     * Checks the surface of the 3-sphere can be used to generate uniform samples within a circle.
+     */
+    private static void testDistribution4D(boolean factoryConstructor) {
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.XO_SHI_RO_512_PP, 0x9876543210L);
+        final UnitSphereSampler generator = createUnitSphereSampler(4, rng, factoryConstructor);
+
+        // No uniform distribution of spherical coordinates for a 3-sphere.
+        // https://en.wikipedia.org/wiki/N-sphere#Spherical_coordinates
+        // Here we exploit the fact that the uniform distribution of a (n+1)-sphere
+        // when discarding two coordinates is uniform within a n-ball.
+        // Thus any two coordinates of the 4 are uniform within a circle.
+        // Here we separately test pairs (0, 1) and (2, 3).
+        // Note: We cannot create a single bin from the two bins in each circle as they are
+        // not independent. A point close to the edge in one circle requires a point close to
+        // the centre in the other circle to create a unit radius from all 4 coordinates.
+        // This test exercises the N-dimension sampler and demonstrates the vectors obey
+        // properties of the (n+1)-sphere.
+
+        // Divide the circle into layers of concentric rings and an angle.
+        final int layers = 10;
+        final int angleBins = 20;
+
+        // Compute the radius for each layer to have the same area
+        // (i.e. incrementally larger concentric circles must increase area by a constant).
+        // r = sqrt(fraction * maxArea / pi)
+        // Unit circle has area pi so we just use sqrt(fraction).
+        final double[] r = new double[layers];
+        for (int i = 1; i < layers; i++) {
+            r[i - 1] = Math.sqrt((double) i / layers);
+        }
+        // The final radius should be 1.0.
+        r[layers - 1] = 1.0;
+
+        final long[] observed1 = new long[layers * angleBins];
+        final long[] observed2 = new long[observed1.length];
+        final int steps = 1000000;
+        for (int i = 0; i < steps; ++i) {
+            final double[] v = generator.nextVector();
+            Assert.assertEquals(4, v.length);
+            Assert.assertEquals(1.0, length(v), 1e-10);
+            // Circle 1
+            observed1[circleBin(angleBins, r, v[0], v[1])]++;
+            // Circle 2
+            observed2[circleBin(angleBins, r, v[2], v[3])]++;
+        }
+
+        final double[] expected = new double[observed1.length];
+        Arrays.fill(expected, (double) steps / observed1.length);
+        final ChiSquareTest chi = new ChiSquareTest();
+        final double p1 = chi.chiSquareTest(expected, observed1);
+        Assert.assertFalse("Circle 1 p-value too small: " + p1, p1 < 0.01);
+        final double p2 = chi.chiSquareTest(expected, observed2);
+        Assert.assertFalse("Circle 2 p-value too small: " + p2, p2 < 0.01);
+    }
+
+    /**
+     * Compute a bin inside the circle using the polar angle theta and the radius thresholds.
+     *
+     * @param angleBins the number of angle bins
+     * @param r the radius bin thresholds (ascending order)
+     * @param x the x coordinate
+     * @param y the y coordinate
+     * @return the bin
+     */
+    private static int circleBin(int angleBins, double[] r, double x, double y) {
+        final int angleBin = angleBin(angleBins, x, y);
+        final int radiusBin = radiusBin(r, x, y);
+        return radiusBin * angleBins + angleBin;
+    }
+
+    /**
+     * Compute an angle bin from the xy vector. The bin will represent the range [0, 2pi).
+     *
+     * @param angleBins the number of angle bins
+     * @param x the x coordinate
+     * @param y the y coordinate
+     * @return the bin
+     */
+    private static int angleBin(int angleBins, double x, double y) {
+        final double angle = Math.atan2(y, x);
+        // Map [-pi, pi) to [0, 1) then assign a bin
+        return (int) (angleBins * (angle + Math.PI) / TWO_PI);
+    }
+
+    /**
+     * Compute a radius bin from the xy vector. The bin is assigned if the length of the vector
+     * is above the threshold of the bin.
+     *
+     * @param r the radius bin thresholds (ascending order)
+     * @param x the x coordinate
+     * @param y the y coordinate
+     * @return the bin
+     */
+    private static int radiusBin(double[] r, double x, double y) {
+        final double length = Math.sqrt(x * x + y * y);
+
+        // Note: The bin should be uniformly distributed.
+        // A test using a custom binary search (avoiding double NaN checks)
+        // shows the simple loop over a small number of bins is comparable in speed.
+        // The loop is preferred for simplicity. A binary search may be better
+        // if the number of bins is increased.
+        for (int layer = 0; layer < r.length; layer++) {
+            if (length <= r[layer]) {
+                return layer;
+            }
+        }
+        // Unreachable if the xy component is from a vector of length <= 1
+        throw new AssertionError("Invalid sample length: " + length);
+    }
+
+    /**
+     * Test infinite recursion occurs with a bad provider in 2D.
+     */
+    @Test(expected = StackOverflowError.class)
+    public void testBadProvider2D() {
+        testBadProvider(2);
+    }
+
+    /**
+     * Test infinite recursion occurs with a bad provider in 3D.
+     */
+    @Test(expected = StackOverflowError.class)
+    public void testBadProvider3D() {
+        testBadProvider(3);
+    }
+
+    /**
+     * Test infinite recursion occurs with a bad provider in 4D.
+     */
+    @Test(expected = StackOverflowError.class)
+    public void testBadProvider4D() {
+        testBadProvider(4);
+    }
+
+    /**
+     * Test the edge case where the normalisation sum to divide by is always zero.
+     * This test requires generation of Gaussian samples with the value 0.
+     * The sample eventually fails due to infinite recursion.
+     * See RNG-55.
+     *
+     * @param dimension the dimension
+     */
+    private static void testBadProvider(final int dimension) {
+        // A provider that will create zero valued Gaussian samples
+        // from the ZigguratNormalizedGaussianSampler.
+        final UniformRandomProvider bad = new SplitMix64(0L) {
+            @Override
+            public long nextLong() {
+                return 0;
+            }
+        };
+
+        UnitSphereSampler.of(dimension, bad).nextVector();
+    }
+
+    /**
+     * Test the edge case where the normalisation sum to divide by is zero for 2D.
+     */
+    @Test
+    public void testInvalidInverseNormalisation2D() {
+        testInvalidInverseNormalisationND(2);
+    }
+
+    /**
+     * Test the edge case where the normalisation sum to divide by is zero for 3D.
+     */
+    @Test
+    public void testInvalidInverseNormalisation3D() {
+        testInvalidInverseNormalisationND(3);
+    }
+
+    /**
+     * Test the edge case where the normalisation sum to divide by is zero for 4D.
+     */
+    @Test
+    public void testInvalidInverseNormalisation4D() {
+        testInvalidInverseNormalisationND(4);
+    }
+
+    /**
+     * Test the edge case where the normalisation sum to divide by is zero.
+     * This test requires generation of Gaussian samples with the value 0.
+     * See RNG-55.
+     */
+    private static void testInvalidInverseNormalisationND(final int dimension) {
         // Create a provider that will create a bad first sample but then recover.
         // This checks recursion will return a good value.
-        final UniformRandomProvider bad = new SplitMix64(0L) {
-                private int count;
-                // CHECKSTYLE: stop all
-                public long nextLong() { return (count++ == 0) ? 0 : super.nextLong(); }
-                public double nextDouble() { return (count++ == 0) ? 0 : super.nextDouble(); }
-                // CHECKSTYLE: resume all
-            };
+        final UniformRandomProvider bad = new SplitMix64(0x1a2b3cL) {
+            private int count = -2 * dimension;
 
-        final double[] vector = new UnitSphereSampler(1, bad).nextVector();
-        Assert.assertEquals(1, vector.length);
+            @Override
+            public long nextLong() {
+                // Return enough zeros to create Gaussian samples of zero for all coordinates.
+                return count++ < 0 ? 0 : super.nextLong();
+            }
+        };
+
+        final double[] vector = UnitSphereSampler.of(dimension, bad).nextVector();
+        Assert.assertEquals(dimension, vector.length);
+        Assert.assertEquals(1.0, length(vector), 1e-10);
     }
 
     /**
@@ -107,7 +440,7 @@
     public void testNextNormSquaredAfterZeroIsValid() {
         // The sampler explicitly handles length == 0 using recursion.
         // Anything above zero should be valid.
-        final double normSq = Math.nextAfter(0, 1);
+        final double normSq = Math.nextAfter(0.0, 1);
         // Map to the scaling factor
         final double f = 1 / Math.sqrt(normSq);
         // As long as this is finite positive then the sampler is valid
@@ -115,15 +448,79 @@
     }
 
     /**
-     * Test the SharedStateSampler implementation.
+     * Test the SharedStateSampler implementation for 1D.
      */
     @Test
-    public void testSharedStateSampler() {
+    public void testSharedStateSampler1D() {
+        testSharedStateSampler(1, false);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 2D.
+     */
+    @Test
+    public void testSharedStateSampler2D() {
+        testSharedStateSampler(2, false);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 3D.
+     */
+    @Test
+    public void testSharedStateSampler3D() {
+        testSharedStateSampler(3, false);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 4D.
+     */
+    @Test
+    public void testSharedStateSampler4D() {
+        testSharedStateSampler(4, false);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 1D using the factory constructor.
+     */
+    @Test
+    public void testSharedStateSampler1DWithFactoryConstructor() {
+        testSharedStateSampler(1, true);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 2D using the factory constructor.
+     */
+    @Test
+    public void testSharedStateSampler2DWithFactoryConstructor() {
+        testSharedStateSampler(2, true);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 3D using the factory constructor.
+     */
+    @Test
+    public void testSharedStateSampler3DWithFactoryConstructor() {
+        testSharedStateSampler(3, true);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for 4D using the factory constructor.
+     */
+    @Test
+    public void testSharedStateSampler4DWithFactoryConstructor() {
+        testSharedStateSampler(4, true);
+    }
+
+    /**
+     * Test the SharedStateSampler implementation for the given dimension.
+     *
+     * @param dimension the dimension
+     * @param factoryConstructor true to use the factory constructor
+     */
+    private static void testSharedStateSampler(int dimension, boolean factoryConstructor) {
         final UniformRandomProvider rng1 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
         final UniformRandomProvider rng2 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L);
-        final int n = 3;
-        final UnitSphereSampler sampler1 =
-            new UnitSphereSampler(n, rng1);
+        final UnitSphereSampler sampler1 = createUnitSphereSampler(dimension, rng1, factoryConstructor);
         final UnitSphereSampler sampler2 = sampler1.withUniformRandomProvider(rng2);
         RandomAssert.assertProduceSameSequence(
             new RandomAssert.Sampler<double[]>() {
@@ -141,11 +538,25 @@
     }
 
     /**
+     * Creates a UnitSphereSampler.
+     *
+     * @param dimension the dimension
+     * @param rng the source of randomness
+     * @param factoryConstructor true to use the factory constructor
+     * @return the sampler
+     */
+    private static UnitSphereSampler createUnitSphereSampler(int dimension, UniformRandomProvider rng,
+            boolean factoryConstructor) {
+        return factoryConstructor ?
+                UnitSphereSampler.of(dimension, rng) : new UnitSphereSampler(dimension, rng);
+    }
+
+    /**
      * @return the length (L2-norm) of given vector.
      */
     private static double length(double[] vector) {
         double total = 0;
-        for (double d : vector) {
+        for (final double d : vector) {
             total += d * d;
         }
         return Math.sqrt(total);
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index a8bbd88..2d24b13 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -75,6 +75,13 @@
 within the allotted number of reruns (the test will be marked
 as 'flaky' in the report).
 ">
+      <action dev="aherbert" type="fix" issue="130">
+        "UnitSphereSampler": Fix 1 dimension sampling to only return vectors containing 1 or -1.
+      </action>
+      <action dev="aherbert" type="update" issue="129">
+        "UnitSphereSampler": Improve performance with specialisations for low order dimensions.
+        Added a factory constructor to create the sampler.
+      </action>
       <action dev="aherbert" type="add" issue="128">
         New "UnitBallSampler" to generate coordinates uniformly within an n-unit ball.
       </action>