Prepare v0.3.0-RC1

Update the README running instructions for a build compiled without any
optional features enabled (i.e., a plain `./configure`).
diff --git a/README.md b/README.md
index 3ae8fe0..4d124aa 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,19 @@
   * `google-protobuf` (New BSD)
   * `openblas` (New BSD)
 
+###Optional dependencies
+For advanced features, the following libraries are needed:
+
+  * `zeromq` (LGPLv3 + static link exception), `czmq` (Mozilla Public License Version 2.0) and `zookeeper` (Apache 2.0), for distributed training with multiple processes. Compile SINGA with `--enable-dist`.
+  * `cuda` (NVIDIA CUDA Toolkit EULA) for training using NVIDIA GPUs.
+  * `cudnn` (NVIDIA CuDNN EULA) for training using NVIDIA's CuDNN library.
+  * `Apache Mesos` (Apache 2.0)
+  * `Apache Hadoop` (Apache 2.0)
+  * `libhdfs3` (Apache 2.0)
+  * `swig` (GPL) for using Python Binding.
+
 We have tested SINGA on Ubuntu 12.04, Ubuntu 14.01 and CentOS 6.
-You can install all dependencies into `$PREFIX` folder by
+You can install all dependencies (including optional dependencies) into `$PREFIX` folder by
 
     ./thirdparty/install.sh all $PREFIX
 
@@ -32,17 +43,6 @@
     $ export LIBRARY_PATH=$PREFIX/lib:$LIBRARY_PATH
     $ export PATH=$PREFIX/bin:$PATH
 
-###Optional dependencies
-For advanced features, the following libraries are needed:
-
-  * `zeromq` (LGPLv3 + static link exception),`czmq` (Mozilla Public License Version 2.0) and `zookeeper` (Apache 2.0), for distributed training with multiple processes. Compile SINGA with `--enable-dist`
-  * `cuda` (NVIDIA CUDA Toolkit EUL) for training using NVIDIA GPUs.
-  * `cudnn` (NVIDIA CuDNN EULA) for training using NVIDIA's CUDNN library.
-  * `Apache Mesos` (Apache 2.0)
-  * `Apache Hadoop` (Apache 2.0)
-  * `libhdfs3` (Apache 2.0)
-  * `swig` (GPL) for using Python Binding.
-
 
 ##Documentation
 
@@ -76,7 +76,7 @@
 	$ ./tool/python/singa/generatepy.sh
 	$ ./configure --enable-python --with-python=/PATH/TO/Python.h
 
---with-python is optinal as by default the path is /usr/local/include.
+--with-python is optional as by default the path is /usr/local/include.
 
 You can also run the following command for further configuration.
 
@@ -107,10 +107,9 @@
 Next, start the training:
 
     $ cd ../../
-    $ ./bin/zk-service.sh start
-    $ ./bin/singa-run.sh -conf examples/cifar10/job.conf
+    $ ./singa -conf examples/cifar10/job.conf
 
-Now we just need to wait until it is done!
+For GPU training or distributed training, please refer to the [online guide](http://singa.apache.org/docs).
 
 ##LICENSE
 
diff --git a/src/main.cc b/src/main.cc
index a07f86b..0ce7d19 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -46,8 +46,8 @@
  * easily, e.g., MLP(layer1_size, layer2_size, tanh, loss);
  */
 int main(int argc, char **argv) {
-  if (argc < 4) {
-    std::cout << "Args: -conf JOB_CONF -singa SINGA_CONF -job_id JOB_ID "
+  if (argc < 2) {
+    std::cout << "Args: -conf JOB_CONF [-singa SINGA_CONF] [-job_id JOB_ID] "
               << " [-resume|-test]\n"
               << "-resume\t resume training from latest checkpoint files\n"
               << "-test\t test performance or extract features\n";
diff --git a/src/neuralnet/input_layer/store.cc b/src/neuralnet/input_layer/store.cc
index a4754f4..32f1887 100644
--- a/src/neuralnet/input_layer/store.cc
+++ b/src/neuralnet/input_layer/store.cc
@@ -34,7 +34,6 @@
   if (store_ != nullptr) {
     delete store_;
   }
-
 }
 
 void StoreInputLayer::Setup(const LayerProto& conf,
@@ -104,10 +103,8 @@
   } else {
     fetch_data();
   }
-  LOG(ERROR) << "batchsize << " << batchsize_;
   for (int k = 0; k < batchsize_; k++)
     Parse(k, flag, buf_keys_[k], buf_vals_[k]);
-  LOG(ERROR) << "after parse ";
   if (layer_conf_.store_conf().prefetching())
     thread_ = new thread(&StoreInputLayer::fetch_data, this);
 }