Merge PR #251 for updating opencl support using ViennaCL

Conflicts:
	doc/en/docs/installation.md
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index aa2212b..a412151 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -6,15 +6,15 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# 
+#
 
 SET(SINGA_LINKER_LIBS "")
 
@@ -26,12 +26,12 @@
 LIST(APPEND SINGA_LINKER_LIBS ${PROTOBUF_LIBRARIES})
 INCLUDE("cmake/Protobuf.cmake")
 
-#FIND_PACKAGE(Glog)
-#IF(GLOG_FOUND)
-#    MESSAGE(STATUS "GLOG FOUND at ${GLOG_INCLUDE_DIR}")
-#    ADD_DEFINITIONS("-DUSE_GLOG")
-#    LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
-#ENDIF()
+FIND_PACKAGE(Glog)
+IF(GLOG_FOUND)
+    MESSAGE(STATUS "GLOG FOUND at ${GLOG_INCLUDE_DIR}")
+    ADD_DEFINITIONS("-DUSE_GLOG")
+    LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
+ENDIF()
 
 IF(USE_LMDB)
     FIND_PACKAGE(LMDB REQUIRED)
@@ -72,9 +72,9 @@
 	ENDIF()
 ENDIF()
 
-FIND_PACKAGE(Glog REQUIRED)
-INCLUDE_DIRECTORIES(SYSTEM ${GLOG_INCLUDE_DIRS})
-LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
+#FIND_PACKAGE(Glog REQUIRED)
+#INCLUDE_DIRECTORIES(SYSTEM ${GLOG_INCLUDE_DIRS})
+#LIST(APPEND SINGA_LINKER_LIBS ${GLOG_LIBRARIES})
 #MESSAGE(STATUS "Found glog at ${GLOG_INCLUDE_DIRS}")
 
 IF(USE_OPENCV)
diff --git a/doc/en/docs/installation.md b/doc/en/docs/installation.md
index ecf3e97..a8b0530 100755
--- a/doc/en/docs/installation.md
+++ b/doc/en/docs/installation.md
@@ -9,37 +9,82 @@
 
 
 ### Optional
-* glog
-* opencv (tested with 2.4.8)
-* lmdb (tested with 0.9)
 * cuda (tested with 6.5, 7.0 and 7.5)
 * cudnn (v4 and v5)
+* opencv (tested with 2.4.8)
+* lmdb (tested with 0.9)
+* glog
 * opencl-headers and viennacl (version 1.7.1 or newer) for OpenCL support
 
-PySINGA has additional dependencies
+PySINGA (the Python binding) has additional dependencies
 
 * python(==2.7)
 * pip(>=1.5)
-* swig(>=3.0)
+* swig(>=3.0.10)
 * numpy(>=1.11.0)
-* openblas (>=0.2.10)
 
 Users are encouraged to install the cuda and [cudnn](https://developer.nvidia.com/cudnn) for running SINGA on GPUs to
 get better performance.
-Most of the dependent libraries could be installed via package mangers like
-apt-get or homebrew.
+Most of the dependent libraries could be installed from source or via package managers like
+apt-get, homebrew, pip and anaconda. Please refer to the FAQ for problems caused by the path setting of the dependent libraries.
 
-    # for ubuntu users, tested on 14.04
-    sudo apt-get install libprotobuf-dev libopenblas-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev python2.7-dev python-pip python-numpy
 
-    # for Mac OS users
-    brew install -vd glog lmdb
-    brew tap homebrew/science
-    brew install opencv
-    brew install openblas
-    brew tap homebrew/python
-    brew install python
-    brew install numpy  --with-openblas
+### apt-get
+The following instructions are tested on Ubuntu 14.04 for installing dependent libraries.
+
+    # required libraries
+    $ sudo apt-get install libprotobuf-dev libopenblas-dev protobuf-compiler
+
+    # optional libraries
+    $ sudo apt-get install python2.7-dev python-pip python-numpy
+    $ sudo apt-get install libopencv-dev libgoogle-glog-dev liblmdb-dev
+
+Please note that PySINGA requires swig >=3.0, which could be installed via
+apt-get on Ubuntu 16.04; but it has to be installed from source for other Ubuntu versions including 14.04.
+
+### homebrew
+The following instructions are tested on Mac OS X Yosemite (10.10.5) for installing dependent libraries.
+
+    # required libraries
+    $ brew tap homebrew/science
+    $ brew install openblas
+    $ brew install protobuf260
+
+    # optional libraries
+    $ brew tap homebrew/python
+    $ brew install python
+    $ brew install opencv
+    $ brew install -vd glog lmdb
+
+By default, openblas is installed into /usr/local/opt/openblas. To let the compiler (and cmake) know the openblas
+path, please export
+
+    $ export CMAKE_INCLUDE_PATH=/usr/local/opt/openblas/include:$CMAKE_INCLUDE_PATH
+    $ export CMAKE_LIBRARY_PATH=/usr/local/opt/openblas/lib:$CMAKE_LIBRARY_PATH
+
+To let the runtime know the openblas path, please export
+
+    $ export LD_LIBRARY_PATH=/usr/local/opt/openblas/lib:$LD_LIBRARY_PATH
+
+### pip and anaconda for PySINGA
+pip and anaconda could be used to install python packages, e.g. numpy.
+To use pip with virtual environment,
+
+    # install virtualenv
+    $ pip install virtualenv
+    $ virtualenv pysinga
+    $ source pysinga/bin/activate
+    $ pip install numpy
+
+To use anaconda with virtual environment,
+
+    $ conda create --name pysinga python=2
+    $ source activate pysinga
+    $ conda install numpy
+
+After installing numpy, please export the header path of numpy.i as
+
+    $ export CPLUS_INCLUDE_PATH=`python -c "import numpy; print numpy.get_include()"`:$CPLUS_INCLUDE_PATH
 
 
 ## Install PySINGA
@@ -96,7 +141,7 @@
 ### From the downloaded `tar.gz` file:
 
 Extract the downloaded. If using CUDA, CNMeM needs to be fetched:
-   
+
     $ cd $SINGA_ROOT/lib/cnmem/
     $ git clone https://github.com/NVIDIA/cnmem
 
@@ -167,7 +212,7 @@
 Remember to add its directory to `PATH` and the built libraries to `LD_LIBRARY_PATH`.
 
 To build SINGA with OpenCL support, you need to pass the flag during cmake:
-    
+
     cmake .. -DUSE_OPENCL=ON
 
 ### Windows
@@ -263,3 +308,9 @@
 
     A: It could be caused by the `PYTHONPATH` which should be set to empty when you are using virtual environment to avoid the conflicts with the path of
     the virtual environment.
+
+* Q: When compiling PySINGA from source, there is a compilation error because <numpy/arrayobject.h> is missing
+
+    A: Please install numpy and export the path of numpy header files as
+
+        $ export CPLUS_INCLUDE_PATH=`python -c "import numpy; print numpy.get_include()"`:$CPLUS_INCLUDE_PATH
diff --git a/examples/cifar10/CMakeLists.txt b/examples/cifar10/CMakeLists.txt
index 313c0eb..1f29f2f 100644
--- a/examples/cifar10/CMakeLists.txt
+++ b/examples/cifar10/CMakeLists.txt
@@ -6,31 +6,31 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# 
+#
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
 
 IF(USE_CUDNN)
 ADD_EXECUTABLE(alexnet alexnet.cc)
-ADD_DEPENDENCIES(alexnet singa_core singa_model singa_utils)
-TARGET_LINK_LIBRARIES(alexnet singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+ADD_DEPENDENCIES(alexnet singa)
+TARGET_LINK_LIBRARIES(alexnet singa protobuf ${SINGA_LINKER_LIBS})
 
 ADD_EXECUTABLE(alexnet-parallel alexnet-parallel.cc)
-ADD_DEPENDENCIES(alexnet-parallel singa_core singa_model singa_utils)
-TARGET_LINK_LIBRARIES(alexnet-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+ADD_DEPENDENCIES(alexnet-parallel singa)
+TARGET_LINK_LIBRARIES(alexnet-parallel singa protobuf ${SINGA_LINKER_LIBS})
 SET_TARGET_PROPERTIES(alexnet-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
 
 ADD_EXECUTABLE(vgg-parallel vgg-parallel.cc)
-ADD_DEPENDENCIES(vgg-parallel singa_core singa_model singa_utils)
-TARGET_LINK_LIBRARIES(vgg-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+ADD_DEPENDENCIES(vgg-parallel singa)
+TARGET_LINK_LIBRARIES(vgg-parallel singa protobuf ${SINGA_LINKER_LIBS})
 SET_TARGET_PROPERTIES(vgg-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
 ENDIF(USE_CUDNN)
diff --git a/examples/cifar10/README.md b/examples/cifar10/README.md
index bd5ed7d..de122f7 100644
--- a/examples/cifar10/README.md
+++ b/examples/cifar10/README.md
@@ -56,7 +56,7 @@
 
         ./run-parallel.sh
 
-4. vgg-parallel.cc. It train the VGG model using the CPP APIs on two CudaGPU devices similar to alexnet-parallel.cc.
+4. vgg-parallel.cc. It trains the VGG model using the CPP APIs on two CudaGPU devices similar to alexnet-parallel.cc.
 
 ### Prediction
 
@@ -68,10 +68,3 @@
 a numpy array of images (one row per image); dev is the training device, e.g.,
 a CudaGPU device or the host CppCPU device; topk labels of each image would be
 returned.
-
-
-
-
-
-
-
diff --git a/examples/imagenet/CMakeLists.txt b/examples/imagenet/CMakeLists.txt
index 465245a..fbb7235 100644
--- a/examples/imagenet/CMakeLists.txt
+++ b/examples/imagenet/CMakeLists.txt
@@ -6,15 +6,15 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# 
+#
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
@@ -23,12 +23,12 @@
   IF(USE_OPENCV)
   SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp ")
     ADD_EXECUTABLE(imagenet alexnet.cc)
-    ADD_DEPENDENCIES(imagenet singa_core singa_model singa_utils singa_io)
-    TARGET_LINK_LIBRARIES(imagenet singa_core singa_utils singa_model singa_io protobuf ${SINGA_LIBKER_LIBS})
+    ADD_DEPENDENCIES(imagenet singa)
+    TARGET_LINK_LIBRARIES(imagenet singa protobuf ${SINGA_LINKER_LIBS})
 
     ADD_EXECUTABLE(createdata ilsvrc12.cc)
-    ADD_DEPENDENCIES(createdata singa_core singa_io singa_model singa_utils)
-    TARGET_LINK_LIBRARIES(createdata singa_core singa_utils singa_io singa_model protobuf ${SINGA_LIBKER_LIBS})
+    ADD_DEPENDENCIES(createdata singa)
+    TARGET_LINK_LIBRARIES(createdata singa protobuf ${SINGA_LINKER_LIBS})
     #SET_TARGET_PROPERTIES(createdata PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
   ENDIF(USE_OPENCV)
 ENDIF(USE_CUDNN)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cc1ee0c..b4a88f5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -20,36 +20,14 @@
 
 FILE(GLOB proto_files proto/*.proto)
 protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_files})
-IF (USE_PYTHON)
-    protobuf_generate_python(proto_pys ${proto_files})
-ENDIF()
 INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/include")
 
-#message(STATUS "include: ${CMAKE_BINARY_DIR} ")
-#message(STATUS "srcs: ${proto_srcs}")
-#message(STATUS "hdrs: ${proto_hdrs}")
-#message(STATUS "pys: ${proto_pys}")
-ADD_LIBRARY(singa_proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_pys})
-FOREACH(fil ${proto_hdrs})
-    ADD_CUSTOM_COMMAND(
-        TARGET singa_proto PRE_BUILD
-        COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto"
-        COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto"
-        #COMMAND ${CMAKE_COMMAND} -E echo "copy done"
-        )
-ENDFOREACH()
-LIST(APPEND SINGA_LINKER_LIBS singa_proto)
-
+LIST(APPEND singa_sources ${proto_hdrs} ${proto_srcs})
 SET(PREVIOUS_LINKER_LIBS ${SINGA_LINKER_LIBS})
 
-#FILE(GLOB_RECURSE utils_source ${CMAKE_CURRENT_SOURCE_DIR}/utils/ "*.cc")
 AUX_SOURCE_DIRECTORY(utils utils_source)
-#message(STATUS "UTILS ${utils_source}")
-ADD_LIBRARY(singa_utils SHARED ${utils_source})
-TARGET_LINK_LIBRARIES(singa_utils ${SINGA_LINKER_LIBS})
-LIST(APPEND SINGA_LINKER_LIBS singa_utils)
+LIST(APPEND singa_sources ${utils_source})
 
-#FILE(GLOB_RECURSE core_source ${CMAKE_CURRENT_SOURCE_DIR}/core/ "*.cc")
 AUX_SOURCE_DIRECTORY(core/device core_source)
 AUX_SOURCE_DIRECTORY(core/memory core_source)
 AUX_SOURCE_DIRECTORY(core/scheduler core_source)
@@ -67,35 +45,40 @@
     include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor")
     SET(CMAKE_CXX_FLAGS ${FLAGS_BACKUP})
 ENDIF (USE_CUDA)
-#message(STATUS "FLAGS ${CMAKE_CXX_FLAGS}")
-#message(STATUS "CORE ${cuda_source}")
-#message(STATUS "OBJ ${cuda_objs}")
-ADD_LIBRARY(singa_core SHARED ${core_source} ${cuda_objs})
-TARGET_LINK_LIBRARIES(singa_core ${SINGA_LINKER_LIBS})
-LIST(APPEND SINGA_LINKER_LIBS singa_core)
-#MESSAGE(STATUS "link libs " ${SINGA_LINKER_LIBS})
+LIST(APPEND singa_sources ${core_source} ${cuda_objs})
 
-#FILE(GLOB_RECURSE model_source ${CMAKE_CURRENT_SOURCE_DIR}/model/ "*.cc")
 AUX_SOURCE_DIRECTORY(model model_source)
 AUX_SOURCE_DIRECTORY(model/layer model_source)
 AUX_SOURCE_DIRECTORY(model/optimizer model_source)
 AUX_SOURCE_DIRECTORY(model/loss model_source)
 AUX_SOURCE_DIRECTORY(model/metric model_source)
 AUX_SOURCE_DIRECTORY(model/updater model_source)
-#MESSAGE(STATUS "MODEL ${model_source}")
-ADD_LIBRARY(singa_model SHARED ${model_source})
-MESSAGE(STATUS "model linker libs ${SINGA_LINKER_LIBS}")
-TARGET_LINK_LIBRARIES(singa_model ${SINGA_LINKER_LIBS})
-LIST(APPEND SINGA_LINKER_LIBS singa_model)
+LIST(APPEND singa_sources ${model_source})
 
 AUX_SOURCE_DIRECTORY(io io_source)
 AUX_SOURCE_DIRECTORY(io/network io_source)
-ADD_LIBRARY(singa_io SHARED ${io_source})
-TARGET_LINK_LIBRARIES(singa_io ${SINGA_LINKER_LIBS})
-LIST(APPEND SINGA_LINKER_LIBS singa_io)
+LIST(APPEND singa_sources ${io_source})
+ADD_LIBRARY(singa SHARED ${singa_sources})
+ADD_CUSTOM_TARGET(
+  copy_protobuf
+  COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/include/singa/proto"
+  DEPENDS ${proto_hdrs}
+  COMMENT "Copying Protobuf headers"
+  )
+FOREACH(fil ${proto_hdrs})
+  ADD_CUSTOM_COMMAND(
+    TARGET copy_protobuf PRE_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy ${fil} "${CMAKE_BINARY_DIR}/include/singa/proto"
+ )
+ENDFOREACH()
+ADD_DEPENDENCIES(singa copy_protobuf)
+TARGET_LINK_LIBRARIES(singa ${SINGA_LINKER_LIBS})
+#MESSAGE(STATUS "HEADERS: ${proto_hdrs}")
 
 IF(USE_PYTHON)
 
+    protobuf_generate_python(proto_pys ${proto_files})
+    #MESSAGE(STATUS "proto pys: ${proto_pys}")
     FILE(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i")
     CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i.in" "${CMAKE_CURRENT_SOURCE_DIR}/python/swig/config.i")
 
@@ -112,11 +95,11 @@
 
     create_symlinks(${python_source_files})
 
-    SET(python_cxxs "${core_source};${io_source};${model_source};${utils_source}")
-    ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${python_cxxs} ${cuda_objs})
+    ADD_LIBRARY(_singa_wrap SHARED ${python_srcs} ${singa_sources} ${cuda_objs} ${proto_pys})
     SET(WRAPPER_LINKER_LIBS "${PREVIOUS_LINKER_LIBS}")
-    TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS})
+    TARGET_LINK_LIBRARIES(_singa_wrap ${WRAPPER_LINKER_LIBS} ${PYTHON_LIBRARIES})
     TARGET_INCLUDE_DIRECTORIES(_singa_wrap PRIVATE ${PYTHON_INCLUDE_DIRS})
+    ADD_DEPENDENCIES(_singa_wrap singa )
     #message(STATUS "PREVIOUS_LINKER_LIBS ${PREVIOUS_LINKER_LIBS}")
 
     SET_TARGET_PROPERTIES(_singa_wrap
@@ -131,5 +114,16 @@
 
     #create python/singa/proto/__init__.py
     FILE(WRITE ${CMAKE_BINARY_DIR}/python/singa/proto/__init__.py "")
+    #MESSAGE(STATUS "apple: ${APPLE}")
+    IF(APPLE)
+        ADD_CUSTOM_TARGET(
+            change_suffix ALL 
+            COMMAND ${CMAKE_COMMAND} -E rename "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.dylib" "${CMAKE_BINARY_DIR}/python/singa/_singa_wrap.so"
+            COMMENT "change .dylib to .so in mac system"
+        )
+        ADD_DEPENDENCIES(change_suffix _singa_wrap)
+    ENDIF(APPLE)
 
 ENDIF(USE_PYTHON)
+
+
diff --git a/src/core/device/cuda_gpu.cc b/src/core/device/cuda_gpu.cc
index 0164752..f6603d3 100644
--- a/src/core/device/cuda_gpu.cc
+++ b/src/core/device/cuda_gpu.cc
@@ -53,7 +53,7 @@
 
 CudaGPU::CudaGPU(int id, std::shared_ptr<DeviceMemPool> pool)
     : Device(id, kNumCudaStream) {
-  CHECK_NE(pool, nullptr);
+  CHECK(pool != nullptr);
   pool_ = pool;
   Setup();
 }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7db784c..efc1983 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -6,15 +6,15 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# 
+#
 
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/include)
@@ -37,10 +37,9 @@
 
 
 ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
-ADD_DEPENDENCIES(test_singa singa_core singa_utils)
+ADD_DEPENDENCIES(test_singa singa)
 #MESSAGE(STATUS "link libs" ${singa_linker_libs})
-TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
-    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
+TARGET_LINK_LIBRARIES(test_singa gtest singa ${SINGA_LINKER_LIBS})
 IF(UNIX AND (NOT APPLE))
     LIST(APPEND LINK_FLAGS "-pthread")
 ENDIF()