blob: 3fc329a9b8355cca103e7344d8a26de70f846fda [file] [log] [blame]
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This script downloads the Caltech-256 dataset
# (http://www.vision.caltech.edu/Image_Datasets/Caltech256/) and splits it
# into train and val rec files.
# Abort on the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline, so that a broken download or a failed
# im2rec.py run does not silently fall through to the cleanup step.
set -euo pipefail
# Unmatched globs expand to nothing instead of the literal pattern; this keeps
# the per-class image listing and the `rm -f prefix_*` cleanups well-defined.
shopt -s nullglob

# Print a message to stderr and exit with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# number of images per class for training
IMG_TRAIN=60

ARCHIVE=256_ObjectCategories.tar
ARCHIVE_URL=http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar
EXTRACTED_DIR=256_ObjectCategories
TRAIN_DIR=caltech_256_train

# download
if [[ ! -e "${ARCHIVE}" ]]; then
  # Fetch under a temporary name and rename only on success, so that an
  # interrupted transfer is not mistaken for a complete archive on a re-run.
  wget -O "${ARCHIVE}.part" "${ARCHIVE_URL}"
  mv -- "${ARCHIVE}.part" "${ARCHIVE}"
fi

# split into train and val set: up to IMG_TRAIN randomly chosen .jpg files of
# every class directory are moved to TRAIN_DIR/<class>/; whatever stays behind
# in the extracted tree is used as the validation set below.
tar -xf "${ARCHIVE}"
mkdir -p "${TRAIN_DIR}"
for class_dir in "${EXTRACTED_DIR}"/*; do
  # Only directories are class folders; skip anything else in the archive root.
  [[ -d "${class_dir}" ]] || continue
  class_name=${class_dir##*/}
  echo "spliting ${class_name}"
  mkdir -p "${TRAIN_DIR}/${class_name}"

  images=("${class_dir}"/*.jpg)
  # Nothing to move for a class without .jpg files.
  (( ${#images[@]} > 0 )) || continue

  # `shuf -n` picks the random sample directly (no `| head`, which could kill
  # shuf with SIGPIPE under pipefail); NUL delimiters keep any path intact.
  while IFS= read -r -d '' image; do
    mv -- "${image}" "${TRAIN_DIR}/${class_name}/"
  done < <(printf '%s\0' "${images[@]}" | shuf -z -n "${IMG_TRAIN}")
done

# generate lst files
CUR_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
MX_DIR=${CUR_DIR}/../../../
IM2REC=${MX_DIR}/tools/im2rec.py
python "${IM2REC}" --list True --recursive True caltech256-train "${TRAIN_DIR}/"
python "${IM2REC}" --list True --recursive True caltech256-val "${EXTRACTED_DIR}/"
for prefix in caltech256-train caltech256-val; do
  # Keep only the "<prefix>_train.lst" listing, renamed to "<prefix>.lst", and
  # drop every other "<prefix>_*" file the list step left behind.
  # NOTE(review): the rename is skipped when "<prefix>_train.lst" is absent, in
  # case the im2rec.py in use writes "<prefix>.lst" directly -- confirm against
  # the im2rec.py version shipped in this tree.
  if [[ -e "${prefix}_train.lst" ]]; then
    mv -- "${prefix}_train.lst" "${prefix}.lst"
  fi
  rm -f -- "${prefix}"_*
  [[ -e "${prefix}.lst" ]] || die "im2rec.py did not produce ${prefix}.lst"
done

# generate rec files
python "${IM2REC}" --resize 256 --quality 95 --num-thread 16 caltech256-val "${EXTRACTED_DIR}/"
python "${IM2REC}" --resize 256 --quality 95 --num-thread 16 caltech256-train "${TRAIN_DIR}/"

# clean: remove the intermediate image trees (the downloaded archive is kept).
# `:?` aborts instead of expanding to an empty path.
rm -rf -- "${TRAIN_DIR:?}" "${EXTRACTED_DIR:?}/"