blob: 091c4e8ea17b0c4e7908143dd6b0e53b9bed0bb4 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
Pascal VOC database
This class loads ground truth notations from standard Pascal VOC XML data formats
and transform them into IMDB format. Selective search is used for proposals, see roidb
function. Results are written as the Pascal VOC format. Evaluation is based on mAP
import cPickle
import cv2
import os
import numpy as np
from ..logger import logger
from imdb import IMDB
from pascal_voc_eval import voc_eval
from ds_utils import unique_boxes, filter_small_boxes
class PascalVOC(IMDB):
def __init__(self, image_set, root_path, devkit_path):
fill basic information to initialize imdb
:param image_set: 2007_trainval, 2007_test, etc
:param root_path: 'selective_search_data' and 'cache'
:param devkit_path: data and results
:return: imdb object
year, image_set = image_set.split('_')
super(PascalVOC, self).__init__('voc_' + year, image_set, root_path, devkit_path) # set
self.year = year
self.root_path = root_path
self.devkit_path = devkit_path
self.data_path = os.path.join(devkit_path, 'VOC' + year)
self.classes = ['__background__', # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
self.num_classes = len(self.classes)
self.image_set_index = self.load_image_set_index()
self.num_images = len(self.image_set_index)'%s num_images %d' % (, self.num_images))
self.config = {'comp_id': 'comp4',
'use_diff': False,
'min_size': 2}
def load_image_set_index(self):
find out which indexes correspond to given image set (train or val)
image_set_index_file = os.path.join(self.data_path, 'ImageSets', 'Main', self.image_set + '.txt')
assert os.path.exists(image_set_index_file), 'Path does not exist: {}'.format(image_set_index_file)
with open(image_set_index_file) as f:
image_set_index = [x.strip() for x in f.readlines()]
return image_set_index
def image_path_from_index(self, index):
given image index, find out full path
:param index: index of a specific image
:return: full path of this image
image_file = os.path.join(self.data_path, 'JPEGImages', index + '.jpg')
assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
return image_file
def gt_roidb(self):
return ground truth image regions database
:return: imdb[image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
cache_file = os.path.join(self.cache_path, + '_gt_roidb.pkl')
if os.path.exists(cache_file):
with open(cache_file, 'rb') as fid:
roidb = cPickle.load(fid)'%s gt roidb loaded from %s' % (, cache_file))
return roidb
gt_roidb = [self.load_pascal_annotation(index) for index in self.image_set_index]
with open(cache_file, 'wb') as fid:
cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)'%s wrote gt roidb to %s' % (, cache_file))
return gt_roidb
def load_pascal_annotation(self, index):
for a given index, load image and bounding boxes info from XML file
:param index: index of a specific image
:return: record['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
import xml.etree.ElementTree as ET
roi_rec = dict()
roi_rec['image'] = self.image_path_from_index(index)
size = cv2.imread(roi_rec['image']).shape
roi_rec['height'] = size[0]
roi_rec['width'] = size[1]
filename = os.path.join(self.data_path, 'Annotations', index + '.xml')
tree = ET.parse(filename)
objs = tree.findall('object')
if not self.config['use_diff']:
non_diff_objs = [obj for obj in objs if int(obj.find('difficult').text) == 0]
objs = non_diff_objs
num_objs = len(objs)
boxes = np.zeros((num_objs, 4), dtype=np.uint16)
gt_classes = np.zeros((num_objs), dtype=np.int32)
overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
class_to_index = dict(zip(self.classes, range(self.num_classes)))
# Load object bounding boxes into a data frame.
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
# Make pixel indexes 0-based
x1 = float(bbox.find('xmin').text) - 1
y1 = float(bbox.find('ymin').text) - 1
x2 = float(bbox.find('xmax').text) - 1
y2 = float(bbox.find('ymax').text) - 1
cls = class_to_index[obj.find('name').text.lower().strip()]
boxes[ix, :] = [x1, y1, x2, y2]
gt_classes[ix] = cls
overlaps[ix, cls] = 1.0
roi_rec.update({'boxes': boxes,
'gt_classes': gt_classes,
'gt_overlaps': overlaps,
'max_classes': overlaps.argmax(axis=1),
'max_overlaps': overlaps.max(axis=1),
'flipped': False})
return roi_rec
def load_selective_search_roidb(self, gt_roidb):
turn selective search proposals into selective search roidb
:param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
:return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
matfile = os.path.join(self.root_path, 'selective_search_data', + '.mat')
assert os.path.exists(matfile), 'selective search data does not exist: {}'.format(matfile)
raw_data =['boxes'].ravel() # original was dict ['images', 'boxes']
box_list = []
for i in range(raw_data.shape[0]):
boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 # pascal voc dataset starts from 1.
keep = unique_boxes(boxes)
boxes = boxes[keep, :]
keep = filter_small_boxes(boxes, self.config['min_size'])
boxes = boxes[keep, :]
return self.create_roidb_from_box_list(box_list, gt_roidb)
def selective_search_roidb(self, gt_roidb, append_gt=False):
get selective search roidb and ground truth roidb
:param gt_roidb: ground truth roidb
:param append_gt: append ground truth
:return: roidb of selective search
cache_file = os.path.join(self.cache_path, + '_ss_roidb.pkl')
if os.path.exists(cache_file):
with open(cache_file, 'rb') as fid:
roidb = cPickle.load(fid)'%s ss roidb loaded from %s' % (, cache_file))
return roidb
if append_gt:'%s appending ground truth annotations' %
ss_roidb = self.load_selective_search_roidb(gt_roidb)
roidb = IMDB.merge_roidbs(gt_roidb, ss_roidb)
roidb = self.load_selective_search_roidb(gt_roidb)
with open(cache_file, 'wb') as fid:
cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)'%s wrote ss roidb to %s' % (, cache_file))
return roidb
def evaluate_detections(self, detections):
top level evaluations
:param detections: result matrix, [bbox, confidence]
:return: None
# make all these folders for results
result_dir = os.path.join(self.devkit_path, 'results')
if not os.path.exists(result_dir):
year_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year)
if not os.path.exists(year_folder):
res_file_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year, 'Main')
if not os.path.exists(res_file_folder):
def get_result_file_template(self):
this is a template
:return: a string template
res_file_folder = os.path.join(self.devkit_path, 'results', 'VOC' + self.year, 'Main')
comp_id = self.config['comp_id']
filename = comp_id + '_det_' + self.image_set + '_{:s}.txt'
path = os.path.join(res_file_folder, filename)
return path
def write_pascal_results(self, all_boxes):
write results files in pascal devkit path
:param all_boxes: boxes to be processed [bbox, confidence]
:return: None
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue'Writing %s VOC results file' % cls)
filename = self.get_result_file_template().format(cls)
with open(filename, 'wt') as f:
for im_ind, index in enumerate(self.image_set_index):
dets = all_boxes[cls_ind][im_ind]
if len(dets) == 0:
# the VOCdevkit expects 1-based indices
for k in range(dets.shape[0]):
f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
format(index, dets[k, -1],
dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1))
def do_python_eval(self):
python evaluation wrapper
:return: None
annopath = os.path.join(self.data_path, 'Annotations', '{0!s}.xml')
imageset_file = os.path.join(self.data_path, 'ImageSets', 'Main', self.image_set + '.txt')
annocache = os.path.join(self.cache_path, + '_annotations.pkl')
aps = []
# The PASCAL VOC metric changed in 2010
use_07_metric = True if int(self.year) < 2010 else False'VOC07 metric? ' + ('Y' if use_07_metric else 'No'))
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
filename = self.get_result_file_template().format(cls)
rec, prec, ap = voc_eval(filename, annopath, imageset_file, cls, annocache,
ovthresh=0.5, use_07_metric=use_07_metric)
aps += [ap]'AP for {} = {:.4f}'.format(cls, ap))'Mean AP = {:.4f}'.format(np.mean(aps)))