# Source code for perceptron.utils.criteria.detection

# Copyright 2019 Baidu Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provide base classes that define what is an adversarial
for object detection models.
"""

import sys
import math
from perceptron.utils.func import softmax
from .base import Criterion
import numpy as np


class TargetClassMiss(Criterion):
    """Defines adversarials as images for which the target class
    is not in the detection result.

    Parameters
    ----------
    target_class : object
        Class label that has to be absent from ``predictions['classes']``
        for a prediction to count as adversarial.
    """

    def __init__(self, target_class):
        super(TargetClassMiss, self).__init__()
        self._target_class = target_class

    def target_class(self):
        """Return target class."""
        return self._target_class

    def name(self):
        """Return criterion name."""
        return 'TargetClassMiss'

    def is_adversarial(self, predictions, annotation):
        """Decides if predictions for an image are adversarial.

        Parameters
        ----------
        predictions : dict
            Detection output; only ``predictions['classes']`` is read.
        annotation : object
            Unused by this criterion; accepted so the signature matches
            the other detection criteria in this module.

        Returns
        -------
        bool
            True if the target class does not occur in
            ``predictions['classes']``.
        """
        return self._target_class not in predictions['classes']
class RegionalTargetClassMiss(Criterion):
    """Defines adversarials as images for which the target class
    in target region is not in the detection result.

    Parameters
    ----------
    target_class : object
        Class label that must not be detected inside the target region.
    target_region : array_like
        Four box coordinates; converted to an integer ``numpy`` array.
    """

    def __init__(self, target_class, target_region):
        super(RegionalTargetClassMiss, self).__init__()
        self._target_class = target_class
        # NOTE: the attribute keeps its original (misspelled) name so any
        # code outside this module that reads it keeps working.
        self._target_retion = np.array(target_region).astype(int)

    def target_class(self):
        """Return target class."""
        return self._target_class

    def target_region(self):
        """Return target region."""
        return self._target_retion

    def name(self):
        """Return criterion name."""
        return 'RegionalTargetClassMiss'

    def is_adversarial(self, predictions, annotation):
        """Decides if predictions for an image are adversarial.

        Parameters
        ----------
        predictions : dict
            Detection output; ``predictions['boxes']`` and
            ``predictions['classes']`` are iterated in lockstep.
        annotation : object
            Unused by this criterion.

        Returns
        -------
        bool
            False as soon as one predicted box of the target class overlaps
            the target region (IoU > 0), True otherwise.
        """
        bbox_list = predictions['boxes']
        class_list = predictions['classes']
        for bbox_pred, cls_pred in zip(bbox_list, class_list):
            iou = self._get_IoU(bbox_pred, self._target_retion)
            if iou > 0 and cls_pred == self._target_class:
                return False
        return True

    @staticmethod
    def _get_IoU(bbox1, bbox2):
        """Return the intersection-over-union of two 4-element boxes.

        Elements 0/1 are treated as the lower corner and 2/3 as the upper
        corner; extents are computed inclusively (``+ 1``). Returns 0.0
        when the boxes do not overlap.
        """
        bi = [max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
              min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])]
        overlap_a = bi[2] - bi[0] + 1
        overlap_b = bi[3] - bi[1] + 1
        if overlap_a > 0 and overlap_b > 0:
            # compute overlap (IoU) = area of intersection / area of union
            inter = overlap_a * overlap_b
            ua = (bbox1[2] - bbox1[0] + 1) * (bbox1[3] - bbox1[1] + 1) + \
                (bbox2[2] - bbox2[0] + 1) * (bbox2[3] - bbox2[1] + 1) - inter
            return inter / ua
        return 0.0


class TargetClassMissGoogle(Criterion):
    """Defines adversarials as images for which the target class
    is not in the Google object detection result.

    Parameters
    ----------
    target_class : str
        Name of the class that must be absent; compared case-insensitively
        against each prediction's ``'name'`` entry.
    """

    def __init__(self, target_class):
        super(TargetClassMissGoogle, self).__init__()
        self._target_class = target_class

    def target_class(self):
        """Return target class."""
        return self._target_class

    def name(self):
        """Return criterion name as ``<ClassName>-<target class>``."""
        return '{}-{}'.format(
            self.__class__.__name__, self.target_class())

    def is_adversarial(self, predictions):
        """Decides if predictions for an image are adversarial.

        Parameters
        ----------
        predictions : list of dict
            Each entry must provide a ``'name'`` string.

        Returns
        -------
        bool
            False if any prediction name equals the target class
            (ignoring case), True otherwise.
        """
        assert isinstance(predictions, list), 'Predictions should be list.'
        target = self._target_class.lower()
        for pred in predictions:
            if pred['name'].lower() == target:
                return False
        return True


class WeightedAP(Criterion):
    """Defines adversarials as weighted AP value larger than given threshold.

    Parameters
    ----------
    Height : number
        Image height; stored as float.
    Width : number
        Image width; stored as float.
    distance_th : number
        Threshold the distance score has to exceed for a prediction to be
        considered adversarial.
    print_f : bool
        If True, intermediate penalties are printed while scoring.
    """

    _defaults = {
        "alpha": 0.001,
        "lambda_tp_area": 0,
        "lambda_tp_dis": 0,
        "lambda_tp_cs": 0,
        "lambda_tp_cls": 1,
        "lambda_fp_area": 0.1,
        "lambda_fp_cs": 0,
        'lambda_fn_area': 0.1,
        'lambda_fn_cs': 0,
        'a_set': [1, 1, 1, 0.1],
        'MINOVERLAP': 0.5,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return default value of n.

        Parameters
        ----------
        n : str
            Key of the default dictionary.

        Returns
        -------
        object
            The default value, or an "Unrecognized attribute name" message
            string when ``n`` is not a known key.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, Height, Width, distance_th, print_f=False):
        super(WeightedAP, self).__init__()
        self.__dict__.update(self._defaults)  # set up default values
        self.Height = float(Height)
        self.Width = float(Width)
        self.th_is_adv = distance_th
        self.print_f = print_f
        # Weights of the four terms combined in the final score.
        self.a_tp = self.a_set[0]
        self.a_fp = self.a_set[1]
        self.a_fn = self.a_set[2]
        self.a_er = self.a_set[3]

    def name(self):
        """Return criterion name."""
        return 'WeightedAP'

    def is_adversarial(self, predictions, annotation):
        """Decides if predictions for an image are adversarial.

        Returns True when the distance score between ``annotation`` and
        ``predictions`` is strictly larger than the configured threshold.
        """
        return self.distance_score(annotation, predictions) > self.th_is_adv

    def _get_bb_area(self, bbox):
        """Return the inclusive (``+ 1``) area of a 4-element box."""
        return (bbox[2] - bbox[0] + 1) * (bbox[3] - bbox[1] + 1)

    def _get_IoU(self, obj_1, obj_2):
        """Return the IoU of the ``'bbox'`` entries of two object dicts.

        Returns 0.0 when the boxes do not overlap.
        """
        bb = obj_1["bbox"]
        bbgt = obj_2["bbox"]
        bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]),
              min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
        iw = bi[2] - bi[0] + 1
        ih = bi[3] - bi[1] + 1
        if iw > 0 and ih > 0:
            # compute overlap (IoU) = area of intersection / area of union
            ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + \
                (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
            ov = iw * ih / ua
            return ov
        else:
            return 0.0

    def _find_by_idx(self, idx, source_dic_list):
        """Return the object whose ``'index'`` equals idx, or ``{}``."""
        for temp_obj in source_dic_list:
            if temp_obj['index'] == idx:
                return temp_obj
        return {}

    def _get_largest_bb_area(self, obj_list):
        """Return the largest box area in obj_list (never less than 1)."""
        temp_max = 1
        for temp_obj in obj_list:
            bb_area = self._get_bb_area(temp_obj['bbox'])
            if bb_area > temp_max:
                temp_max = bb_area
        return temp_max

    def _get_total_bb_area(self, obj_list):
        """Return 1 plus the summed box areas of obj_list.

        The sum starts at 1, so the result is never zero when it is later
        used as a divisor.
        """
        total_area = 1
        for temp_obj in obj_list:
            total_area += self._get_bb_area(temp_obj['bbox'])
        return total_area

    def _get_largest_bb_edge(self, obj_list):
        """Return the longest box edge in obj_list, or -1 if it is empty."""
        temp_max = -1
        for temp_obj in obj_list:
            bb = temp_obj['bbox']
            if abs(bb[2] - bb[0]) > temp_max:
                temp_max = abs(bb[2] - bb[0])
            if abs(bb[3] - bb[1]) > temp_max:
                temp_max = abs(bb[3] - bb[1])
        return temp_max

    def _sort_by_conf(self, ori_list, source_dic_list):
        """Return the indices in ori_list ordered by ascending confidence.

        Lists with at most one entry are returned unchanged.

        Raises
        ------
        ValueError
            If an index has no matching object in source_dic_list.
        """
        tup_list = []
        if len(ori_list) <= 1:
            return ori_list
        for temp in ori_list:
            temp_obj = self._find_by_idx(temp, source_dic_list)
            if not temp_obj:
                raise ValueError('object cannot be found by index.')
            tup_list.append((temp_obj['index'], temp_obj['confident_score']))
        tup_list.sort(key=lambda tup: tup[1])
        return [x[0] for x in tup_list]

    def _sort_match_dic(self, ori_index_dic, source_dic_list):
        """Return a copy of the match dict with every index list sorted
        by ``_sort_by_conf``."""
        sorted_dic = {}
        for temp_key in ori_index_dic.keys():
            temp_list = ori_index_dic[temp_key]
            if len(temp_list) <= 1:
                sorted_dic[temp_key] = temp_list
            else:
                sorted_dic[temp_key] = self._sort_by_conf(
                    temp_list, source_dic_list)
        return sorted_dic

    def _get_fn_list(self, tp_match_dic, source_list):
        """Return the objects of source_list whose index appears in none of
        the match lists of tp_match_dic (the false negatives)."""
        dst_list = []
        for temp_source in source_list:
            flag_found = False
            for temp_idx_pair in tp_match_dic.keys():
                if temp_source['index'] in tp_match_dic[temp_idx_pair]:
                    flag_found = True
            if not flag_found:
                dst_list.append(temp_source)
        return dst_list

    def _get_bb_distance(self, bb1, bb2):
        """Return the Euclidean distance between the centers of two boxes."""
        c1 = [0.5 * (bb1[2] + bb1[0]), 0.5 * (bb1[3] + bb1[1])]
        c2 = [0.5 * (bb2[2] + bb2[0]), 0.5 * (bb2[3] + bb2[1])]
        return math.sqrt((c1[0] - c2[0])**2 + (c1[1] - c2[1])**2)

    def distance_score(self, gt_dic, pd_dic):
        """Compute metric distance between given two detection results.

        Parameters
        ----------
        gt_dic : dict
            The ground truth annotation which contains: scores, boxes and
            classes.
        pd_dic : dict
            The target output from detector which contains: scores, boxes
            and classes.

        Returns
        -------
        float
            The weighted distance score computed by ``_compute_score``.
        """
        gt_list = self._dic2list(gt_dic)
        pd_list = self._dic2list(pd_dic)
        return self._compute_score(gt_list, pd_list)

    def _dic2list(self, dic):
        """Convert a dict of parallel lists into a list of per-object dicts.

        Keys ``'scores'``, ``'boxes'`` and ``'classes'`` are renamed to
        ``'confident_score'``, ``'bbox'`` and ``'class_name'``; every object
        also gets an ``'index'``. The number of objects is taken from the
        first key encountered.

        Raises
        ------
        ValueError
            If the dict contains any other key.
        """
        res_list = []
        for idx, key in enumerate(dic.keys()):
            if idx == 0:
                for sub_idx in range(len(dic[key])):
                    res_list.append({'index': sub_idx})
            if key == 'scores':
                temp_name = 'confident_score'
            elif key == 'boxes':
                temp_name = 'bbox'
            elif key == 'classes':
                temp_name = 'class_name'
            else:
                raise ValueError('Invalid key.')
            for sub_idx, temp_obj in enumerate(dic[key]):
                # Compare by value: identity comparison (`is`) against a
                # string literal only works by accident of interning.
                if temp_name == 'bbox':
                    # Swap the coordinates within each corner pair
                    # (presumably [y1, x1, y2, x2] -> [x1, y1, x2, y2];
                    # confirm against the detector output format).
                    temp_obj = [
                        temp_obj[1], temp_obj[0], temp_obj[3], temp_obj[2]]
                res_list[sub_idx][temp_name] = temp_obj
        return res_list

    def _compute_score(self, gt_obj_list, pd_obj_list):
        """Compute the metric distance score for two detection results.

        Parameters
        ----------
        gt_obj_list : list of dict
            Object list of the ground truth.
        pd_obj_list : list of dict
            Object list of the prediction.

        Notes
        -----
        Each object is a dict of the form::

            obj = {
                'class_name': 'car',
                'bbox': [634, 663, 787, 913],  # four numeric coordinates
                'confident_score': 0.9918241,
                'index': 0,
            }

        Predictions are matched to ground-truth objects by IoU (class names
        are not compared at this stage). Matched pairs, unmatched
        predictions (false positives) and unmatched ground-truth objects
        (false negatives) each contribute a penalty; the final score is the
        weighted mean of those penalties and the error ratio.

        Returns
        -------
        float
            The final weighted score.
        """
        tp_match_dic = {}  # {pd_idx : [gt_idx1, gt_idx2...]}
        for pd_obj in pd_obj_list:
            tp_match_dic[pd_obj['index']] = []
            for gt_obj in gt_obj_list:
                IoU = self._get_IoU(pd_obj, gt_obj)
                if IoU >= self.MINOVERLAP:
                    tp_match_dic[pd_obj['index']].append(gt_obj['index'])

        tp_match_dic = self._sort_match_dic(tp_match_dic, gt_obj_list)

        tp_pair = []
        fp_pd = []
        for temp_idx in tp_match_dic.keys():
            if not tp_match_dic[temp_idx]:
                fp_pd.append(self._find_by_idx(temp_idx, pd_obj_list))
            else:
                # Pair the prediction with the first ground-truth match
                # after sorting.
                tp_pair.append(
                    (self._find_by_idx(temp_idx, pd_obj_list),
                     self._find_by_idx(
                         tp_match_dic[temp_idx][0], gt_obj_list)))
        fn_gt = self._get_fn_list(tp_match_dic, gt_obj_list)

        self.largest_area_gt = self._get_largest_bb_area(gt_obj_list)
        self.largest_edge_gt = self._get_largest_bb_edge(gt_obj_list)
        self.total_area_gt = self._get_total_bb_area(gt_obj_list)
        self.total_area_pd = self._get_total_bb_area(pd_obj_list)

        # True-positive penalty, averaged over the matched pairs.
        cum_tp_penal = 0.0
        for temp_tp_pair in tp_pair:
            distance, area_dif, cs_dif, class_dif = self._tp_panelize(
                temp_tp_pair)
            temp_tp_penal = self.lambda_tp_dis * distance \
                + self.lambda_tp_area * area_dif \
                + self.lambda_tp_cs * cs_dif \
                + self.lambda_tp_cls * class_dif
            cum_tp_penal += temp_tp_penal
        if self.print_f:
            print('cum tp: ', cum_tp_penal)
        if len(tp_pair) > 1:
            cum_tp_penal /= len(tp_pair)

        # False-positive penalty, averaged over the unmatched predictions.
        cum_fp_penal = 0.0
        for temp_fp_pd in fp_pd:
            area, cs = self._fp_fn_panelize(temp_fp_pd)
            drop_func_out = self._factor_func(
                self.total_area_pd / (self.Height * self.Width))
            temp_fp_panel = self.lambda_fp_area * drop_func_out * \
                area / self.total_area_pd + self.lambda_fp_cs * cs
            cum_fp_penal += temp_fp_panel
        if self.print_f:
            print('cum fp: ', cum_fp_penal)
        if len(fp_pd) > 1:
            cum_fp_penal /= len(fp_pd)

        # False-negative penalty, averaged over the unmatched ground truth.
        cum_fn_penal = 0.0
        for temp_fn_gt in fn_gt:
            area, cs = self._fp_fn_panelize(temp_fn_gt)
            drop_func_out = self._factor_func(
                self.total_area_gt / (self.Height * self.Width))
            temp_fn_panel = self.lambda_fn_area * drop_func_out * \
                area / self.total_area_gt + self.lambda_fn_cs * cs
            cum_fn_penal += temp_fn_panel
        if self.print_f:
            print('cum fn: ', cum_fn_penal)
        if len(fn_gt) > 1:
            cum_fn_penal /= len(fn_gt)

        # Share of erroneous detections (fp + fn) among all counted objects.
        if (len(tp_pair) + len(fp_pd) + len(fn_gt)) == 0:
            err_panel = 0
        else:
            err_panel = float((len(fp_pd) + len(fn_gt))) / \
                (len(tp_pair) + len(fp_pd) + len(fn_gt))

        if self.print_f:
            print('tp: ', len(tp_pair), ' cum_tp_penal: ', cum_tp_penal)
            print('fp: ', len(fp_pd), ' cum_fp_penal: ', cum_fp_penal)
            print('fn: ', len(fn_gt), ' cum_fn_penal: ', cum_fn_penal)
            print(
                'total num: ',
                len(tp_pair) + len(fp_pd) + len(fn_gt),
                ' err_panel: ',
                err_panel)

        score_final = (self.a_tp * cum_tp_penal + self.a_fp * cum_fp_penal +
                       self.a_fn * cum_fn_penal + self.a_er * err_panel) \
            / (self.a_tp + self.a_fp + self.a_fn + self.a_er)

        return score_final

    def _factor_func(self, x):
        """Return ``x / (x + alpha)`` for non-zero x, and x itself for 0."""
        x = float(x)
        if x != 0:
            return x / (x + self.alpha)
        return x

    def _tp_panelize(self, obj_pair):
        """Return the penalty components for a matched (prediction, gt) pair.

        Returns
        -------
        tuple
            ``(distance, area_dif, cs_dif, class_dif)``: center distance,
            absolute area difference, absolute confidence difference and
            1 if the class names differ (0 otherwise).
        """
        bb0 = obj_pair[0]['bbox']
        bb1 = obj_pair[1]['bbox']
        distance = self._get_bb_distance(bb0, bb1)
        area0 = self._get_bb_area(bb0)
        area1 = self._get_bb_area(bb1)
        area_dif = abs(area0 - area1)
        cs_dif = abs(
            float(obj_pair[0]['confident_score'])
            - float(obj_pair[1]['confident_score']))
        class_dif = 0
        if obj_pair[0]['class_name'] != obj_pair[1]['class_name']:
            class_dif = 1
        return distance, area_dif, cs_dif, class_dif

    def _fp_fn_panelize(self, obj):
        """Return ``(area, confidence)`` of an unmatched object."""
        bb = obj['bbox']
        area = self._get_bb_area(bb)
        cs = float(obj['confident_score'])
        return area, cs