Source code for perceptron.utils.adversarial.classification

# Copyright 2019 Baidu Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provides a class that represents an adversarial example
for image classification tasks.
"""

import numpy as np
import numbers
from .base import Adversarial
from .base import StopAttack
from perceptron.utils.distances import MSE
from perceptron.utils.distances import Distance


class ClsAdversarial(Adversarial):
    """Defines an adversarial that should be found and stores the result.

    Specialisation of `Adversarial` for classification models. It tags
    itself with the task identifier ``'cls'`` and exposes thin wrappers
    around the model's gradient-related methods that validate inputs and
    update the call counters kept on the instance.
    """

    def __init__(
            self,
            model,
            criterion,
            original_image,
            original_pred,
            threshold=None,
            distance=MSE,
            verbose=False):
        """Forward all arguments to `Adversarial` and set the task to 'cls'.

        All parameters are passed through unchanged (positionally, in the
        order listed) to the base-class constructor; see the base class
        for their meaning.
        """
        super(ClsAdversarial, self).__init__(
            model,
            criterion,
            original_image,
            original_pred,
            threshold,
            distance,
            verbose)
        self._task = 'cls'

    def model_task(self):
        """Interface to model.model_task for attacks.

        Returns
        -------
        str
            The task identifier stored on this instance (``'cls'``).
        """
        return self._task

    def gradient(self, image=None, label=None, strict=True):
        """Interface to model.gradient for attacks.

        Parameters
        ----------
        image : `numpy.ndarray`
            Image with shape (height, width, channels).
            Defaults to the original image.
        label : int
            Label used to calculate the loss that is differentiated.
            Defaults to the original label.
        strict : bool
            Controls if the bounds for the pixel values should be checked.

        Returns
        -------
        `numpy.ndarray`
            The value returned by ``model.gradient``; asserted to have the
            same shape as ``image``.
        """
        assert self.has_gradient()

        if image is None:
            image = self._original_image
        if label is None:
            label = self._original_pred

        # With strict=True an out-of-bounds image is a caller error.
        assert not strict or self.in_bounds(image)

        self._total_gradient_calls += 1
        gradient = self._model.gradient(image, label)

        assert gradient.shape == image.shape
        return gradient

    def predictions_and_gradient(
            self, image=None, label=None, strict=True, return_details=False):
        """Interface to model.predictions_and_gradient for attacks.

        Parameters
        ----------
        image : `numpy.ndarray`
            Image with shape (height, width, channels).
            Defaults to the original image.
        label : int
            Label used to calculate the loss that is differentiated.
            Defaults to the original label.
        strict : bool
            Controls if the bounds for the pixel values should be checked.
        return_details : bool
            If True, additionally return the ``is_best`` and ``distance``
            values produced by ``self._is_adversarial``.

        Returns
        -------
        tuple
            ``(predictions, gradient, is_adversarial)`` by default, or
            ``(predictions, gradient, is_adversarial, is_best, distance)``
            when ``return_details`` is True.
        """
        assert self.has_gradient()

        if image is None:
            image = self._original_image
        if label is None:
            label = self._original_pred

        # Evaluate the bounds check once: the result is needed both for the
        # strict-mode assertion and for the adversarial check further down.
        in_bounds = self.in_bounds(image)
        assert not strict or in_bounds

        # One combined model call counts as both a prediction and a
        # gradient call.
        self._total_prediction_calls += 1
        self._total_gradient_calls += 1
        predictions, gradient = \
            self._model.predictions_and_gradient(image, label)
        is_adversarial, is_best, distance = self._is_adversarial(
            image, predictions, in_bounds)

        assert predictions.ndim == 1
        assert gradient.shape == image.shape
        if return_details:
            return predictions, gradient, is_adversarial, is_best, distance
        return predictions, gradient, is_adversarial

    def backward(self, gradient, image=None, strict=True):
        """Interface for model.backward for attacks.

        Parameters
        ----------
        gradient : `numpy.ndarray`
            One-dimensional array passed to ``model.backward``
            (presumably the gradient w.r.t. the model's predictions;
            confirm against the model implementation).
        image : `numpy.ndarray`
            Image with shape (height, width, channels).
            Defaults to the original image.
        strict : bool
            Controls if the bounds for the pixel values should be checked.

        Returns
        -------
        `numpy.ndarray`
            The value returned by ``model.backward``; asserted to have the
            same shape as ``image``.
        """
        assert self.has_gradient()
        assert gradient.ndim == 1

        if image is None:
            image = self._original_image

        assert not strict or self.in_bounds(image)

        self._total_gradient_calls += 1
        gradient = self._model.backward(gradient, image)

        assert gradient.shape == image.shape
        return gradient