Source code for perceptron.utils.criteria.classification

# Copyright 2019 Baidu Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provide base classes that define what is adversarial."""

import sys
from perceptron.utils.func import softmax
from .base import Criterion
import numpy as np


class Misclassification(Criterion):
    """Defines adversarials as images for which the predicted class
    is not the original class.
    """

    def name(self):
        """Return criterion name."""
        return 'Top1Misclassification'

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        top1 = np.argmax(predictions)
        return top1 != label
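# Usage sketch (illustrative, not part of the original module): with raw
# model scores for three classes, the arg-max class is 1, so an input whose
# reference label is 0 counts as adversarial.
#
#     >>> criterion = Misclassification()
#     >>> criterion.is_adversarial(np.array([0.1, 2.3, 0.5]), label=0)
#     True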
class ConfidentMisclassification(Criterion):
    """Defines adversarials as images for which the probability of any
    class other than the original is above a given threshold.
    """

    def __init__(self, threshold):
        super(ConfidentMisclassification, self).__init__()
        assert 0 <= threshold <= 1
        self.threshold = threshold

    def name(self):
        """Return criterion name."""
        return '{}-{:.04f}'.format(self.__class__.__name__, self.threshold)

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        top1 = np.argmax(predictions)
        probabilities = softmax(predictions)
        return (np.max(probabilities) >= self.threshold) and (top1 != label)
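# Usage sketch (illustrative, not part of the original module), assuming
# `softmax` is the standard softmax over the raw scores. For [0, 5, 0] the
# predicted class 1 has probability ~0.99 >= 0.9 and differs from label 0;
# for [0, 1, 0] the maximum probability is only ~0.58, so the misclassification
# is not confident enough.
#
#     >>> criterion = ConfidentMisclassification(threshold=0.9)
#     >>> criterion.is_adversarial(np.array([0.0, 5.0, 0.0]), label=0)
#     True
#     >>> criterion.is_adversarial(np.array([0.0, 1.0, 0.0]), label=0)
#     False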
class TopKMisclassification(Criterion):
    """Defines adversarials as images for which the original class is
    not one of the top k predicted classes.

    For k=1, the :class:`Misclassification` class provides a more
    efficient implementation.

    Parameters
    ----------
    k : int
        Number of top predictions to which the reference label is
        compared.
    """

    def __init__(self, k):
        super(TopKMisclassification, self).__init__()
        self.k = k

    def name(self):
        """Return criterion name."""
        return 'Top{}Misclassification'.format(self.k)

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        topk = np.argsort(predictions)[-self.k:]
        return label not in topk
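# Usage sketch (illustrative, not part of the original module): for the
# scores below the top-2 classes are 1 and 2, so label 0 is adversarial
# while label 2 is not.
#
#     >>> criterion = TopKMisclassification(k=2)
#     >>> criterion.is_adversarial(np.array([0.1, 2.3, 0.5]), label=0)
#     True
#     >>> criterion.is_adversarial(np.array([0.1, 2.3, 0.5]), label=2)
#     False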
class TargetClass(Criterion):
    """Defines adversarials as images for which the predicted class
    is the given target class.

    Parameters
    ----------
    target_class : int
        The target class that needs to be predicted for an image
        to be considered an adversarial.
    """

    def __init__(self, target_class):
        super(TargetClass, self).__init__()
        self._target_class = target_class

    def target_class(self):
        """Return target class."""
        return self._target_class

    def name(self):
        """Return criterion name."""
        return '{}-{}'.format(self.__class__.__name__, self.target_class())

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        top1 = np.argmax(predictions)
        return top1 == self.target_class()
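# Usage sketch (illustrative, not part of the original module): the arg-max
# of the scores is class 1, which matches the requested target class, so the
# targeted attack is considered successful regardless of the reference label.
#
#     >>> criterion = TargetClass(target_class=1)
#     >>> criterion.is_adversarial(np.array([0.1, 2.3, 0.5]), label=0)
#     True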
class OriginalClassProbability(Criterion):
    """Defines adversarials as images for which the probability of the
    original class is below a given threshold.

    This criterion alone does not guarantee that the class predicted
    for the adversarial image is not the original class (unless
    p < 1 / number of classes). Therefore, it should usually be
    combined with a classification criterion.

    Parameters
    ----------
    p : float
        The threshold probability. If the probability of the original
        class is below this threshold, the image is considered an
        adversarial. It must satisfy 0 <= p <= 1.
    """

    def __init__(self, p):
        super(OriginalClassProbability, self).__init__()
        assert 0 <= p <= 1
        self.p = p

    def name(self):
        """Return criterion name."""
        return '{}-{:.04f}'.format(self.__class__.__name__, self.p)

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        probabilities = softmax(predictions)
        return probabilities[label] < self.p
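# Usage sketch (illustrative, not part of the original module), assuming
# `softmax` is the standard softmax over the raw scores: class 0 ends up
# with a probability of roughly 0.018, which is below p=0.1.
#
#     >>> criterion = OriginalClassProbability(p=0.1)
#     >>> criterion.is_adversarial(np.array([0.0, 4.0, 0.0]), label=0)
#     True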
class TargetClassProbability(Criterion):
    """Defines adversarials as images for which the probability of a
    given target class is above a given threshold.

    If the threshold is below 0.5, this criterion does not guarantee
    that the class predicted for the adversarial image is not the
    original class. In that case, it should usually be combined with
    a classification criterion.

    Parameters
    ----------
    target_class : int
        The target class for which the predicted probability must be
        above the threshold probability p, otherwise the image is not
        considered an adversarial.
    p : float
        The threshold probability. If the probability of the target
        class is above this threshold, the image is considered an
        adversarial. It must satisfy 0 <= p <= 1.
    """

    def __init__(self, target_class, p):
        super(TargetClassProbability, self).__init__()
        self._target_class = target_class
        assert 0 <= p <= 1
        self.p = p

    def target_class(self):
        """Return target class."""
        return self._target_class

    def name(self):
        """Return criterion name."""
        return '{}-{}-{:.04f}'.format(
            self.__class__.__name__, self.target_class(), self.p)

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        probabilities = softmax(predictions)
        return probabilities[self.target_class()] > self.p
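# Usage sketch (illustrative, not part of the original module), assuming
# `softmax` is the standard softmax over the raw scores: the target class 1
# reaches a probability of ~0.99, which exceeds p=0.9.
#
#     >>> criterion = TargetClassProbability(target_class=1, p=0.9)
#     >>> criterion.is_adversarial(np.array([0.0, 5.0, 0.0]), label=0)
#     True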
class MisclassificationAntiPorn(Criterion):
    """Defines adversarials as images for which the probability of
    being `normal` is larger than the probability of being `porn`.
    """

    def name(self):
        """Return criterion name."""
        return 'AntiPornMisclassification'

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        _porn_prob = 0
        _normal_prob = 0
        for cls in predictions:
            if cls['class_name'] == '色情':  # class name meaning 'porn'
                _porn_prob = cls['probability']
            if cls['class_name'] == '正常':  # class name meaning 'normal'
                _normal_prob = cls['probability']
        return _porn_prob < _normal_prob


class MisclassificationSafeSearch(Criterion):
    """Defines adversarials as images for which the likelihood of
    being `unsafe` is below a threshold.

    Parameters
    ----------
    target_class : str
        The target `unsafe` class that you want to evade. It must be
        one of ['adult', 'medical', 'racy', 'spoofed', 'violence'].
    threshold : int
        The threshold likelihood level. If the likelihood of the
        target class is **below** this threshold, the image is
        considered an adversarial. It must satisfy
        0 <= threshold <= 5.
    """

    def __init__(self, target_class='adult', threshold=2):
        super(MisclassificationSafeSearch, self).__init__()
        assert target_class in \
            ['adult', 'medical', 'racy', 'spoofed', 'violence']
        self._target_class = target_class
        assert isinstance(threshold, int) and 0 <= threshold <= 5
        self.threshold = threshold
        self.likelihood_name = {
            'UNKNOWN': 0,
            'VERY_UNLIKELY': 1,
            'UNLIKELY': 2,
            'POSSIBLE': 3,
            'LIKELY': 4,
            'VERY_LIKELY': 5
        }

    def target_class(self):
        """Return target class."""
        return self._target_class

    def name(self):
        """Return criterion name."""
        return '{}-{}-{}'.format(
            self.__class__.__name__, self.target_class(), self.threshold)

    def is_adversarial(self, predictions, label):
        """Decides if predictions for an image are adversarial."""
        assert isinstance(predictions, dict), 'Predictions should be dict'
        probability = predictions[self._target_class]
        return probability <= self.threshold
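# Usage sketches (illustrative, not part of the original module). The input
# formats below are assumptions: AntiPorn predictions are taken to be a list
# of {'class_name', 'probability'} dicts, and SafeSearch predictions a dict
# mapping category names to numeric likelihood levels on the 0-5 scale above.
#
#     >>> antiporn = MisclassificationAntiPorn()
#     >>> antiporn.is_adversarial(
#     ...     [{'class_name': '色情', 'probability': 0.3},
#     ...      {'class_name': '正常', 'probability': 0.7}], label=None)
#     True
#
#     >>> safesearch = MisclassificationSafeSearch(target_class='adult',
#     ...                                          threshold=2)
#     >>> safesearch.is_adversarial({'adult': 1, 'violence': 0}, label=None)
#     True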