使用IBM ART库生成对抗样本示例——生成将数字3预测为8的对抗样本-526互联

代码如下：

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.utils import to_categorical
from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import FastGradientMethod
import matplotlib.pyplot as plt
import os

# Load MNIST, scale the pixel values into [0, 1] as float32, and one-hot
# encode the integer labels.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images, test_images = (
    split.astype('float32') / 255 for split in (train_images, test_images)
)
train_labels, test_labels = map(to_categorical, (train_labels, test_labels))

# Build a small fully connected network layer by layer:
# flatten the 28x28 image -> 128 ReLU units -> 10-way softmax output.
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model and train it on the training split for 5 epochs.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(x=train_images, y=train_labels, epochs=5)

# Wrap the trained Keras model in an ART classifier so that ART attacks can
# be run against it.
# The model's last layer already applies softmax, so the loss object must be
# told it receives probabilities (from_logits=False). With from_logits=True
# Keras treats those probabilities as raw logits, which raises a UserWarning
# and distorts the loss gradient the attack relies on.
classifier = TensorFlowV2Classifier(
    model=model,
    nb_classes=10,
    input_shape=(28, 28),
    loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    clip_values=(0, 1),  # valid pixel range of the normalised images
)

# Select every test image whose true label is the digit 3 and show what the
# model predicts for them before the attack.
images = test_images[test_labels.argmax(axis=1) == 3]
preds = model.predict(images)
print('Predicted before:', preds.argmax(axis=1))

# Build the target labels: we want the model to classify each 3 as an 8.
# One one-hot row is needed per attacked image, so the row is tiled to
# len(images) (not to the size of the whole test set) to keep x and y aligned.
target_label = to_categorical(8, num_classes=10)
target_label = np.tile(target_label, (len(images), 1))

# Create a targeted FGSM attack on the wrapped classifier.
attack = FastGradientMethod(estimator=classifier, targeted=True)

# Generate the adversarial examples that should be classified as 8.
adv_images = attack.generate(x=images, y=target_label)

# Check what the model predicts for the adversarial examples.
preds = model.predict(adv_images)
print('Predicted:', preds.argmax(axis=1))

# Create the output directories: 'org' for the original images of the digit 3
# and 'new' for the adversarial examples. exist_ok=True makes this a no-op
# when the directory already exists, without a separate existence check.
os.makedirs('org', exist_ok=True)
os.makedirs('new', exist_ok=True)

# Save each original/adversarial pair under the same index so the two
# directories can be compared file by file.
for i, (original, adversarial) in enumerate(zip(images, adv_images)):
    plt.imsave(f'org/{i}.png', original, cmap='gray')
    plt.imsave(f'new/{i}.png', adversarial, cmap='gray')

输出：

....

1875/1875 [==============================] - 3s 2ms/step - loss: 0.0449 - accuracy: 0.9863

32/32 [==============================] - 0s 983us/step

Predicted before: [3 3 3 ... 3 3 3]

C:\Users\l00379637\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\keras\src\backend.py:5577: UserWarning: "`categorical_crossentropy` received `from_logits=True`, but the `output` argument was produced by a Softmax activation and thus does not represent logits. Was this intended?

output, from_logits = _get_logits(

32/32 [==============================] - 0s 963us/step

Predicted: [8 8 8 ... 8 8 8]

我们看下两个目录中的数据情况：

原始数字3的样本：

对抗样本（预测为8的）：

如果只是生成扰动图片，则代码修改为：

# Full script; data loading and model training are the same as in the first example.
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.utils import to_categorical
from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import FastGradientMethod
import matplotlib.pyplot as plt
import os

# Load MNIST, scale the pixel values into [0, 1] as float32, and one-hot
# encode the integer labels.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images, test_images = (
    split.astype('float32') / 255 for split in (train_images, test_images)
)
train_labels, test_labels = map(to_categorical, (train_labels, test_labels))

# Build a small fully connected network layer by layer:
# flatten the 28x28 image -> 128 ReLU units -> 10-way softmax output.
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model and train it on the training split for 5 epochs.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(x=train_images, y=train_labels, epochs=5)

# Wrap the trained Keras model in an ART classifier so that ART attacks can
# be run against it.
# The model's last layer already applies softmax, so the loss object must be
# told it receives probabilities (from_logits=False). With from_logits=True
# Keras treats those probabilities as raw logits, which raises a UserWarning
# and distorts the loss gradient the attack relies on.
classifier = TensorFlowV2Classifier(
    model=model,
    nb_classes=10,
    input_shape=(28, 28),
    loss_object=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    clip_values=(0, 1),  # valid pixel range of the normalised images
)

# Select every test image whose true label is the digit 3 and show what the
# model predicts for them before the attack.
images = test_images[test_labels.argmax(axis=1) == 3]
preds = model.predict(images)
print('Predicted before:', preds.argmax(axis=1))

# Build the target labels (we want each 3 to be classified as 8):
# one one-hot row per selected image.
target_label = to_categorical(8, num_classes=10)
target_label = np.tile(target_label, (len(images), 1))

# Create a targeted FGSM attack on the wrapped classifier.
attack = FastGradientMethod(estimator=classifier, targeted=True)

# Start the adversarial examples from a copy of the original images so the
# originals are left untouched.
adv_images = np.copy(images)

for i in range(100):  # at most 100 iterations
    # Per-image perturbation proposed by FGSM for the current examples.
    perturbations = attack.generate(x=adv_images, y=target_label) - adv_images

    # Average the perturbations over all images to get one shared pattern.
    avg_perturbation = np.mean(perturbations, axis=0)

    # Apply the shared perturbation to every example. The average is not
    # guaranteed to keep each individual image inside the classifier's
    # declared pixel range, so clip the result back to [0, 1].
    adv_images = np.clip(adv_images + avg_perturbation, 0.0, 1.0)

    # Check what the model predicts for the updated examples.
    preds = model.predict(adv_images)
    predicted_labels = preds.argmax(axis=1)
    print('Iteration:', i, 'Predicted:', predicted_labels)

    # Stop early once every example is classified as 8.
    if np.all(predicted_labels == 8):
        break

# Save the adversarial examples. This script never creates the 'new'
# directory elsewhere, so create it here (no-op if it already exists).
os.makedirs('new', exist_ok=True)
for i, adv_image in enumerate(adv_images):
    plt.imsave(f'new/adv_image_{i}.png', adv_image, cmap='gray')

# Min-max normalise the mean perturbation to [0, 1] and save it as an image.
# NOTE: avg_perturbation holds the mean perturbation computed in the last
# executed loop iteration, not the sum over all iterations.
pert_min = np.min(avg_perturbation)
pert_range = np.max(avg_perturbation) - pert_min
if pert_range > 0:
    avg_perturbation = (avg_perturbation - pert_min) / pert_range
else:
    # A constant perturbation has no contrast; avoid dividing by zero.
    avg_perturbation = np.zeros_like(avg_perturbation)
plt.imsave('avg_perturbation.png', avg_perturbation, cmap='gray')

输出结果：

Predicted before: [3 3 3 ... 3 3 3]

output, from_logits = _get_logits(

32/32 [==============================] - 0s 937us/step

Iteration: 0 Predicted: [8 8 8 ... 8 8 8]

32/32 [==============================] - 0s 915us/step

Iteration: 1 Predicted: [8 8 8 ... 8 8 8]

32/32 [==============================] - 0s 777us/step

Iteration: 2 Predicted: [8 8 8 ... 8 8 8]

然后可以看到扰动图片：

代码说明：

这段代码的主要目标是生成对抗样本，使得一个训练好的神经网络模型将MNIST数据集中的数字3错误地识别为数字8。

以下是代码的主要步骤：

1. 加载和预处理数据：代码首先加载MNIST数据集，并将图像数据归一化到[0, 1]范围。然后，它选择了所有的数字3的图像，并创建了一个目标标签，这是我们希望模型将数字3识别为的数字。

2. 创建和训练模型：代码创建了一个简单的全连接神经网络模型，并使用训练数据对它进行训练。

3. 创建ART分类器：代码创建了一个ART分类器，这个分类器是对原始模型的一个包装，它提供了一些额外的功能，例如生成对抗样本。

4. 生成对抗样本：代码创建了一个FGSM实例，并使用它在每次迭代中生成对抗样本的扰动。然后，它计算所有样本的平均扰动，并将这个平均扰动添加到所有对抗样本上。这个过程最多重复100次，一旦所有的预测结果都为8就提前停止。

5. 保存对抗样本和平均扰动：代码将每个对抗样本和平均扰动保存为图像。

这段代码的主要思想是，通过添加一些微小的扰动到原始图像上，可以使得神经网络模型的预测结果发生改变。这种方法可以用来测试模型的鲁棒性，或者用来攻击模型。

ART库地址：https://github.com/Trusted-AI/adversarial-robustness-toolbox/

【引申】

如果要生成针对交通信号牌的对抗样本，

（1）首先要找一个CNN，能够很好识别指示牌：

例如：

https://www.kaggle.com/code/shivank856/gtsrb-cnn-98-test-accuracy

https://cloud.tencent.com/developer/article/2201792

这两个模型的准确率都还不错。

当然，它们都是基于开源的交通信号指示牌数据集训练的模型：Traffic sign recognition system based on Tensorflow convolutional neural network (CNN) (GTSRB dataset)

（2）下载数据集，也就是比赛中的源样本和目标样本图片，送入上述网络，看看识别结果，是否正常识别。

（3）将该网络导入，然后用上面的步骤生成对抗样本。