项目结构#

model.py: 定义了用于验证码识别的CNN模型。
setting.py: 包含了与数据集、模型参数相关的配置信息。
train.py: 实现了模型的训练逻辑，包括数据预处理、训练循环、验证以及早停机制等。

核心代码#

模型定义 (`model.py`)#

1
import torch.nn as nn
2
from setting import IMAGE_WIDTH, IMAGE_HEIGHT, MAX_CAPTCHA, ALL_CHAR_SET_LEN
3

4
class CNN(nn.Module):
5
    def __init__(self):
6
        super(CNN, self).__init__()
7
        # 定义卷积层和全连接层
8
        # ...
9

10
    def forward(self, x):
11
        # 前向传播过程
12
        # ...

设置文件 (`setting.py`)#

1
import string

IMAGE_HEIGHT = 40
IMAGE_WIDTH = 140
# Digits, then uppercase letters, then lowercase letters.
ALL_CHAR_SET = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)
ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)
MAX_CAPTCHA = 5
# Other training parameters such as learning rate and batch size go here.

训练脚本 (`train.py`)#

1. 数据集类

1
class CaptchaDataset(Dataset):
2
    # 初始化函数、__len__ 和 __getitem__ 方法
3
    # 主要功能：加载图片、转换为灰度图、从文件名提取标签、尺寸调整与归一化等
4
    # ...

2. 训练模型

1
def train_model():
2
    # 检查数据集完整性、选择设备（GPU/CPU）
3
    # 创建数据加载器、模型实例化、损失函数及优化器设置
4
    # 训练循环：前向传播、计算损失、反向传播、更新权重
5
    # 验证模型性能、早停机制、保存最佳模型
6
    # ...

3. 验证模型

1
def validate_model(model, test_loader, device, criterion):
2
    # 在验证集上评估模型性能
3
    # 返回准确率和平均损失值
4
    # ...

4. 绘制训练曲线

1
def plot_training_curve(train_losses, train_accuracies, val_losses, val_accuracies, learning_rates):
2
    # 绘制训练和验证的损失与准确率变化曲线
3
    # 同时绘制学习率的变化情况
4
    # ...

总结
该训练流程首先定义了一个适用于验证码识别任务的 CNN 模型；接着通过 CaptchaDataset 类对数据进行预处理，并在训练过程中使用早停机制来避免过拟合；最后通过绘制训练曲线展示模型在训练过程中的表现，包括损失、准确率以及学习率的变化趋势。这一结构化的流程可以作为解决类似问题的模板，便于后续调整和扩展。

完整代码#

`model.py`#

1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# @Time    : 2025/11/19 20:04
4
# @Author  : afish
5
# @File    : model.py
6
import torch.nn as nn
7

8
import setting
9

10

11
class CNN(nn.Module):
    """Convolutional captcha classifier.

    Three convolutional blocks, each ending in a 2x2 max pooling, feed a
    1024-unit hidden layer and a linear head that emits
    ``setting.MAX_CAPTCHA * setting.ALL_CHAR_SET_LEN`` raw scores per image.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and the order of the modules inside every Sequential
        # are kept as-is so that state_dict keys ("layer1.0.weight", ...)
        # stay compatible with existing checkpoints.
        self.layer1 = self._conv_block(1, 32)
        self.layer2 = self._conv_block(32, 64)
        self.layer3 = self._conv_block(64, 64)

        # Each of the three MaxPool2d(2) layers floor-halves both spatial
        # dimensions, hence the "// 8"; 64 is the channel count of layer3.
        flat_features = (setting.IMAGE_WIDTH // 8) * (setting.IMAGE_HEIGHT // 8) * 64
        self.fc = nn.Sequential(
            nn.Linear(flat_features, 1024),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.rfc = nn.Sequential(
            nn.Linear(1024, setting.MAX_CAPTCHA * setting.ALL_CHAR_SET_LEN),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv3x3 -> BatchNorm -> Dropout -> ReLU -> MaxPool(2) block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        """Return the raw (un-normalised) scores for a batch of images ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = self.rfc(out)
        return out

`setting.py`#

1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# @Time    : 2025/11/19 20:14
4
# @Author  : afish
5
# @File    : setting.py
6
import string

# Image size
IMAGE_HEIGHT = 40
IMAGE_WIDTH = 140

# Character set: digits + uppercase letters + lowercase letters
ALL_CHAR_SET = list(string.digits + string.ascii_uppercase + string.ascii_lowercase)

ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)

# Number of characters in one captcha
MAX_CAPTCHA = 5

# Training parameters
BATCH_SIZE = 256 * 2
EPOCHS = 50
LEARNING_RATE = 0.001

# File paths
TRAIN_DATASET_PATH = "data/train"
TEST_DATASET_PATH = "data/test"
MODEL_SAVE_PATH = "model/captcha_model.pth"

`train.py`#

1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# @Time    : 2025/11/19 20:04
4
# @Author  : afish
5
# @File    : train.py
6

7
import os
8
import torch
9
import torch.nn as nn
10
import torch.optim as optim
11
from torch.utils.data import DataLoader, Dataset
12
from PIL import Image, ImageFile
13
import numpy as np
14
from model import CNN
15
import setting
16
from torchvision import transforms
17
import matplotlib.pyplot as plt
18
import random
19
import time
20
import math
21

22
# 允许加载截断的图片文件
23
ImageFile.LOAD_TRUNCATED_IMAGES = True
24

25

26
class EarlyStopping:
    """Stop training once the validation loss has stopped improving."""

    def __init__(self, patience=7, verbose=True, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): number of consecutive non-improving calls after
                which ``early_stop`` is set.
            verbose (bool): whether to print progress messages.
            delta (float): minimum decrease of the loss that counts as an
                improvement.
            path (str): file the best model's state_dict is saved to.
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        """Record one validation result; checkpoint ``model`` if it improved."""
        score = -val_loss

        # A NaN loss compares False against everything, which would otherwise
        # be mistaken for an improvement, overwrite the best checkpoint and
        # keep the counter from ever advancing again.
        improved = not math.isnan(score) and (
            self.best_score is None or score >= self.best_score + self.delta
        )

        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
            return

        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        """Save the model's state_dict to ``self.path`` and remember the loss."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
        # Make sure the target directory exists so torch.save cannot fail on it.
        checkpoint_dir = os.path.dirname(self.path)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
69

70

71
class CaptchaDataset(Dataset):
    """Captcha images stored in one directory and labelled by their file names.

    File names are expected to look like ``1005~2A2G2.jpg``: the label is the
    text after the last ``~`` up to the next ``.``.
    """

    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_path, transform=None):
        """
        Args:
            data_path (str): directory that contains the image files.
            transform (callable, optional): applied to the grayscale PIL image;
                when omitted the image is resized and scaled to [0, 1].
        """
        self.data_path = data_path
        self.transform = transform
        self.image_files = [f for f in os.listdir(data_path) if f.endswith(self._IMAGE_EXTENSIONS)]

        # Pre-check every image and remember which ones cannot be read.
        self.valid_files = []
        self.corrupted_files = []

        print("检查图片文件完整性...")
        for img_name in self.image_files:
            img_path = os.path.join(self.data_path, img_name)
            try:
                with Image.open(img_path) as img:
                    img.verify()
            except (OSError, SyntaxError) as e:
                print(f"损坏图片: {img_name} - 错误: {e}")
                self.corrupted_files.append(img_name)
            else:
                self.valid_files.append(img_name)

        print(f"有效图片: {len(self.valid_files)}, 损坏图片: {len(self.corrupted_files)}")

        # Optionally delete the corrupted files; valid_files already excludes them.
        if self.corrupted_files:
            try:
                response = input(f"发现 {len(self.corrupted_files)} 个损坏图片，是否删除? (y/n): ")
            except EOFError:
                # No interactive stdin (e.g. a background job): keep the files.
                response = 'n'
            if response.lower() == 'y':
                for corrupted_file in self.corrupted_files:
                    os.remove(os.path.join(self.data_path, corrupted_file))
                print("已删除损坏图片")
            else:
                print("将跳过损坏图片")

    def __len__(self):
        return len(self.valid_files)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_vector)`` for the ``idx``-th valid file."""
        img_name = self.valid_files[idx]
        img_path = os.path.join(self.data_path, img_name)

        try:
            # The context manager closes the underlying file right away;
            # convert() returns an independent in-memory grayscale image.
            with Image.open(img_path) as img:
                image = img.convert('L')
        except (OSError, SyntaxError):
            # Fall back to an all-black image so a single bad file does not
            # abort the whole run.
            print(f"读取图片失败: {img_name}, 使用替代图片")
            image = Image.new('L', (setting.IMAGE_WIDTH, setting.IMAGE_HEIGHT), 0)

        # Label taken from the file name (format: 1005~2A2G2.jpg).
        label_str = img_name.split('~')[-1].split('.')[0]

        if len(label_str) != setting.MAX_CAPTCHA:
            print(f"警告: 标签长度不匹配: {label_str} (期望长度: {setting.MAX_CAPTCHA})")

        label = self.text2vec(label_str)

        if self.transform:
            try:
                image = self.transform(image)
            except Exception as e:
                print(f"图片转换失败: {img_name}, 错误: {e}")
                image = torch.zeros(1, setting.IMAGE_HEIGHT, setting.IMAGE_WIDTH)
        else:
            # Default pipeline: resize -> numpy -> scale to [0, 1] -> add channel dim.
            try:
                image = image.resize((setting.IMAGE_WIDTH, setting.IMAGE_HEIGHT))
                image = np.array(image)
                image = torch.FloatTensor(image) / 255.0
                image = image.unsqueeze(0)
            except Exception as e:
                print(f"图片处理失败: {img_name}, 错误: {e}")
                image = torch.zeros(1, setting.IMAGE_HEIGHT, setting.IMAGE_WIDTH)

        return image, label

    def text2vec(self, text):
        """Encode ``text`` as a flattened one-hot vector.

        The result has ``MAX_CAPTCHA * ALL_CHAR_SET_LEN`` entries. Characters
        beyond ``MAX_CAPTCHA`` are ignored; positions missing from a short
        label stay all-zero.
        """
        vector = torch.zeros(setting.MAX_CAPTCHA, setting.ALL_CHAR_SET_LEN)
        for i, char in enumerate(text[:setting.MAX_CAPTCHA]):
            try:
                idx = setting.ALL_CHAR_SET.index(char)
            except ValueError:
                print(f"错误: 字符 '{char}' 不在字符集中")
                # NOTE(review): an unknown character gets a random class so
                # training can continue; this makes the label for that
                # position noise.
                idx = random.randint(0, setting.ALL_CHAR_SET_LEN - 1)
            vector[i][idx] = 1
        return vector.view(-1)
169

170

171
def vec2text(vec):
    """Decode a flattened score/one-hot vector back into captcha text.

    The vector is viewed as ``MAX_CAPTCHA`` rows and the character with the
    highest value in each row is picked.
    """
    rows = vec.view(setting.MAX_CAPTCHA, -1)
    return ''.join(setting.ALL_CHAR_SET[torch.argmax(row).item()] for row in rows)
179

180

181
def check_dataset():
    """Return True when the training directory exists and is not empty."""
    train_dir = setting.TRAIN_DATASET_PATH
    # isdir (rather than exists) so that a regular file at this path is
    # reported as a missing dataset instead of crashing os.listdir.
    if not os.path.isdir(train_dir) or not os.listdir(train_dir):
        print("训练集不存在或为空!")
        print("请先运行 split_dataset.py 来分割数据集")
        return False
    return True
188

189

190
def train_model():
    """Train the captcha CNN, checkpoint it, and plot the training curves.

    Reads all paths and hyper-parameters from ``setting``. When a test set is
    available it is used for validation, early stopping and best-model
    selection; otherwise a checkpoint is written after every epoch.
    """
    if not check_dataset():
        return

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'Using device: {device}')

    transform = transforms.Compose([
        transforms.Resize((setting.IMAGE_HEIGHT, setting.IMAGE_WIDTH)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    train_dataset = CaptchaDataset(setting.TRAIN_DATASET_PATH, transform=transform)
    if len(train_dataset) == 0:
        # The directory may hold files but no readable images.
        print("训练集不存在或为空!")
        return
    train_loader = DataLoader(train_dataset, batch_size=setting.BATCH_SIZE, shuffle=True)

    print(f"训练集大小: {len(train_dataset)}")

    if os.path.exists(setting.TEST_DATASET_PATH) and len(os.listdir(setting.TEST_DATASET_PATH)) > 0:
        test_dataset = CaptchaDataset(setting.TEST_DATASET_PATH, transform=transform)
        test_loader = DataLoader(test_dataset, batch_size=setting.BATCH_SIZE, shuffle=False)
        print(f"测试集大小: {len(test_dataset)}")
    else:
        test_loader = None
        print("未找到测试集，将只使用训练集")
    # Validation is skipped both when there is no loader and when it is empty.
    has_validation = test_loader is not None and len(test_loader) > 0

    model = CNN().to(device)
    print("模型结构:")
    print(model)

    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(), lr=setting.LEARNING_RATE)

    # Multiply the learning rate by 0.1 every 20 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    # Create the output directory before anything tries to write into it.
    model_dir = os.path.dirname(setting.MODEL_SAVE_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    early_stopping = EarlyStopping(
        patience=15,
        verbose=True,
        delta=0.001,
        path=setting.MODEL_SAVE_PATH.replace('.pth', '_best.pth')
    )

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    learning_rates = []

    best_accuracy = 0.0
    start_time = time.time()

    print("开始训练...")
    for epoch in range(setting.EPOCHS):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        epoch_start_time = time.time()

        for batch_idx, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()

            # Clip the gradient norm before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            running_loss += loss.item()

            # A sample counts as correct only if every character matches.
            predicted = outputs.view(-1, setting.MAX_CAPTCHA, setting.ALL_CHAR_SET_LEN)
            labels_reshaped = labels.view(-1, setting.MAX_CAPTCHA, setting.ALL_CHAR_SET_LEN)

            _, predicted_chars = torch.max(predicted, 2)
            _, label_chars = torch.max(labels_reshaped, 2)

            correct += (predicted_chars == label_chars).all(dim=1).sum().item()
            total += labels.size(0)

            if batch_idx % 10 == 0:
                accuracy = 100 * correct / total if total > 0 else 0
                current_lr = optimizer.param_groups[0]['lr']
                print(f'Epoch [{epoch + 1}/{setting.EPOCHS}], Batch [{batch_idx}/{len(train_loader)}], '
                      f'Loss: {loss.item():.4f}, Acc: {accuracy:.2f}%, LR: {current_lr:.2e}')

        # Record the learning rate this epoch actually used, then advance the
        # scheduler; reading it after step() would log the next epoch's value.
        current_lr = optimizer.param_groups[0]['lr']
        learning_rates.append(current_lr)
        scheduler.step()

        train_accuracy = 100 * correct / total if total > 0 else 0
        epoch_loss = running_loss / len(train_loader) if len(train_loader) > 0 else 0

        train_losses.append(epoch_loss)
        train_accuracies.append(train_accuracy)

        epoch_time = time.time() - epoch_start_time
        print(f'Epoch [{epoch + 1}/{setting.EPOCHS}], Time: {epoch_time:.2f}s, '
              f'Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, LR: {current_lr:.2e}')

        if has_validation:
            val_accuracy, val_loss = validate_model(model, test_loader, device, criterion)
            val_accuracies.append(val_accuracy)
            val_losses.append(val_loss)

            print(f'Validation Accuracy: {val_accuracy:.2f}%, Validation Loss: {val_loss:.4f}')

            # Save the best-accuracy model before the early-stopping check so
            # the epoch that triggers the stop can still be saved.
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
                torch.save(model.state_dict(), setting.MODEL_SAVE_PATH)
                print(f'Best model saved with accuracy: {best_accuracy:.2f}%')

            early_stopping(val_loss, model)
            if early_stopping.early_stop:
                print("早停: 停止训练")
                break
        else:
            # Without a validation set, keep a full checkpoint for every epoch.
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': epoch_loss,
            }, setting.MODEL_SAVE_PATH.replace('.pth', f'_epoch{epoch + 1}.pth'))

    total_time = time.time() - start_time
    print(f"训练完成! 总时间: {total_time:.2f}s")

    # Save the final weights before plotting: plt.show() may block, and a
    # plotting failure must not cost the trained model.
    torch.save(model.state_dict(), setting.MODEL_SAVE_PATH.replace('.pth', '_final.pth'))
    print("最终模型已保存")

    plot_training_curve(train_losses, train_accuracies, val_losses, val_accuracies, learning_rates)
350

351

352
def validate_model(model, test_loader, device, criterion):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Returns:
        tuple: ``(accuracy, avg_loss)``. ``accuracy`` is the percentage of
        samples whose characters are all predicted correctly; ``avg_loss`` is
        the mean of the per-batch losses. Both are 0 for an empty loader.
    """
    # The model is left in eval mode; callers switch back to train mode.
    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    per_char_shape = (-1, setting.MAX_CAPTCHA, setting.ALL_CHAR_SET_LEN)

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            running_loss += criterion(outputs, labels).item()

            # Index of the highest-scoring character at every position.
            predicted_chars = outputs.view(*per_char_shape).max(dim=2).indices
            label_chars = labels.view(*per_char_shape).max(dim=2).indices

            correct += (predicted_chars == label_chars).all(dim=1).sum().item()
            total += labels.size(0)

    num_batches = len(test_loader)
    accuracy = 100 * correct / total if total > 0 else 0
    avg_loss = running_loss / num_batches if num_batches > 0 else 0
    return accuracy, avg_loss
379

380

381
def plot_training_curve(train_losses, train_accuracies, val_losses, val_accuracies, learning_rates):
    """Plot loss, accuracy and learning rate per epoch.

    The figure is written to ``training_curve.png`` and then shown.
    Validation curves are drawn only when their lists are non-empty.
    """
    fig = plt.figure(figsize=(15, 5))

    # Panel 1: loss
    plt.subplot(1, 3, 1)
    plt.plot(train_losses, label='Train Loss')
    if val_losses:
        plt.plot(val_losses, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Panel 2: accuracy
    plt.subplot(1, 3, 2)
    plt.plot(train_accuracies, label='Train Accuracy')
    if val_accuracies:
        plt.plot(val_accuracies, label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.grid(True)

    # Panel 3: learning rate on a log scale
    plt.subplot(1, 3, 3)
    plt.plot(learning_rates)
    plt.title('Learning Rate Schedule')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.yscale('log')
    plt.grid(True)

    plt.tight_layout()
    # Save before show() so the file is written even if the window is closed.
    plt.savefig('training_curve.png', dpi=300, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
415

416

417
# Run the training only when this file is executed as a script.
if __name__ == "__main__":
    train_model()

`pth转onnx脚本`#

1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# @Time    : 2025/11/19 23:09
4
# @Author  : afish
5
# @File    : export_to_onnx.py
6
# export_to_onnx.py
7
import os
8

9
import numpy as np
10
import onnx
11
import onnxruntime as ort
12
import torch
13

14
import setting
15
from model import CNN
16

17

18
def export_model_to_onnx():
    """Convert the trained PyTorch model to ONNX.

    Loads the weights from ``setting.MODEL_SAVE_PATH``, exports and checks an
    ONNX model, optionally optimizes it, and runs a smoke-test inference.

    Returns:
        str | None: path of the ONNX file to use (the optimized one when the
        optimization step succeeded), or None when any mandatory step failed.
    """

    print("🚀 开始ONNX模型转换...")
    print("=" * 50)

    model_path = setting.MODEL_SAVE_PATH
    if not os.path.exists(model_path):
        print(f"❌ 模型文件不存在: {model_path}")
        print("💡 请先确保训练完成并保存了最佳模型")
        return None

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"📱 使用设备: {device}")

    model = CNN().to(device)

    try:
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()  # evaluation mode for the export
        print("✅ 模型加载成功!")
    except Exception as e:
        print(f"❌ 模型加载失败: {e}")
        return None

    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"📊 模型参数总数: {total_params:,}")

    # Dummy input with the image size from ``setting``, used to trace the model.
    batch_size = 1
    dummy_input = torch.randn(batch_size, 1, setting.IMAGE_HEIGHT, setting.IMAGE_WIDTH).to(device)
    print(f"📐 输入尺寸: {dummy_input.shape}")

    onnx_path = "best_captcha_model.onnx"
    optimized_onnx_path = "captcha_model_optimized.onnx"

    try:
        print("🔄 正在导出ONNX模型...")
        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            export_params=True,
            opset_version=13,
            do_constant_folding=True,
            input_names=['input'],
            output_names=['output'],
            # Leave the batch dimension dynamic on both input and output.
            dynamic_axes={
                'input': {0: 'batch_size'},
                'output': {0: 'batch_size'}
            },
            verbose=False
        )
        print(f"✅ ONNX模型已导出: {onnx_path}")
    except Exception as e:
        print(f"❌ ONNX导出失败: {e}")
        return None

    try:
        print("🔍 验证ONNX模型...")
        onnx_model = onnx.load(onnx_path)
        onnx.checker.check_model(onnx_model)
        print("✅ ONNX模型验证通过!")
    except Exception as e:
        print(f"❌ ONNX模型验证失败: {e}")
        return None

    # Optional optimization: any failure (including a missing onnxoptimizer
    # package) falls back to the un-optimized model.
    try:
        import onnxoptimizer
        print("⚡ 正在优化ONNX模型...")
        passes = ['extract_constant_to_initializer', 'eliminate_unused_initializer']
        optimized_model = onnxoptimizer.optimize(onnx_model, passes)
        onnx.save(optimized_model, optimized_onnx_path)
        print(f"✅ 优化模型已保存: {optimized_onnx_path}")
    except Exception as e:
        print(f"⚠️ 优化步骤跳过: {e}")
        optimized_onnx_path = onnx_path

    test_onnx_inference(optimized_onnx_path)

    return optimized_onnx_path
109

110

111
def test_onnx_inference(onnx_path):
    """Run one random input through the ONNX model and print output statistics.

    Failures are reported on stdout and not raised.
    """
    print("\n🧪 测试ONNX模型推理...")

    try:
        ort_session = ort.InferenceSession(onnx_path)
        # Ask the session for its input name instead of assuming 'input'.
        input_name = ort_session.get_inputs()[0].name

        test_input = np.random.randn(1, 1, setting.IMAGE_HEIGHT, setting.IMAGE_WIDTH).astype(np.float32)

        outputs = ort_session.run(None, {input_name: test_input})

        print("✅ ONNX推理测试成功!")
        print(f"📊 输出形状: {outputs[0].shape}")
        print(f"🎯 输出范围: [{outputs[0].min():.4f}, {outputs[0].max():.4f}]")

    except Exception as e:
        print(f"❌ ONNX推理测试失败: {e}")
131

132

133
def compare_performance(onnx_path, original_model_path):
    """Time 100 forward passes of the PyTorch model and of the ONNX model.

    Args:
        onnx_path (str): path of the exported ONNX model.
        original_model_path (str): path of the PyTorch state_dict file.
    """
    # Imported locally: at module level `time` is only imported inside the
    # `if __name__ == "__main__"` guard, so it is missing when this module is
    # imported from elsewhere.
    import time

    print("\n⚡ 性能对比测试...")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    original_model = CNN().to(device)
    original_model.load_state_dict(torch.load(original_model_path, map_location=device))
    original_model.eval()

    ort_session = ort.InferenceSession(onnx_path)
    input_name = ort_session.get_inputs()[0].name

    test_data = torch.randn(10, 1, setting.IMAGE_HEIGHT, setting.IMAGE_WIDTH).to(device)
    test_data_np = test_data.cpu().numpy().astype(np.float32)

    def _wait_for_device():
        # CUDA kernels run asynchronously; wait so the timer sees finished work.
        if device.type == 'cuda':
            torch.cuda.synchronize()

    # PyTorch timing (one untimed warm-up pass first).
    with torch.no_grad():
        original_model(test_data)
        _wait_for_device()
        start_time = time.perf_counter()
        for _ in range(100):
            original_model(test_data)
        _wait_for_device()
        torch_time = time.perf_counter() - start_time

    # ONNX Runtime timing (one untimed warm-up pass first).
    ort_session.run(None, {input_name: test_data_np})
    start_time = time.perf_counter()
    for _ in range(100):
        ort_session.run(None, {input_name: test_data_np})
    onnx_time = time.perf_counter() - start_time

    print(f"⏱️  PyTorch推理时间: {torch_time:.4f}s")
    print(f"⏱️  ONNX推理时间: {onnx_time:.4f}s")
    if onnx_time > 0:
        print(f"🚀 速度提升: {torch_time / onnx_time:.2f}x")
167

168

169
class ONNXPredictor:
    """Thin wrapper around an ONNX Runtime session for running predictions."""

    def __init__(self, onnx_path):
        self.onnx_path = onnx_path
        self.session = ort.InferenceSession(onnx_path)
        # Name of the model's first input, used as the feed key.
        self.input_name = self.session.get_inputs()[0].name

    def predict(self, image_array):
        """Run the model on ``image_array`` and return its first output."""
        outputs = self.session.run(None, {self.input_name: image_array})
        return outputs[0]

    def predict_batch(self, image_arrays):
        """Run ``predict`` once per element and stack the results in an array."""
        return np.array([self.predict(img_array) for img_array in image_arrays])
189

190

191
if __name__ == "__main__":
    import time

    # Work around the duplicate-OpenMP-runtime warning, if it shows up.
    # NOTE(review): this is set after torch/onnxruntime were imported at the
    # top of the file; confirm it still takes effect this late.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    start_time = time.time()

    onnx_path = export_model_to_onnx()

    if onnx_path:
        end_time = time.time()
        print("\n" + "=" * 50)
        print("🎉 ONNX转换完成!")
        print(f"⏱️  总耗时: {end_time - start_time:.2f}秒")
        print(f"💾 模型文件: {onnx_path}")
        # The hard-coded "73.23%" accuracy line was removed: this script never
        # measures accuracy, so the number was stale for any other model.
        print("\n💡 使用示例:")
        print(f"python inference_onnx.py --image 您的图片.jpg --model {onnx_path}")
    else:
        print("\n❌ ONNX转换失败")

项目结构#

核心代码#

模型定义 (model.py)#

设置文件 (setting.py)#

训练脚本 (train.py)#

完整代码#

model.py#

setting.py#

train.py#

pth转onnx脚本#

模型定义 (`model.py`)#

设置文件 (`setting.py`)#

训练脚本 (`train.py`)#

`model.py`#

`setting.py`#

`train.py`#

`pth转onnx脚本`#