PyTorch training types: tabular image classification
The datasets used for this type of problem should be the ones everyone is most familiar with, such as handwritten digits or the common clothing-and-shoes datasets (the second problem in a 2025 AI competition in Kunshan used exactly this kind of dataset).
What follows is the standard format and code, with the same guiding principle as before: just enough to pass the AI trainer certification exam.
# Required libraries
import pandas as pd
import numpy as np
import torch
from torch.optim import SGD
import torchmetrics
import torch.nn as nn
from torch.utils.data import Dataset, random_split, DataLoader
# Data processing
class MyDataset(Dataset):
    def __init__(self, csv_path):
        df = pd.read_csv(csv_path)
        y = df.iloc[:, 0]       # first column is the label
        X = df.iloc[:, 1:]      # remaining columns are the pixel values
        self.X = torch.tensor(X.values, dtype=torch.float) / 255.0  # scale pixels to [0, 1]
        self.y = torch.tensor(y.values, dtype=torch.long)
        print(f"Number of features: {X.shape[1]}, number of samples: {y.shape[0]}")

    def __getitem__(self, index):
        return self.X[index], self.y[index]

    def __len__(self):
        return len(self.X)
dataset = MyDataset("A_Z Handwritten Data.csv")
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_data, test_data = random_split(dataset, (train_size, test_size))
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)

One more remark: official datasets like this are well formatted and consistent, so you only need to memorize the standard processing steps.
Separate the labels from the features, then process the feature range (here, scaling pixel values into [0, 1]), and that's about it. Fairly simple.
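If you want to double-check the preprocessing before training, pulling a single batch is enough. A minimal sketch, assuming the train_loader defined above (the 784 in the comment is just the usual 28x28 image size for this kind of dataset, not something the code asserts):

# Optional sanity check: inspect one batch's shapes and value range
batch_X, batch_y = next(iter(train_loader))
print(batch_X.shape)    # e.g. torch.Size([128, 784]) if the images are 28x28
print(batch_y.shape)    # torch.Size([128])
print(batch_X.min().item(), batch_X.max().item())  # should fall within [0.0, 1.0]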
# Model construction
class AZNET(nn.Module):
    def __init__(self, input_size, num_classes):
        super(AZNET, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.network(x)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # note: "cpu" must be lowercase
input_size = dataset.X.shape[1]
num_classes = len(torch.unique(dataset.y))
model = AZNET(input_size, num_classes).to(device)
print(f"分類數:{num_classes}")
print(device)
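Before moving on to training, you can optionally push a dummy batch through the network to confirm that the output dimension matches the number of classes. A minimal sketch (the batch size of 4 is arbitrary):

# Optional shape check with a dummy batch (not part of the exam template)
dummy = torch.rand(4, input_size).to(device)
print(model(dummy).shape)   # expected: torch.Size([4, num_classes])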
# Model training
# For classification problems, use the cross-entropy loss
lossf = nn.CrossEntropyLoss()
optim = SGD(model.parameters(), lr=0.001)
# R2Score is not suitable for classification tasks; use Accuracy instead
acc = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes).to(device)
epochs = 10
for epoch in range(epochs):
    acc.reset()
    model.train()
    total_loss = 0
    for batch_X, batch_y in train_loader:
        optim.zero_grad()
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        output = model(batch_X)
        loss_ = lossf(output, batch_y)
        loss_.backward()
        optim.step()
        total_loss += loss_.item()
        acc.update(output, batch_y)
    avg_loss = total_loss / len(train_loader)
    avg_acc = acc.compute().item()
    print(f"Epoch {epoch+1}: LOSS: {avg_loss:.4f}, ACC: {avg_acc:.4f}")

Not much more to say about these two sections; the only real difference from the previous article is the model construction part. The points that deserve attention are marked with comments.