redstonewill / cnn_pytorch_beginner Goto Github PK
View Code? Open in Web Editor NEWObject detection learning path
Object detection learning path
运行第3个LeNet-5案例,自定义数据,最后模型测试出现这样的报错:RuntimeError: Given groups=1, weight of size [6, 1, 5, 5], expected input[1, 3, 32, 32] to have 1 channels, but got 3 channels instead
尝试过改变学习率,改变batch-size,但是loss一直维持在0.69左右,accuracy也维持在50%上下,我的代码如下,可以帮我看看问题在哪里,或者给我点解决问题的思路吗
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets,transforms
import time
from matplotlib import pyplot as plt
class AlexNet(nn.Module):
"""
Neural network model consisting of layers propsed by AlexNet paper.
"""
def init(self, num_classes=2):
"""
Define and allocate layers for this neural net.
Args:
num_classes (int): number of classes to predict with this model
"""
super().init()
# input size should be : (b x 3 x 227 x 227)
# The image in the original paper states that width and height are 224 pixels, but
# the dimensions after first convolution layer do not lead to 55 x 55.
self.net = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4), # (b x 96 x 55 x 55)
nn.ReLU(),
nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2), # section 3.3
nn.MaxPool2d(kernel_size=3, stride=2), # (b x 96 x 27 x 27)
nn.Conv2d(96, 256, 5, padding=2), # (b x 256 x 27 x 27)
nn.ReLU(),
nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
nn.MaxPool2d(kernel_size=3, stride=2), # (b x 256 x 13 x 13)
nn.Conv2d(256, 384, 3, padding=1), # (b x 384 x 13 x 13)
nn.ReLU(),
nn.Conv2d(384, 384, 3, padding=1), # (b x 384 x 13 x 13)
nn.ReLU(),
nn.Conv2d(384, 256, 3, padding=1), # (b x 256 x 13 x 13)
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2), # (b x 256 x 6 x 6)
)
# classifier is just a name for linear layers
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(in_features=(256 * 6 * 6), out_features=500),
nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(in_features=500, out_features=20),
nn.ReLU(),
nn.Linear(in_features=20, out_features=num_classes),
)
def forward(self, x):
"""
Pass the input through the net.
Args:
x (Tensor): input tensor
Returns:
output (Tensor): output tensor
"""
x = self.net(x)
x = x.view(-1, 256 * 6 * 6) # reduce the dimensions for linear layer input
return self.classifier(x)
class MyDataset(Dataset):
def init(self, txt_path, transform = None, target_transform = None):
fh = open(txt_path, 'r')
imgs = []
for line in fh:
line = line.rstrip()
words = line.split()
imgs.append((words[0], int(words[1]))) # 类别转为整型int
self.imgs = imgs
self.transform = transform
self.target_transform = target_transform
def getitem(self, index):
fn, label = self.imgs[index]
img = Image.open(fn).convert('RGB')
#img = Image.open(fn)
if self.transform is not None:
img = self.transform(img)
return img, label
def len(self):
return len(self.imgs)
def make_train_loader():
pipline_train = transforms.Compose([
#transforms.RandomResizedCrop(224),
#随机旋转图片
transforms.RandomHorizontalFlip(),
#将图片尺寸resize到227x227
transforms.Resize((227,227)),
#将图片转化为Tensor格式
transforms.ToTensor(),
#正则化(当模型出现过拟合的情况时,用来降低模型的复杂度)
transforms.Normalize(mean = [0.485, 0.456, 0.406],std = [0.229, 0.224, 0.225])
])
train_data = MyDataset('/opt/qcom/aistack/qairt/2.21.0.240401/examples/Models/alexnet/data/catVSdog/train.txt', transform=pipline_train)
#train_data 包含多有的训练与测试数据,调用DataLoader批量加载
trainloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=64, shuffle=True)
classes = ('cat', 'dog') # 对应label=0,label=1
return trainloader,classes
def train_runner(model, trainloader, optimizer, epoch, Loss, Accuracy):
#训练模型, 启用 BatchNormalization 和 Dropout, 将BatchNormalization和Dropout置为True
model.train()
total = 0
correct =0.0
#enumerate迭代已加载的数据集,同时获取数据和数据下标
for i, data in enumerate(trainloader, 0):
inputs, labels = data
#初始化梯度
optimizer.zero_grad()
#保存训练结果
outputs = model(inputs)
#计算损失和
#多分类情况通常使用cross_entropy(交叉熵损失函数), 而对于二分类问题, 通常使用sigmod
loss = F.cross_entropy(outputs, labels)
#获取最大概率的预测结果
#dim=1表示返回每一行的最大值对应的列下标
predict = outputs.argmax(dim=1)
total += labels.size(0)
correct += (predict == labels).sum().item()
#反向传播
loss.backward()
#更新参数
optimizer.step()
if i % 100 == 0:
#loss.item()表示当前loss的数值
print("Train Epoch{} \t Loss: {:.6f}, accuracy: {:.6f}%".format(epoch, loss.item(), 100*(correct/total)))
Loss.append(loss.item())
Accuracy.append(correct/total)
def main():
(trainloader,classes) = make_train_loader()
model = torch.load("/opt/qcom/aistack/qairt/2.21.0.240401/examples/Models/alexnet/pytorch/alexnet_s.pt")
#定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.01)
#调用
epoch = 20
Loss = []
Accuracy = []
print('started Training')
for epoch in range(1, epoch+1):
print("start_time",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
train_runner(model, trainloader, optimizer, epoch, Loss, Accuracy)
print("end_time: ",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())),'\n')
x = torch.rand(1,3,227,227)
trace_model = torch.jit.trace(model,x)
torch.jit.save(trace_model,"/opt/qcom/aistack/qairt/2.21.0.240401/examples/Models/alexnet/pytorch/alexnet.pt")
print('Finished Training')
if name == "main":
main()
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [64, 500]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
请问使用的pytorch是什么版本的?
您好,我遇到了这个错误,请问是什么原因造成的 ,谢谢~
loss.append(loss.item())
AttributeError: 'Tensor' object has no attribute 'append'
尝试过改变学习率和batch-size,但是loss保持在0.69不变,以下是代码,可以给我一点建议吗
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets,transforms
import time
from matplotlib import pyplot as plt
# torch.autograd.set_detect_anomaly(True)
class AlexNet(nn.Module):
"""
Neural network model consisting of layers propsed by AlexNet paper.
"""
def __init__(self, num_classes=2):
"""
Define and allocate layers for this neural net.
Args:
num_classes (int): number of classes to predict with this model
"""
super().__init__()
# input size should be : (b x 3 x 227 x 227)
# The image in the original paper states that width and height are 224 pixels, but
# the dimensions after first convolution layer do not lead to 55 x 55.
self.net = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4), # (b x 96 x 55 x 55)
nn.ReLU(),
nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2), # section 3.3
nn.MaxPool2d(kernel_size=3, stride=2), # (b x 96 x 27 x 27)
nn.Conv2d(96, 256, 5, padding=2), # (b x 256 x 27 x 27)
nn.ReLU(),
nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
nn.MaxPool2d(kernel_size=3, stride=2), # (b x 256 x 13 x 13)
nn.Conv2d(256, 384, 3, padding=1), # (b x 384 x 13 x 13)
nn.ReLU(),
nn.Conv2d(384, 384, 3, padding=1), # (b x 384 x 13 x 13)
nn.ReLU(),
nn.Conv2d(384, 256, 3, padding=1), # (b x 256 x 13 x 13)
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2), # (b x 256 x 6 x 6)
)
# classifier is just a name for linear layers
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(in_features=(256 * 6 * 6), out_features=500),
nn.ReLU(),
nn.Dropout(p=0.5),
nn.Linear(in_features=500, out_features=20),
nn.ReLU(),
nn.Linear(in_features=20, out_features=num_classes),
)
def forward(self, x):
"""
Pass the input through the net.
Args:
x (Tensor): input tensor
Returns:
output (Tensor): output tensor
"""
x = self.net(x)
x = x.view(-1, 256 * 6 * 6) # reduce the dimensions for linear layer input
return self.classifier(x)
class MyDataset(Dataset):
def __init__(self, txt_path, transform = None, target_transform = None):
fh = open(txt_path, 'r')
imgs = []
for line in fh:
line = line.rstrip()
words = line.split()
imgs.append((words[0], int(words[1]))) # 类别转为整型int
self.imgs = imgs
self.transform = transform
self.target_transform = target_transform
def __getitem__(self, index):
fn, label = self.imgs[index]
img = Image.open(fn).convert('RGB')
#img = Image.open(fn)
if self.transform is not None:
img = self.transform(img)
return img, label
def __len__(self):
return len(self.imgs)
def make_train_loader():
pipline_train = transforms.Compose([
#transforms.RandomResizedCrop(224),
#随机旋转图片
transforms.RandomHorizontalFlip(),
#将图片尺寸resize到227x227
transforms.Resize((227,227)),
#将图片转化为Tensor格式
transforms.ToTensor(),
#正则化(当模型出现过拟合的情况时,用来降低模型的复杂度)
transforms.Normalize(mean = [0.485, 0.456, 0.406],std = [0.229, 0.224, 0.225])
])
train_data = MyDataset('/opt/qcom/aistack/qairt/2.21.0.240401/examples/Models/alexnet/data/catVSdog/train.txt', transform=pipline_train)
#train_data 包含多有的训练与测试数据,调用DataLoader批量加载
trainloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=64, shuffle=True)
# 类别信息也是需要我们给定的
classes = ('cat', 'dog') # 对应label=0,label=1
return trainloader,classes
def train_runner(model, trainloader, optimizer, epoch, Loss, Accuracy):
#训练模型, 启用 BatchNormalization 和 Dropout, 将BatchNormalization和Dropout置为True
model.train()
total = 0
correct =0.0
#enumerate迭代已加载的数据集,同时获取数据和数据下标
for i, data in enumerate(trainloader, 0):
inputs, labels = data
#初始化梯度
optimizer.zero_grad()
#保存训练结果
outputs = model(inputs)
#计算损失和
#多分类情况通常使用cross_entropy(交叉熵损失函数), 而对于二分类问题, 通常使用sigmod
loss = F.cross_entropy(outputs, labels)
#获取最大概率的预测结果
#dim=1表示返回每一行的最大值对应的列下标
predict = outputs.argmax(dim=1)
total += labels.size(0)
correct += (predict == labels).sum().item()
#反向传播
loss.backward()
#更新参数
optimizer.step()
if i % 100 == 0:
#loss.item()表示当前loss的数值
print("Train Epoch{} \t Loss: {:.6f}, accuracy: {:.6f}%".format(epoch, loss.item(), 100*(correct/total)))
Loss.append(loss.item())
Accuracy.append(correct/total)
def main():
(trainloader,classes) = make_train_loader()
model = torch.load("/opt/qcom/aistack/qairt/2.21.0.240401/examples/Models/alexnet/pytorch/alexnet_s.pt")
#定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.01)
#调用
epoch = 20
Loss = []
Accuracy = []
print('started Training')
for epoch in range(1, epoch+1):
print("start_time",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
train_runner(model, trainloader, optimizer, epoch, Loss, Accuracy)
print("end_time: ",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())),'\n')
x = torch.rand(1,3,227,227)
trace_model = torch.jit.trace(model,x)
torch.jit.save(trace_model,"/opt/qcom/aistack/qairt/2.21.0.240401/examples/Models/alexnet/pytorch/alexnet.pt")
print('Finished Training')
if __name__ == "__main__":
main()
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.