pytorch自定义初始化权重后模型loss一直在2点几
class Net(nn.Module):

    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Conv2d(3,64,3,padding=1,bias=False)
        self.conv2 = nn.Conv2d(64,64,3,padding=1,bias=False)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()

        self.conv3 = nn.Conv2d(64,128,3,padding=1,bias=False)
        self.conv4 = nn.Conv2d(128, 128, 3,padding=1,bias=False)
        self.pool2 = nn.MaxPool2d(2, 2, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv5 = nn.Conv2d(128,128, 3,padding=1,bias=False)
        self.conv6 = nn.Conv2d(128, 128, 3,padding=1,bias=False)
        self.conv7 = nn.Conv2d(128, 128, 1,padding=1,bias=False)
        self.pool3 = nn.MaxPool2d(2, 2, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()

        self.conv8 = nn.Conv2d(128, 256, 3,padding=1,bias=False)
        self.conv9 = nn.Conv2d(256, 256, 3, padding=1,bias=False)
        self.conv10 = nn.Conv2d(256, 256, 1, padding=1,bias=False)
        self.pool4 = nn.MaxPool2d(2, 2, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()

        self.conv11 = nn.Conv2d(256, 512, 3, padding=1,bias=False)
        self.conv12 = nn.Conv2d(512, 512, 3, padding=1,bias=False)
        self.conv13 = nn.Conv2d(512, 512, 1, padding=1,bias=False)
        self.pool5 = nn.MaxPool2d(2, 2, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU()

        self.fc14 = nn.Linear(512*4*4,1024)
        self.drop1 = nn.Dropout2d()
        self.fc15 = nn.Linear(1024,1024)
        self.drop2 = nn.Dropout2d()
        self.fc16 = nn.Linear(1024,10)


    def forward(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.bn1(x)
        x = self.relu1(x)


        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.pool3(x)
        x = self.bn3(x)
        x = self.relu3(x)

        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.pool4(x)
        x = self.bn4(x)
        x = self.relu4(x)

        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.pool5(x)
        x = self.bn5(x)
        x = self.relu5(x)
        # print(" x shape ",x.size())
        x = x.view(-1,512*4*4)
        x = F.relu(self.fc14(x))
        x = self.drop1(x)
        x = F.relu(self.fc15(x))
        x = self.drop2(x)
        x = self.fc16(x)

        return x

model = Net()
#model = torch.nn.DataParallel(model)
if t.cuda.is_available():
    model.cuda()
print(model)
# torchvision输出的是PILImage,值的范围是[0, 1].
# 我们将其转化为tensor数据,并归一化为[-1, 1]。
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                ])
# 训练集,将相对目录./data下的cifar-10-batches-py文件夹中的全部数据(50000张图片作为训练数据)加载到内存中,若download为True时,会自动从网上下载数据并解压
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
# 将训练集的50000张图片划分成12500份,每份4张图,用于mini-batch输入。shffule=True在表示不同批次的数据遍历时,打乱顺序。num_workers=2表示使用两个子进程来加载数据
trainloader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=False, num_workers=1)
#测试集,将相对目录./data下的cifar-10-batches-py文件夹中的全部数据(10000张图片作为测试数据)加载到内存中,若download为True时,会自动从网上下载数据并解压
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
# 将测试集的10000张图片划分成2500份,每份4张图,用于mini-batch输入。
testloader = torch.utils.data.DataLoader(testset, batch_size=50,
                                         shuffle=False, num_workers=1)
criterion = nn.CrossEntropyLoss()#叉熵损失函数
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)#使用SGD(随机梯度下降)优化,学习率为0.001,动量为0.9
for epoch in range(1):  # 遍历数据集10次
    running_loss = 0.0
    # enumerate(sequence, [start=0]),i序号,data是数据
    for i, data in enumerate(trainloader, 0):
    #for i in range(1000):
        # get the inputs
        #data is list
        inputs, labels = data  # data的结构是:[4x3x32x32的张量,长度4的张量]
        # wrap them in Variable
        inputs = Variable(inputs)
        labels=Variable(labels)# 把input数据从tensor转为variable
        if t.cuda.is_available():
            inputs=inputs.cuda()
            labels=labels.cuda()
        #inputs,labels= inputs.type(torch.FloatTensor),labels.type(torch.FloatTensor)
        # zero the parameter gradients
        optimizer.zero_grad()  # 将参数的grad值初始化为0
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)  # 将output和labels使用叉熵计算损失
        loss.backward()  # 反向传播
        optimizer.step()  # 用SGD更新参数
        # 每2000批数据打印一次平均loss值
        running_loss += loss.item()  # loss本身为Variable类型,所以要使用data获取其Tensor,因为其为标量,所以取0  或使用loss.item()
        if i % 500 == 499:  # 每2000批打印一次
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 500))
            running_loss = 0.0
print('Finished Training')
start = time.clock()#.time
correct = 0
total = 0
for data in testloader:
    images, labels = data
    images=images.cuda()
    labels=labels.cuda()
    outputs = model(Variable(images))
    print(outputs.shape)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
end = time.clock()#.time
print("Running time: %s Seconds"  % (end-start))

weight0=np.random.random((64,3,3,3))
weight0=nn.Parameter(torch.FloatTensor(weight0).cuda(),requires_grad=True)
#weight1=np.random.random((64,64,3,3),dtype=np.float32)
weight1=np.random.random((64,64,3,3))
weight1=nn.Parameter(torch.FloatTensor(weight1).cuda(),requires_grad=True)

weight2=np.random.random((128,64,3,3))
weight2=nn.Parameter(torch.FloatTensor(weight2).cuda(),requires_grad=True)


weight3=np.random.random((128,128,3,3))
weight3=nn.Parameter(torch.FloatTensor(weight3).cuda(),requires_grad=True)


weight4=np.random.random((128,128,3,3))
weight4=nn.Parameter(torch.FloatTensor(weight4).cuda(),requires_grad=True)


weight5=np.random.random((128,128,3,3))
weight5=nn.Parameter(torch.FloatTensor(weight5).cuda(),requires_grad=True)


weight6=np.random.random((128,128,1,1))
weight6=nn.Parameter(torch.FloatTensor(weight6).cuda(),requires_grad=True)


weight7=np.random.random((256,128,3,3))
weight7=nn.Parameter(torch.FloatTensor(weight7).cuda(),requires_grad=True)


weight8=np.random.random((256,256,3,3))
weight8=nn.Parameter(torch.FloatTensor(weight8).cuda(),requires_grad=True)


weight9=np.random.random((256,256,1,1))
weight9=nn.Parameter(torch.FloatTensor(weight9).cuda(),requires_grad=True)


weight10=np.random.random((512,256,3,3))
weight10=nn.Parameter(torch.FloatTensor(weight10).cuda(),requires_grad=True)


weight11=np.random.random((512,512,3,3))
weight11=nn.Parameter(torch.FloatTensor(weight11).cuda(),requires_grad=True)


weight12=np.random.random((512,512,1,1))
weight12=nn.Parameter(torch.FloatTensor(weight12).cuda(),requires_grad=True)



new_layer_id=0
for  m1 in model.modules():
    if isinstance(m1, nn.Conv2d):
        if (new_layer_id==0):
            m1.weight = weight0
        elif(new_layer_id==1):
            m1.weight= weight1
        elif(new_layer_id==2):
            m1.weight = weight2
        elif(new_layer_id==3):
            m1.weight = weight3
        elif(new_layer_id==4):
            m1.weight = weight4
        elif(new_layer_id==5): 
            m1.weight = weight5
        elif(new_layer_id==6):
            m1.weight = weight6
        elif(new_layer_id==7):
            m1.weight = weight7
        elif(new_layer_id==8):
            m1.weight = weight8
        elif(new_layer_id==9):
            m1.weight = weight9
        elif(new_layer_id==10):
            m1.weight = weight10
        elif(new_layer_id==11):
            m1.weight = weight11
        elif(new_layer_id==12):
            m1.weight = weight12
        new_layer_id=new_layer_id+1
    elif isinstance(m1, nn.BatchNorm2d):
        m1.weight = m1.weight
        m1.bias = m1.bias
    elif isinstance(m1, nn.Linear):
        m1.weight = m1.weight
        m1.bias = m1.bias

 #torchvision输出的是PILImage,值的范围是[0, 1].
# 我们将其转化为tensor数据,并归一化为[-1, 1]。
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                ])
# 训练集,将相对目录./data下的cifar-10-batches-py文件夹中的全部数据(50000张图片作为训练数据)加载到内存中,若download为True时,会自动从网上下载数据并解压
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
# 将训练集的50000张图片划分成12500份,每份4张图,用于mini-batch输入。shffule=True在表示不同批次的数据遍历时,打乱顺序。num_workers=2表示使用两个子进程来加载数据
trainloader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=False, num_workers=1)
#测试集,将相对目录./data下的cifar-10-batches-py文件夹中的全部数据(10000张图片作为测试数据)加载到内存中,若download为True时,会自动从网上下载数据并解压
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
# 将测试集的10000张图片划分成2500份,每份4张图,用于mini-batch输入。
testloader = torch.utils.data.DataLoader(testset, batch_size=50,
                                         shuffle=False, num_workers=1)
criterion1 = nn.CrossEntropyLoss()#叉熵损失函数
optimizer1 = optim.Adam(model.parameters(), lr=0.001)#使用SGD(随机梯度下降)优化,学习率为0.001,动量为0.9
#momentum=0.9

start = time.clock()#.time
correct = 0
total = 0
for data in testloader:
    images, labels = data
    images=images.cuda()
    labels=labels.cuda()
    outputs = model(Variable(images))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print('Accuracy of the Newmodel1 on the 10000 test images: %d %%' % (
        100 * correct / total))
end = time.clock()#.time


print("Running time: %s Seconds"  % (end-start))

for epoch in range(20):  
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
    #for i in range(1000):
        inputs, labels = data  
        inputs = Variable(inputs)
        #print(inputs.shape)
        labels=Variable(labels)
        inputs=inputs.cuda()
        labels=labels.cuda()
        optimizer1.zero_grad()  
        outputs = model(inputs)
        loss = criterion1(outputs, labels)  
        loss.backward() 
        optimizer1.step()  
        running_loss += loss.item()  
        if i % 500 == 499:  
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 500))
Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问