I am using PyTorch's autograd to compute the second derivative of the output with respect to the input, but the result is always 0.
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
class MTLModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, num_tasks):
        super(MTLModel, self).__init__()
        # Shared trunk: input layer followed by the remaining hidden layers
        self.shared_layers = nn.Sequential(
            nn.Linear(input_size, hidden_sizes[0]),
            nn.ReLU()
        )
        for i in range(len(hidden_sizes) - 1):
            self.shared_layers.add_module(f'hidden_layer_{i+1}', nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            self.shared_layers.add_module(f'relu_{i+1}', nn.ReLU())
        # One scalar output head per task
        self.task_specific_layers = nn.ModuleList()
        for i in range(num_tasks):
            self.task_specific_layers.append(nn.Linear(hidden_sizes[-1], 1))

    def forward(self, x):
        shared_output = self.shared_layers(x)
        task_outputs = []
        for task_layer in self.task_specific_layers:
            task_output = task_layer(shared_output)
            task_outputs.append(task_output)
        return task_outputs
# Define the training function
def train(model, train_data, train_targets, num_epochs, batch_size, learning_rate, alpha, gamma):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    num_batches = len(train_data) // batch_size
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        epoch_monotonicity_penalty = np.zeros(train_targets.shape[1])
        epoch_slope_penalty = 0.0
        epoch_output2_constraint_penalty = 0.0
        epoch_output3_constraint_penalty = 0.0
        for batch in range(num_batches):
            batch_data = train_data[batch*batch_size:(batch+1)*batch_size]
            batch_targets = train_targets[batch*batch_size:(batch+1)*batch_size]
            # requires_grad=True so gradients w.r.t. the input can be taken
            batch_data = torch.tensor(batch_data, dtype=torch.float32, requires_grad=True)
            batch_targets = torch.tensor(batch_targets, dtype=torch.float32)
            optimizer.zero_grad()
            task_outputs = model(batch_data)
            task_gradients = []
            task_gradient2s = []
            task_losses = []
            for i, task_output in enumerate(task_outputs):
                task_loss = criterion(task_output.squeeze(), batch_targets[:, i])
                # Monotonicity constraint: first derivative of the task output w.r.t. the input
                task_gradient, = torch.autograd.grad(task_output.sum(), batch_data, create_graph=True, retain_graph=True)
                # Penalize negative slopes d(output)/d(input)
                monotonicity_penalty = (-task_gradient).clamp(min=0.0).mean()
                task_loss = task_loss + alpha[i] * monotonicity_penalty
                task_losses.append(task_loss)
                epoch_monotonicity_penalty[i] += monotonicity_penalty.item()
                # Second derivative of the task output w.r.t. the input
                task_gradient2, = torch.autograd.grad(task_gradient.sum(), batch_data, create_graph=True, retain_graph=True)
                print('1st gradient\n', task_gradient)
                print('2nd gradient\n', task_gradient2)
            loss = sum(task_losses)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print('Epoch %d Loss: %.4f Slope Penalty: %.4f' % (epoch+1, epoch_loss/num_batches, epoch_slope_penalty))
        print('epoch_monotonicity_penalty', epoch_monotonicity_penalty)
#%%
# Generate random data
num_samples = 100
num_features = 1
np.random.seed(100)
data = np.linspace(0, 1, num=num_samples).reshape(num_samples,1)
targets = np.zeros((num_samples, 3))
targets[:,0] = -0.5+0.5*np.cos(data[:,0]) + 0.2*data[:,0]**2+0.1*np.random.normal(size=(num_samples))
targets[:,1] = data[:,0]**3 + 0.1*np.random.normal(size=(num_samples))
targets[:,2] = 2*data[:,0]**4 + 0.1*np.random.normal(size=(num_samples))
# Define the model
input_size = num_features
hidden_size = [64,128,64]
num_tasks = 3
model = MTLModel(input_size, hidden_size, num_tasks)
# Define the training parameters
num_epochs = 200
batch_size = 32
learning_rate = 0.001
alpha = [0, 0, 0]  # monotonicity penalty weight per task (disabled here)
gamma = 0
# Train the model
train(model, data, targets, num_epochs, batch_size, learning_rate, alpha, gamma)
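
To rule out a problem with the double torch.autograd.grad call itself, here is a minimal standalone check on a smooth scalar function (the names x, f, g1, g2 are just illustrative and are not part of the model code above); it returns the expected non-zero second derivative:

# Minimal check: first and second derivative of f(x) = sum(x^3) w.r.t. x
import torch

x = torch.linspace(0.0, 1.0, steps=5, requires_grad=True)
f = (x ** 3).sum()
g1, = torch.autograd.grad(f, x, create_graph=True)  # 3*x^2
g2, = torch.autograd.grad(g1.sum(), x)              # 6*x, clearly non-zero
print('1st derivative', g1)
print('2nd derivative', g2)

So the double grad call seems fine on a smooth function. Is the issue simply that a network built only from nn.Linear and nn.ReLU layers is piecewise linear in its input, so its true second derivative with respect to the input is zero almost everywhere (and a smooth activation such as nn.Tanh would be needed to get non-zero values), or am I using autograd incorrectly?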