# Code adapted from https://github.com/Yura52/rtdl/blob/main/examples/rtdl.ipynb
# Question: how can we extract the values of the last layer *before* the final
# activation (the pre-activation features) in this example, to verify that the
# network has actually learned something?
# Requirements:
# Requirements (notebook-only magics; run them in a notebook cell without the '#'):
# !pip install rtdl
# !pip install delu  # NOTE: the code below imports `delu`, the renamed successor of `libzero`/`zero`
from typing import Any, Dict
import numpy as np
import rtdl
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
import delu
# Run everything on CPU; switch to 'cuda' if a GPU is available.
device = torch.device('cpu')
# Docs: https://yura52.github.io/zero/0.0.4/reference/api/zero.improve_reproducibility.html
# Seed all RNGs (Python/NumPy/PyTorch) so splits and training are reproducible.
delu.improve_reproducibility(seed=123456)
# !!! NOTE !!! The dataset splits, preprocessing and other details are
# significantly different from those used in the
# paper "Revisiting Deep Learning Models for Tabular Data",
# so the results will be different from the reported in the paper.
dataset = sklearn.datasets.fetch_california_housing()
task_type = 'regression'
# dataset = sklearn.datasets.fetch_covtype()
# task_type = 'multiclass'
assert task_type in ['binclass', 'multiclass', 'regression']
# Features are float32; targets are float32 for regression, int64 for classification.
X_all = dataset['data'].astype('float32')
y_all = dataset['target'].astype('float32' if task_type == 'regression' else 'int64')
if task_type != 'regression':
    # Re-encode class labels to the contiguous range [0, n_classes).
    y_all = sklearn.preprocessing.LabelEncoder().fit_transform(y_all).astype('int64')
n_classes = int(max(y_all)) + 1 if task_type == 'multiclass' else None
# 64% train / 16% val / 20% test (two successive 80/20 splits).
X = {}
y = {}
X['train'], X['test'], y['train'], y['test'] = sklearn.model_selection.train_test_split(
    X_all, y_all, train_size=0.8
)
X['train'], X['val'], y['train'], y['val'] = sklearn.model_selection.train_test_split(
    X['train'], y['train'], train_size=0.8
)
# not the best way to preprocess features, but enough for the demonstration
# Scaler is fit on the train split only, then applied to all splits.
preprocess = sklearn.preprocessing.StandardScaler().fit(X['train'])
X = {
    k: torch.tensor(preprocess.transform(v), device=device)
    for k, v in X.items()
}
y = {k: torch.tensor(v, device=device) for k, v in y.items()}
# !!! CRUCIAL for neural networks when solving regression problems !!!
# Standardize the regression target using train-split statistics; `y_std` is
# kept to de-normalize the reported RMSE later.
if task_type == 'regression':
    y_mean = y['train'].mean().item()
    y_std = y['train'].std().item()
    y = {k: (v - y_mean) / y_std for k, v in y.items()}
else:
    y_std = y_mean = None
# binclass losses expect float targets; multiclass (cross_entropy) expects int64.
if task_type != 'multiclass':
    y = {k: v.float() for k, v in y.items()}
# Output dimension: number of classes for multiclass, otherwise a single unit.
d_out = n_classes or 1
# --- Alternative models (define `lr`/`weight_decay` when uncommenting) ---
# model = rtdl.MLP.make_baseline(
#     d_in=X_all.shape[1],
#     d_layers=[128, 256, 128],
#     dropout=0.1,
#     d_out=d_out,
# )
# lr = 0.001
# weight_decay = 0.0
# model = rtdl.ResNet.make_baseline(
#     d_in=X_all.shape[1],
#     d_main=128,
#     d_intermidiate=256,
#     dropout_first=0.2,
#     dropout_second=0.0,
#     n_blocks=2,
#     d_out=d_out,
# )
# lr = 0.001
# weight_decay = 0.0
# FT-Transformer with the library's default hyperparameters.
model = rtdl.FTTransformer.make_default(
    n_num_features=X_all.shape[1],
    cat_cardinalities=None,  # no categorical features in this dataset
    last_layer_query_idx=[-1],  # it makes the model faster and does NOT affect its output
    d_out=d_out,
)
# === ABOUT CATEGORICAL FEATURES ===
# IF you use MLP, ResNet or any other simple feed-forward model (NOT transformer-based model)
# AND there are categorical features
# THEN you have to implement a wrapper that handles categorical features.
# The example below demonstrates how it can be achieved using rtdl.CategoricalFeatureTokenizer.
# ==================================
# 1. When you have both numerical and categorical features, you should prepare your data like this:
# (X_num, X_cat) instead of X
# Each column in X_cat should contain values within the range from 0 to <(the number of unique values in column) - 1>;
# use sklearn.preprocessing.OrdinalEncoder to achieve this;
# 2. Prepare a list of so called "cardinalities":
# cardinalities[i] = <the number of unique values in the i-th categorical column>
# 3. See the commented example below and adapt it for your needs.
#
# class Model(nn.Module):
# def __init__(
# self,
# n_num_features: int,
# cat_tokenizer: rtdl.CategoricalFeatureTokenizer,
# mlp_kwargs: Dict[str, Any],
# ):
# super().__init__()
# self.cat_tokenizer = cat_tokenizer
# self.model = rtdl.MLP.make_baseline(
# d_in=n_num_features + cat_tokenizer.n_tokens * cat_tokenizer.d_token,
# **mlp_kwargs,
# )
#
# def forward(self, x_num, x_cat):
# return self.model(
# torch.cat([x_num, self.cat_tokenizer(x_cat).flatten(1, -1)], dim=1)
# )
#
# model = Model(
# # `None` means "Do not transform numerical features"
# # `d_token` is the size of embedding for ONE categorical feature
# X_num_all.shape[1],
# rtdl.CategoricalFeatureTokenizer(cardinalities, d_token, True, 'uniform'),
# mlp_kwargs,
# )
# Then the model should be used as `model(x_num, x_cat)` instead of `model(x)`.
model.to(device)
# FT-Transformer ships a recommended optimizer configuration. NOTE(review):
# for MLP/ResNet the else-branch reads `lr` and `weight_decay`, which are only
# defined in the commented-out configurations above — define them before
# switching models, or this raises NameError.
optimizer = (
    model.make_default_optimizer()
    if isinstance(model, rtdl.FTTransformer)
    else torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
)
# Loss matching the task; both classification losses operate on raw logits.
loss_fn = (
    F.binary_cross_entropy_with_logits
    if task_type == 'binclass'
    else F.cross_entropy
    if task_type == 'multiclass'
    else F.mse_loss
)
def apply_model(x_num, x_cat=None):
    """Forward a batch through the module-level `model`.

    FT-Transformer consumes (numerical, categorical) inputs; MLP/ResNet
    consume a single tensor, so `x_cat` must be None for them.
    """
    if isinstance(model, rtdl.FTTransformer):
        return model(x_num, x_cat)
    if isinstance(model, (rtdl.MLP, rtdl.ResNet)):
        assert x_cat is None
        return model(x_num)
    raise NotImplementedError(
        f'Looks like you are using a custom model: {type(model)}.'
        ' Then you have to implement this branch first.'
    )
@torch.no_grad()
def evaluate(part):
    """Score the model on the given split ('train'/'val'/'test').

    Returns accuracy for classification tasks, or de-normalized RMSE
    (RMSE in normalized space times `y_std`) for regression.
    """
    model.eval()
    outputs = torch.cat(
        [apply_model(chunk) for chunk in delu.iter_batches(X[part], 1024)]
    )
    outputs = outputs.squeeze(1).cpu().numpy()
    labels = y[part].cpu().numpy()
    if task_type == 'binclass':
        # Sigmoid -> round gives hard 0/1 predictions from the logits.
        return sklearn.metrics.accuracy_score(
            labels, np.round(scipy.special.expit(outputs))
        )
    if task_type == 'multiclass':
        return sklearn.metrics.accuracy_score(labels, outputs.argmax(1))
    assert task_type == 'regression'
    return sklearn.metrics.mean_squared_error(labels, outputs) ** 0.5 * y_std
# Create a dataloader for batches of indices
# Docs: https://yura52.github.io/zero/reference/api/zero.data.IndexLoader.html
batch_size = 256
train_loader = delu.data.IndexLoader(len(X['train']), batch_size, device=device)
# Create a progress tracker for early stopping
# Docs: https://yura52.github.io/zero/reference/api/zero.ProgressTracker.html
# Training stops after 100 consecutive epochs without a validation improvement.
progress = delu.ProgressTracker(patience=100)
print(f'Test score before training: {evaluate("test"):.4f}')
n_epochs = 1000
# Log the loss roughly 5 times per epoch.
report_frequency = len(X['train']) // batch_size // 5
for epoch in range(1, n_epochs + 1):
    for iteration, batch_idx in enumerate(train_loader):
        model.train()
        optimizer.zero_grad()
        x_batch = X['train'][batch_idx]
        y_batch = y['train'][batch_idx]
        # squeeze(1) drops the trailing output dim so shapes match the targets.
        loss = loss_fn(apply_model(x_batch).squeeze(1), y_batch)
        loss.backward()
        optimizer.step()
        if iteration % report_frequency == 0:
            print(f'(epoch) {epoch} (batch) {iteration} (loss) {loss.item():.4f}')
    val_score = evaluate('val')
    test_score = evaluate('test')
    print(f'Epoch {epoch:03d} | Validation score: {val_score:.4f} | Test score: {test_score:.4f}', end='')
    # The tracker maximizes its input, so regression scores (RMSE, lower is
    # better) are negated before updating.
    progress.update((-1 if task_type == 'regression' else 1) * val_score)
    if progress.success:
        print(' <<< BEST VALIDATION EPOCH', end='')
    print()
    if progress.fail:
        break