617. LEAP - Atmospheric Physics using AI (ClimSim) | leap-atmospheric-physics-ai-climsim
首先,我要感谢组织者和 Kaggle 举办这次比赛。比赛数据的质量很棒。虽然过程中存在一些问题,但无论如何,我们取得了让大多数人满意的结果。
这是我的第一枚个人金牌,我也成为了竞赛 Grandmaster。这七年的旅程相当漫长且令人兴奋。
我认为我的解决方案非常简单,基本上是基于从 BiLSTM 衍生的 seq2seq 模型。
(bs, 60, 25) --> seq2seq --> (bs, 60, 14) --> (bs, 368)
| 模型 | CV (交叉验证) | LB (leaderboard) |
|---|---|---|
| BiLSTM (layers=6) | 0.7844 | 0.7812 |
| BiGRU (layers=8) | 0.7835 | 0.7802 |
| BiLSTM + Transformer | 0.7858 | 0.7821 |
| BiLSTM + Attention | 0.7865 | 0.7834 |
| BiLSTM + TCN | 0.7855 | 0.7832 |
| BiLSTM + CNN | 0.7842 | 0.7821 |
| 模型集成 (ensemble on models) | 0.7923 | 0.7890 |
| 目标集成 (ensemble on targets) | 0.7933 | 0.7884 |
class LeapModel(nn.Module):
    """Seq2seq BiLSTM: (bs, 60, 25) -> (bs, 60, 14) -> (bs, 368).

    The first 6 output channels are per-level (sequence) targets, flattened
    channel-major to 6*60 = 360 columns; the remaining channels are averaged
    over the 60 levels to produce the scalar targets.
    """

    def __init__(self,
                 input_size,
                 seq_len,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 bidirectional=False,
                 dropout=.3,
                 hidden_layers=None):
        super().__init__()
        # BUGFIX: the original default was the mutable list [128, 256],
        # shared across instances; None keeps the same effective default.
        if hidden_layers is None:
            hidden_layers = [128, 256]
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.output_size = output_size
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=bidirectional,
                           batch_first=True,
                           dropout=dropout)
        rnn_out = hidden_size * 2 if bidirectional else hidden_size
        if hidden_layers:
            first_layer = nn.Linear(rnn_out, hidden_layers[0])
            self.hidden_layers = nn.ModuleList(
                [first_layer] +
                [nn.Linear(hidden_layers[i], hidden_layers[i + 1])
                 for i in range(len(hidden_layers) - 1)]
            )
            for layer in self.hidden_layers:
                nn.init.kaiming_normal_(layer.weight.data)
            # NOTE(review): intermediate_layer is never used in forward();
            # kept so existing checkpoints still load.
            self.intermediate_layer = nn.Linear(hidden_layers[-1], self.input_size)
            self.output_layer = nn.Linear(hidden_layers[-1], output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        else:
            self.hidden_layers = []
            self.intermediate_layer = nn.Linear(rnn_out, self.input_size)
            self.output_layer = nn.Linear(rnn_out, output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        self.activation_fn = torch.nn.GELU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        outputs, hidden = self.rnn(x)
        x = self.dropout(self.activation_fn(outputs))
        for hidden_layer in self.hidden_layers:
            x = self.activation_fn(hidden_layer(x))
            x = self.dropout(x)
        x = self.output_layer(x)
        # (-1, 60, 14) -> (-1, 368)  (original comment said 386: typo)
        o_s = x[:, :, :6]                        # per-level targets
        o_s = o_s.permute(0, 2, 1).reshape(-1, 360)
        o_g = x[:, :, 6:]                        # scalar targets: mean over levels
        o_g = o_g.mean(dim=1)
        out = torch.cat([o_s, o_g], dim=1)
        return out
# Hyper-parameters for the plain BiLSTM variant.
input_size, output_size, seq_len = 25, 14, 60
hidden_size, num_layers, dropout = 256, 6, 0.1
hidden_layers = [256, 512]

# Bidirectional 6-layer LSTM, moved to the training device.
model = LeapModel(
    input_size=input_size,
    seq_len=seq_len,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    hidden_layers=hidden_layers,
    dropout=dropout,
    bidirectional=True,
).to(device)
参考链接: https://www.kaggle.com/code/brandenkmurray/seq2seq-rnn-with-gru
class LeapModel(nn.Module):
    """BiLSTM + TransformerEncoder seq2seq: (bs, 60, 25) -> (bs, 368)."""

    def __init__(self,
                 input_size,
                 seq_len,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 bidirectional=False,
                 dropout=0.3,
                 hidden_layers=None,
                 nhead=8,
                 num_transformer_layers=2):
        super().__init__()
        # BUGFIX: avoid the shared mutable default argument; [128, 256]
        # remains the effective default.
        if hidden_layers is None:
            hidden_layers = [128, 256]
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.output_size = output_size
        # LSTM encoder
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=bidirectional,
                           batch_first=True,
                           dropout=dropout)
        # Transformer encoder over the LSTM outputs.
        transformer_input_size = hidden_size * 2 if bidirectional else hidden_size
        # BUGFIX: the LSTM emits (batch, seq, feat), but the original encoder
        # layer used the default batch_first=False and therefore attended
        # across the *batch* dimension (leaking between samples) instead of
        # across the sequence; batch_first=True fixes the axis.
        self.transformer_layer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=transformer_input_size,
                                       nhead=nhead,
                                       dropout=dropout,
                                       batch_first=True),
            num_layers=num_transformer_layers
        )
        # Fully connected head
        if hidden_layers:
            first_layer = nn.Linear(transformer_input_size, hidden_layers[0])
            self.hidden_layers = nn.ModuleList(
                [first_layer] +
                [nn.Linear(hidden_layers[i], hidden_layers[i + 1])
                 for i in range(len(hidden_layers) - 1)]
            )
            for layer in self.hidden_layers:
                nn.init.kaiming_normal_(layer.weight.data)
            # NOTE(review): intermediate_layer is unused in forward(); kept
            # so existing checkpoints still load.
            self.intermediate_layer = nn.Linear(hidden_layers[-1], self.input_size)
            self.output_layer = nn.Linear(hidden_layers[-1], output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        else:
            self.hidden_layers = []
            self.intermediate_layer = nn.Linear(transformer_input_size, self.input_size)
            self.output_layer = nn.Linear(transformer_input_size, output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        self.activation_fn = torch.nn.GELU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # LSTM encoder
        lstm_output, _ = self.rnn(x)
        # Transformer refinement along the sequence axis
        transformer_output = self.transformer_layer(lstm_output)
        # Activation + dropout
        x = self.dropout(self.activation_fn(transformer_output))
        # Dense head
        for hidden_layer in self.hidden_layers:
            x = self.activation_fn(hidden_layer(x))
            x = self.dropout(x)
        x = self.output_layer(x)
        # (bs, 60, 14) -> (bs, 368): 6 per-level targets flattened + 8 means
        o_s = x[:, :, :6]
        o_s = o_s.permute(0, 2, 1).reshape(-1, 360)
        o_g = x[:, :, 6:]
        o_g = o_g.mean(dim=1)
        out = torch.cat([o_s, o_g], dim=1)
        return out
# Hyper-parameters for the BiLSTM + Transformer variant.
input_size, output_size, seq_len = 25, 14, 60
hidden_size, num_layers, dropout = 256, 6, 0.1
hidden_layers = [256, 512]
nhead, num_transformer_layers = 8, 1

# 6-layer BiLSTM followed by a single transformer-encoder layer.
model = LeapModel(
    input_size=input_size,
    seq_len=seq_len,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    hidden_layers=hidden_layers,
    dropout=dropout,
    bidirectional=True,
    nhead=nhead,
    num_transformer_layers=num_transformer_layers,
).to(device)
class TCNBlock(nn.Module):
    """Conv1d -> BatchNorm1d -> GELU; padding keeps the length for odd kernels."""

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super(TCNBlock, self).__init__()
        # (kernel_size-1)*dilation//2 of padding on each side gives 'same'
        # output length for odd kernel sizes.
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,
                              padding=(kernel_size - 1) * dilation // 2,
                              dilation=dilation)
        self.bn = nn.BatchNorm1d(out_channels)
        self.activation_fn = nn.GELU()

    def forward(self, x):
        return self.activation_fn(self.bn(self.conv(x)))


class LeapModel(nn.Module):
    """BiLSTM + SE channel gate + dilated TCN head: (bs, 60, 25) -> (bs, 368)."""

    def __init__(self,
                 input_size,
                 seq_len,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 bidirectional=False,
                 dropout=0.3):
        super().__init__()
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.output_size = output_size
        # LSTM encoder
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=bidirectional,
                           batch_first=True,
                           dropout=dropout)
        # BUGFIX/generalization: the original hardcoded hidden_size*2 in all
        # the layers below, which crashes at runtime for bidirectional=False.
        # rnn_out preserves the bidirectional=True behavior exactly.
        rnn_out = hidden_size * 2 if bidirectional else hidden_size
        # Squeeze-and-excitation style gate computed from time-averaged states.
        self.se = nn.Sequential(
            nn.Linear(rnn_out, hidden_size // 2),
            nn.GELU(),
            nn.Linear(hidden_size // 2, rnn_out),
            nn.Sigmoid()
        )
        self.tcn = nn.Sequential(
            TCNBlock(rnn_out, rnn_out, kernel_size=3, dilation=1),
            TCNBlock(rnn_out, rnn_out, kernel_size=3, dilation=2),
            TCNBlock(rnn_out, rnn_out, kernel_size=3, dilation=4),
            TCNBlock(rnn_out, rnn_out, kernel_size=3, dilation=8),
        )
        self.fc = nn.Linear(rnn_out, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # RNN encoder
        outputs, _ = self.rnn(x)
        # Channel re-weighting from the mean-pooled hidden states
        se_weights = self.se(torch.mean(outputs, dim=1)).unsqueeze(1)
        outputs = outputs * se_weights
        tcn_input = outputs.permute(0, 2, 1)   # Conv1d expects (bs, C, T)
        tcn_output = self.tcn(tcn_input)
        tcn_output = tcn_output.permute(0, 2, 1)
        x = self.dropout(tcn_output)
        x = self.fc(x)
        # (bs, 60, 14) -> (bs, 368): 6 per-level targets flattened + 8 means
        o_s = x[:, :, :6]
        o_s = o_s.permute(0, 2, 1).reshape(-1, 360)
        o_g = x[:, :, 6:]
        o_g = o_g.mean(dim=1)
        out = torch.cat([o_s, o_g], dim=1)  # (bs, 368)
        return out
# Hyper-parameters for the BiLSTM + TCN variant (no extra dense stack).
input_size, output_size, seq_len = 25, 14, 60
hidden_size, num_layers, dropout = 256, 6, 0.1

# Bidirectional 6-layer LSTM with the dilated-convolution head.
model = LeapModel(
    input_size=input_size,
    seq_len=seq_len,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    dropout=dropout,
    bidirectional=True,
).to(device)
class LeapModel(nn.Module):
    """BiLSTM + self-attention seq2seq: (bs, 60, 25) -> (bs, 368)."""

    def __init__(self,
                 input_size,
                 seq_len,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 bidirectional=False,
                 dropout=.3,
                 hidden_layers=None):
        super().__init__()
        # BUGFIX: avoid the shared mutable default argument; [128, 256]
        # remains the effective default.
        if hidden_layers is None:
            hidden_layers = [128, 256]
        self.input_size = input_size
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.output_size = output_size
        # BUGFIX: the original hardcoded dropout=0.1 here, silently ignoring
        # the constructor's dropout argument (they coincide only when the
        # caller passes 0.1).
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           bidirectional=bidirectional,
                           batch_first=True,
                           dropout=dropout)
        attn_dim = hidden_size * 2 if bidirectional else hidden_size
        self.attention = nn.MultiheadAttention(embed_dim=attn_dim,
                                               num_heads=8,
                                               batch_first=True)
        if hidden_layers:
            first_layer = nn.Linear(attn_dim, hidden_layers[0])
            self.hidden_layers = nn.ModuleList(
                [first_layer] +
                [nn.Linear(hidden_layers[i], hidden_layers[i + 1])
                 for i in range(len(hidden_layers) - 1)]
            )
            for layer in self.hidden_layers:
                nn.init.kaiming_normal_(layer.weight.data)
            # NOTE(review): intermediate_layer is unused in forward(); kept
            # so existing checkpoints still load.
            self.intermediate_layer = nn.Linear(hidden_layers[-1], self.input_size)
            self.output_layer = nn.Linear(hidden_layers[-1], output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        else:
            self.hidden_layers = []
            self.intermediate_layer = nn.Linear(attn_dim, self.input_size)
            self.output_layer = nn.Linear(attn_dim, output_size)
            nn.init.kaiming_normal_(self.output_layer.weight.data)
        self.activation_fn = torch.nn.GELU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        outputs, hidden = self.rnn(x)
        # BUGFIX: self.attention was built with batch_first=True, yet the
        # original permuted the LSTM output to (seq, batch, feat) before the
        # call, so attention mixed information across the *batch* (leaking
        # between samples) instead of across the sequence.  Feed the
        # (batch, seq, feat) tensor directly.
        attn_output, _ = self.attention(outputs, outputs, outputs)
        x = self.dropout(self.activation_fn(attn_output))
        for hidden_layer in self.hidden_layers:
            x = self.activation_fn(hidden_layer(x))
            x = self.dropout(x)
        x = self.output_layer(x)
        # (-1, 60, 14) -> (-1, 368)  (original comment said 386: typo)
        o_s = x[:, :, :6]
        o_s = o_s.permute(0, 2, 1).reshape(-1, 360)
        o_g = x[:, :, 6:]
        o_g = o_g.mean(dim=1)
        out = torch.cat([o_s, o_g], dim=1)
        return out
# Hyper-parameters for the BiLSTM + Attention variant.
input_size, output_size, seq_len = 25, 14, 60
hidden_size, num_layers, dropout = 256, 6, 0.1
hidden_layers = [256, 512]

# Bidirectional 6-layer LSTM with a multi-head self-attention stage.
model = LeapModel(
    input_size=input_size,
    seq_len=seq_len,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=num_layers,
    hidden_layers=hidden_layers,
    dropout=dropout,
    bidirectional=True,
).to(device)
训练细节:
- 损失函数: nn.SmoothL1Loss(reduction='mean') (比 mse 好 0.005~0.008)
- 学习率调度: get_cosine_schedule_with_warmup
- 激活函数: GELU (比 relu 好 0.002~0.004)
# Post-processing of the raw predictions.
# 1) Targets whose weight is zero are forced to 0 in the submission.
targets_unpredictable = [t for t in weights if weights[t] == 0.]
for target in targets_unpredictable:
    df_pred[target] = 0.
# 2) ptend_q0002 levels 12..27 are replaced by the closed-form rule
#    pred = -state * weight / 1200 (see the linked discussion).
for target in [f'ptend_q0002_{i}' for i in range(12, 28)]:
    df_pred[target] = -df_test[target.replace("ptend", "state")] * weights[target] / 1200.
参考链接: https://www.kaggle.com/competitions/leap-atmospheric-physics-ai-climsim/discussion/502484
# Per-target weighted ensemble: w0*pred0 + w1*pred1 + ... + w5*pred5.
# For every target column, score each candidate model by validation R^2 and
# keep the indices of the best four for the blend.
selects = []
for idx_t, target in tqdm(enumerate(TARGETCOLS), total=len(TARGETCOLS)):
    scores = {idx_p: r2_score(df_valid[target], probs[idx_p][:, idx_t])
              for idx_p, _ in enumerate(probs)}
    selects.append(sorted(scores, key=scores.get, reverse=True)[:4])