575. RSNA 2023 Abdominal Trauma Detection | rsna-2023-abdominal-trauma-detection
感谢Kaggle和主办方举办这场有趣的竞赛,衷心感谢Kaggle社区中各位同仁提出的卓越想法和进行的深入讨论,特别感谢我的队友 @siwooyong。
我们的解决方案采用集成方法,结合了不使用分割的单阶段方法和使用分割的两阶段方法。
当图像尺寸大于(512, 512)时,我们裁剪像素密度较高的区域获得(512, 512)图像,然后将每个序列调整为(96, 256, 256)的输入尺寸,预处理步骤参考了hengck23的示例代码。
我们尝试独立预测每个目标,共输出13个结果:
class RSNAClassifier(nn.Module):
    """CNN-per-triplet + bidirectional GRU + MLP-attention classifier.

    Each group of 3 consecutive slices is fed to a 2-D backbone as one RGB-like
    image; the resulting sequence of group embeddings is aggregated by a 2-layer
    bi-GRU and an attention pooling layer, ending in 13 independent logits
    (one per competition target).

    Args:
        model_arch: timm backbone name (must expose ``fc`` / ``global_pool``).
        hidden_dim: GRU hidden size per direction.
        seq_len: number of input slices; assumed to be a multiple of 3 — TODO confirm.
        pretrained: whether to load pretrained backbone weights.
    """

    def __init__(self, model_arch, hidden_dim=128, seq_len=3, pretrained=False):
        super().__init__()
        self.seq_len = seq_len
        self.model_arch = model_arch
        self.model = timm.create_model(model_arch, in_chans=3, pretrained=pretrained)
        cnn_feature = self.model.fc.in_features
        # Strip the backbone's own pooling/head so it returns raw feature maps.
        self.model.global_pool = nn.Identity()
        self.model.fc = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.spatialdropout = SpatialDropout(CFG.dropout)
        self.gru = nn.GRU(cnn_feature, hidden_dim, num_layers=2, batch_first=True, bidirectional=True)
        self.mlp_attention_layer = MLPAttentionNetwork(2 * hidden_dim)
        self.logits = nn.Sequential(nn.Linear(2 * hidden_dim, 13))

    def forward(self, x):
        # x is assumed to be (batch, seq_len, H, W) — TODO confirm against the dataloader.
        bs = x.size(0)
        # Regroup slices into (bs * seq_len/3) pseudo-RGB frames of 3 channels.
        frames = x.reshape(bs * self.seq_len // 3, 3, x.size(2), x.size(3))
        frame_feats = self.model(frames)
        frame_feats = self.pooling(frame_feats).view(bs * self.seq_len // 3, -1)
        frame_feats = self.spatialdropout(frame_feats)
        # Restore the per-sample temporal axis for the recurrent stage.
        seq = frame_feats.reshape(bs, self.seq_len // 3, -1)
        seq, _ = self.gru(seq)
        attended = self.mlp_attention_layer(seq)
        return self.logits(attended).view(bs, -1)
# Sample 32 distinct centre slices out of the 94 interior positions (first and
# last slice excluded so every centre has both neighbours), sort them, then
# expand each centre c into the ordered triplet (c-1, c, c+1) — 96 indices total.
inds = np.random.choice(np.arange(1, 96 - 1), 32, replace=False)
inds.sort()
inds = np.column_stack([inds - 1, inds, inds + 1]).flatten()
image = image[inds]
损失函数: BCEWithLogitsLoss
学习率调度: CosineAnnealingLR
优化器: AdamW
学习率: 5e-5
我们根据竞赛指标的权重对输出进行加权:
# Rescale prediction columns by the competition metric's class weights:
# low-grade organ injuries and bowel ×2, high-grade ×4, extravasation ×6.
for cols, weight in [
    (['bowel_injury', 'kidney_low', 'liver_low', 'spleen_low'], 2),
    (['kidney_high', 'liver_high', 'spleen_high'], 4),
    (['extravasation_injury'], 6),
]:
    preds.loc[:, cols] *= weight
模型: regnety002 + unet
即使只使用160/200的数据(第1折)作为训练数据,模型也已表现出良好性能:
class SegModel(nn.Module):
    """RegNetY-002 encoder with a U-Net decoder for 6-class CT segmentation.

    Classes: background, liver, spleen, left kidney, right kidney, bowel.
    Input is a single-channel slice; output is a per-class sigmoid mask
    (classes treated as independent binary targets, matching the
    ``BCEWithLogitsLoss`` criterion held on the module).
    """

    def __init__(self):
        super(SegModel, self).__init__()
        class_names = [
            'background',
            'liver',
            'spleen',
            'left kidney',
            'right kidney',
            'bowel',
        ]
        self.n_classes = len(class_names)
        in_chans = 1
        self.encoder = timm.create_model(
            'regnety_002',
            pretrained=False,
            features_only=True,
            in_chans=in_chans,
        )
        # Prepend the raw input channel count so the decoder also receives the
        # full-resolution image as its shallowest skip connection.
        channels = [in_chans]
        for idx in range(len(self.encoder.feature_info)):
            channels.append(self.encoder.feature_info[idx]["num_chs"])
        self.decoder = UnetDecoder(
            encoder_channels=tuple(channels),
            decoder_channels=(256, 128, 64, 32, 16),
            n_blocks=5,
            use_batchnorm=True,
            center=False,
            attention_type=None,
        )
        self.segmentation_head = SegmentationHead(
            in_channels=16,
            out_channels=self.n_classes,
            activation=None,
            kernel_size=3,
        )
        self.bce_seg = nn.BCEWithLogitsLoss()

    def forward(self, x_in):
        encoder_features = self.encoder(x_in)
        decoded = self.decoder(*([x_in] + encoder_features))
        seg_logits = self.segmentation_head(decoded)
        # Per-class sigmoid probabilities (not a softmax over classes).
        return nn.Sigmoid()(seg_logits)
使用阶段1的分割结果裁剪肝脏、脾脏和肾脏,每个器官调整至(96, 224, 224)尺寸(裁剪时添加10像素padding)。同时完整CT数据调整为(128, 224, 224),模型共接收四个输入(完整视频、肝脏裁剪、脾脏裁剪、肾脏裁剪)。
我们尝试了自定义any_injury_loss函数但未提升性能。实验表明输入通道数为2时效果最佳,且为每个器官使用独立的CNN和transformer模型比共享模型效果更好。最终选用较小的regnety_002模型:
class FeatureExtractor(nn.Module):
    """Per-frame-group CNN encoder followed by a linear projection.

    Frames are grouped into chunks of ``num_channel`` consecutive slices,
    each chunk is encoded by a shared 2-D RegNetY-002 backbone, and the
    ``hidden``-dim features are projected down to ``hidden // 2``.

    Args:
        hidden: feature dimension produced by the backbone.
        num_channel: slices per backbone input (used as input channels).
    """

    def __init__(self, hidden, num_channel):
        super(FeatureExtractor, self).__init__()
        self.hidden = hidden
        self.num_channel = num_channel
        self.cnn = timm.create_model(model_name='regnety_002',
                                     pretrained=True,
                                     num_classes=0,
                                     in_chans=num_channel)
        self.fc = nn.Linear(hidden, hidden // 2)

    def forward(self, x):
        # x: (batch, num_frame, H, W) with num_frame divisible by num_channel
        # — TODO confirm against the dataloader.
        batch_size, num_frame, height, width = x.shape
        groups = num_frame // self.num_channel
        # Fold every chunk of num_channel frames into the channel axis.
        chunks = x.reshape(batch_size * groups, self.num_channel, height, width)
        feats = self.cnn(chunks)
        feats = feats.reshape(batch_size, groups, self.hidden)
        return self.fc(feats)
class ContextProcessor(nn.Module):
    """Single-layer pre-LayerNorm transformer over frame embeddings.

    Consumes precomputed embeddings via ``inputs_embeds`` (so the token
    embedding table is deleted), then summarizes the sequence with
    concatenated max+mean pooling followed by a dense + ReLU head.
    Input feature size is ``hidden // 2``; output size is ``hidden``.
    """

    def __init__(self, hidden):
        super(ContextProcessor, self).__init__()
        self.transformer = RobertaPreLayerNormModel(
            RobertaPreLayerNormConfig(
                hidden_size=hidden // 2,
                num_hidden_layers=1,
                num_attention_heads=4,
                intermediate_size=hidden * 2,
                hidden_act='gelu_new',
            )
        )
        # Inputs arrive as vectors, never token ids — drop the unused table.
        del self.transformer.embeddings.word_embeddings
        self.dense = nn.Linear(hidden, hidden)
        self.activation = nn.ReLU()

    def forward(self, x):
        hidden_states = self.transformer(inputs_embeds=x).last_hidden_state
        avg_pool = hidden_states.mean(dim=1)
        max_pool = hidden_states.max(dim=1).values
        pooled = torch.cat([max_pool, avg_pool], dim=-1)
        return self.activation(self.dense(pooled))
class Custom3DCNN(nn.Module):
    """Four-branch injury classifier: whole scan plus three organ crops.

    Each branch has its own CNN extractor and transformer context processor.
    Organ heads see their own crop fused with the full-scan features; the
    bowel/extravasation heads see all four branches fused. Also emits an
    ``any_injury`` probability derived from the five heads.
    """

    def __init__(self, hidden=368, num_channel=2):
        super(Custom3DCNN, self).__init__()
        self.full_extractor = FeatureExtractor(hidden=hidden, num_channel=num_channel)
        self.kidney_extractor = FeatureExtractor(hidden=hidden, num_channel=num_channel)
        self.liver_extractor = FeatureExtractor(hidden=hidden, num_channel=num_channel)
        self.spleen_extractor = FeatureExtractor(hidden=hidden, num_channel=num_channel)
        self.full_processor = ContextProcessor(hidden=hidden)
        self.kidney_processor = ContextProcessor(hidden=hidden)
        self.liver_processor = ContextProcessor(hidden=hidden)
        self.spleen_processor = ContextProcessor(hidden=hidden)
        # Per-target heads: binary (2-way) for bowel/extravasation,
        # healthy/low/high (3-way) for the solid organs.
        self.bowel = nn.Linear(hidden, 2)
        self.extravasation = nn.Linear(hidden, 2)
        self.kidney = nn.Linear(hidden, 3)
        self.liver = nn.Linear(hidden, 3)
        self.spleen = nn.Linear(hidden, 3)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, full_input, crop_liver, crop_spleen, crop_kidney, mask, mode):
        # NOTE(review): `mask` and `mode` are unused in this block — presumably
        # consumed by other variants of the training loop; kept for interface
        # compatibility.
        full_feat = self.full_extractor(full_input)
        kidney_feat = self.kidney_extractor(crop_kidney)
        liver_feat = self.liver_extractor(crop_liver)
        spleen_feat = self.spleen_extractor(crop_spleen)

        ctx_full = self.full_processor(torch.cat([full_feat, kidney_feat, liver_feat, spleen_feat], dim=1))
        ctx_kidney = self.kidney_processor(torch.cat([full_feat, kidney_feat], dim=1))
        ctx_liver = self.liver_processor(torch.cat([full_feat, liver_feat], dim=1))
        ctx_spleen = self.spleen_processor(torch.cat([full_feat, spleen_feat], dim=1))

        bowel = self.bowel(ctx_full)
        extravasation = self.extravasation(ctx_full)
        kidney = self.kidney(ctx_kidney)
        liver = self.liver(ctx_liver)
        spleen = self.spleen(ctx_spleen)

        # Class 0 of every head is "healthy"; P(any injury) is the max over
        # heads of (1 - P(healthy)).
        healthy_probs = torch.stack(
            [self.softmax(head)[:, 0] for head in (bowel, extravasation, kidney, liver, spleen)],
            dim=-1,
        )
        any_injury = (1 - healthy_probs).max(1).values

        return bowel, extravasation, kidney, liver, spleen, any_injury
class CustomAug(nn.Module):
    """Train-time augmentation: optional rotate and scale-jitter, then a fixed
    crop to (s, s) followed by random horizontal/vertical flips.

    Args:
        prob: probability for the rotate and scale branches and for each flip.
        s: output spatial size.
    """

    def __init__(self, prob=0.5, s=224):
        super(CustomAug, self).__init__()
        self.prob = prob
        self.do_random_rotate = v2.RandomRotation(
            degrees=(-45, 45),
            interpolation=torchvision.transforms.InterpolationMode.BILINEAR,
            expand=False,
            center=None,
            fill=0
        )
        self.do_random_scale = v2.ScaleJitter(
            target_size=[s, s],
            scale_range=(0.8, 1.2),
            interpolation=torchvision.transforms.InterpolationMode.BILINEAR,
            antialias=True)
        self.do_random_crop = v2.RandomCrop(
            size=[s, s],
            pad_if_needed=True,
            fill=0,
            padding_mode='constant')
        self.do_horizontal_flip = v2.RandomHorizontalFlip(self.prob)
        self.do_vertical_flip = v2.RandomVerticalFlip(self.prob)

    def forward(self, x):
        # Two independent coin flips gate the rotate / scale branches; the
        # crop always runs so the output size stays fixed at (s, s), and the
        # flip transforms carry their own internal probability.
        apply_rotate = np.random.rand() < self.prob
        if apply_rotate:
            x = self.do_random_rotate(x)
        apply_scale = np.random.rand() < self.prob
        if apply_scale:
            x = self.do_random_scale(x)
        for op in (self.do_random_crop, self.do_horizontal_flip, self.do_vertical_flip):
            x = op(x)
        return x
损失函数: nn.CrossEntropyLoss(无类别权重)
学习率调度: cosine_schedule_with_warmup
优化器: AdamW
学习率: 2e-4
我们乘以使每折验证分数最大化的权重:
# Per-fold post-processing weights chosen to maximize each fold's validation
# score; column order matches `weight_cols` below.
weights = [
    [0.9, 4, 2, 4, 2, 6, 6, 6],
    [0.9, 1, 4, 3, 2, 5, 5, 6],
    [0.2, 3, 2, 1, 2, 4, 2, 6],
    [0.5, 2, 2, 2, 2, 2, 6, 6],
    [1, 2, 3, 2, 6, 3, 6, 5],
]
# Average scan-level predictions per patient, then rescale each target column
# with this fold's tuned weight.
y_pred = pred_df.copy().groupby('patient_id').mean().reset_index()
weight_cols = ['bowel_injury', 'kidney_low', 'liver_low', 'spleen_low',
               'kidney_high', 'liver_high', 'spleen_high', 'extravasation_injury']
for col, w in zip(weight_cols, weights[i]):
    y_pred[col] *= w
# Mild power transform to soften over-confident predictions.
# NOTE(review): the exponent is applied to every column, including
# patient_id — matches the original behavior.
y_pred = y_pred ** 0.8