MAP - Charting Student Math Misunderstandings
Many thanks to the organizers and to Kaggle. This was my first competition entered with a prize in mind, and it earned me my first medal: a solo gold 🏅. Below is a brief summary of the approach.
I treated the task as a straightforward multi-class classification problem and ensembled 15 QLoRA fine-tuned models (5 folds × 3 backbones: Qwen3-Reranker-8B, Qwen3-Embedding-8B, Qwen2.5-32B-Instruct).
Key ingredients
- Sequence-classification fine-tuning (AutoModelForSequenceClassification); each backbone trained on 5 folds ⇒ 15 models in total.
- Class weights warmed up over roughly the first weight_warmup_ratio ≈ 0.33 of total steps, to avoid unstable early updates on long-tail classes (see the sketch after this list).
- 8B models: lr=1e-4, epochs=4, scheduler=cosine, weight_warmup_ratio=0.33, effective global batch size=16.
- 32B model: lr=1e-4, epochs=3, weight_warmup_ratio=0.33, effective global batch size=32.
- LoRA config (typical): r=64, alpha=128, dropout=0.05, targeting the Q/K/V/O and MLP projections; the classification head (score) is saved as well.
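As a rough illustration only, here is a minimal sketch of how the LoRA configuration and the class-weight warm-up above could look with PEFT. The target module names and the `warmed_class_weights` helper are my assumptions, not the exact training code.

```python
# Minimal sketch (not the exact training script) of the LoRA config and
# class-weight warm-up described above. Names and values are illustrative.
import numpy as np
from peft import LoraConfig

# LoRA config roughly matching the values above; "score" is the classification
# head created by AutoModelForSequenceClassification and is kept trainable/saved.
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    modules_to_save=["score"],
    task_type="SEQ_CLS",
)

def warmed_class_weights(full_weights, step, total_steps, warmup_ratio=0.33):
    """Linearly ramp class weights (NumPy array) from uniform to their full
    values over the first `warmup_ratio` of training, so rare long-tail
    classes do not receive huge gradients in the earliest updates."""
    warmup_steps = max(1, int(total_steps * warmup_ratio))
    t = min(1.0, step / warmup_steps)
    uniform = np.ones_like(full_weights)
    return (1.0 - t) * uniform + t * full_weights
```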
Inference: run vLLM in embed mode → apply the external Linear head to produce logits → write the logits out.

```python
import gc
import json
import os
import shutil

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from transformers import AutoModelForSequenceClassification, AutoTokenizer


def split_model(args):
    """
    Split a full sequence-classification model into its base model (backbone)
    and its classification head.
    """
    print(f"[*] Step: Splitting model...")
    print(f"    - Merged model path: {args.merged_model_path}")
    os.makedirs(args.base_model_save_path, exist_ok=True)
    os.makedirs(args.classifier_head_save_path, exist_ok=True)
    full_model = AutoModelForSequenceClassification.from_pretrained(
        args.merged_model_path,
        device_map="cpu",
        torch_dtype=torch.float16
    )
    tokenizer = AutoTokenizer.from_pretrained(args.merged_model_path)
    # The backbone and the head are saved separately so that vLLM can serve
    # the backbone in embed mode while the head is applied externally.
    base_model = full_model.model
    classifier_head = full_model.score
    print(f"    - Saving base model to {args.base_model_save_path}...")
    base_model.save_pretrained(args.base_model_save_path)
    tokenizer.save_pretrained(args.base_model_save_path)
    print(f"    - Saving classifier head to {args.classifier_head_save_path}...")
    torch.save(classifier_head.state_dict(),
               os.path.join(args.classifier_head_save_path, "classifier_state_dict.bin"))
    # Keep the metadata needed to rebuild the head as a plain nn.Linear later.
    classifier_config = {
        "hidden_size": full_model.config.hidden_size,
        "num_labels": full_model.config.num_labels,
        "id2label": full_model.config.id2label,
        "label2id": full_model.config.label2id,
        "bias": classifier_head.bias is not None,
    }
    with open(os.path.join(args.classifier_head_save_path, "classifier_config.json"), 'w') as f:
        json.dump(classifier_config, f)
    # Free disk space: the merged checkpoint is no longer needed.
    shutil.rmtree(args.merged_model_path)
    print(f"    - Removed merged model path: {args.merged_model_path}")
    print("[*] Step: Splitting model finished successfully.")
```
```python
def run_inference(args):
    """
    Load the quantized model and the external classification head, run
    inference with vLLM, and save the results to a JSON file.
    """
    from vllm import LLM
    print(f"[*] Step: Running inference with vLLM...")
    print(f"    - Quantized model path: {args.quantized_model_path}")
    print(f"    - Classifier head path: {args.classifier_head_path}")
    # Rebuild the classification head as a plain nn.Linear from the saved config.
    with open(os.path.join(args.classifier_head_path, "classifier_config.json"), "r") as f:
        classifier_config = json.load(f)
    classifier_head = nn.Linear(
        classifier_config['hidden_size'],
        classifier_config['num_labels'],
        bias=bool(classifier_config.get("bias", True))
    )
    state = torch.load(os.path.join(args.classifier_head_path, "classifier_state_dict.bin"),
                       map_location="cpu")
    classifier_head.load_state_dict(state)
    classifier_head.to("cuda").eval()
    # Serve the backbone in embed mode with last-token pooling and no
    # normalization, so the "embedding" is the hidden state the head expects.
    llm = LLM(
        model=args.quantized_model_path,
        quantization="gptq",
        dtype="half",
        task="embed",
        override_pooler_config={"pooling_type": "LAST", "normalize": False},
        tensor_parallel_size=1
    )
    print("    - vLLM engine initialized (task=embed).")
    # Rebuild the label encoding exactly as in training: Category:Misconception.
    le = LabelEncoder()
    train_df = pd.read_csv(args.label_data_path)
    train_df.Misconception = train_df.Misconception.fillna('NA')
    train_df['target'] = train_df.Category + ":" + train_df.Misconception
    le.fit(train_df['target'])
    all_results = []
    try:
        # TEST_PROMPTS (the formatted test prompts) is defined elsewhere in the script.
        embed_outputs = llm.embed(TEST_PROMPTS)
        embeddings = torch.from_numpy(
            np.stack([np.array(o.outputs.embedding, dtype=np.float32) for o in embed_outputs])
        ).to("cuda")
        with torch.no_grad():
            logits = classifier_head(embeddings)
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        top_indices = np.argsort(-probs, axis=1)
        for i in range(len(TEST_PROMPTS)):
            pred_id = top_indices[i, 0]
            top_3_ids = top_indices[i, :3]
            result = {
                "prompt_id": i,
                "prompt_text": TEST_PROMPTS[i],
                "predicted_label": le.inverse_transform([pred_id])[0],
                "predicted_score": float(probs[i, pred_id]),
                "top_3_labels": le.inverse_transform(top_3_ids).tolist(),
                "top_3_scores": probs[i, top_3_ids].tolist()
            }
            all_results.append(result)
            print(f"    - vLLM Prediction for prompt #{i}: {result['predicted_label']} "
                  f"(Score: {result['predicted_score']:.4f})")
    finally:
        del llm, classifier_head
        gc.collect()
        torch.cuda.empty_cache()
    with open(args.output_json_path, 'w') as f:
        json.dump(all_results, f, indent=4)
    print(f"    - vLLM inference results saved to: {args.output_json_path}")
    print("[*] Step: Inference finished successfully.")
```
- Quantization (GPTQ): group_size=64, symmetric, GAR (Group-Aware Reordering), true_sequential=True; ~2k calibration texts in the same format as the training prompts.
- vLLM serving: quantization="gptq", dtype="half", task="embed", override_pooler_config={"pooling_type": "LAST", "normalize": False}.
- Ensembling: weighted logit blending, with each 32B fold weighted 2 and each 8B fold weighted 1, then Top-3 for MAP@3; this was more stable than averaging probabilities (a sketch follows below).
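A minimal sketch of the weighted logit blending and Top-3 selection described above, assuming per-fold logits have already been collected as (n_samples, n_classes) arrays; variable and function names are illustrative.

```python
# Sketch of weighted logit blending: 32B folds count double, 8B folds count once,
# then take the three best classes per sample for the MAP@3 submission.
import numpy as np

def blend_and_top3(logits_8b_folds, logits_32b_folds, w_8b=1.0, w_32b=2.0):
    blended = np.zeros_like(logits_8b_folds[0])
    for lg in logits_8b_folds:
        blended += w_8b * lg
    for lg in logits_32b_folds:
        blended += w_32b * lg
    top3 = np.argsort(-blended, axis=1)[:, :3]  # indices of the 3 best classes
    return blended, top3
```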
Given the label noise, the long-tail distribution, and the MAP@3 objective, clean splits + balanced training + several moderately sized models beat more complex pipelines.
| Setup | Public MAP@3 | Private MAP@3 |
|---|---|---|
| Single 8B model | 0.946 | 0.942 |
| 8B backbone × 5 folds | 0.950 | 0.946 |
| 32B-Instruct × 5 folds | 0.950 | 0.947 |
| Ensemble (15 models) | 0.951 | 0.948 |
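For reference, a minimal sketch of how the MAP@3 metric reported above can be computed (my own illustration, not the competition's scoring code): with a single ground-truth label per sample, the per-sample score is 1, 1/2, or 1/3 depending on where the true label appears in the top-3 predictions, and 0 if it does not appear.

```python
# Sketch of MAP@3 with one ground-truth label per sample.
import numpy as np

def map_at_3(top3_preds, true_labels):
    """top3_preds: (n_samples, 3) predicted label ids, best first.
    true_labels: (n_samples,) ground-truth label ids."""
    scores = []
    for preds, truth in zip(top3_preds, true_labels):
        score = 0.0
        for rank, p in enumerate(preds[:3], start=1):
            if p == truth:
                score = 1.0 / rank
                break
        scores.append(score)
    return float(np.mean(scores))
```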
Many thanks to the Kaggle community's discussions, notebooks, and solution write-ups, which informed my data cleaning, training, quantization, and ensembling choices. In particular: