613. AI Mathematical Olympiad - Progress Prize 1 | ai-mathematical-olympiad-prize
感谢 @abdurrafae 的早期分享,我们得以在一个坚实的基础上起步。最初效果良好的方法如下:
if jj >= n_repetitions-1:
if len(occurances)>=2:
freq1, freq2 = occurances[0][1], occurances[1][1]
if freq1 - freq2>=1:
cond_iter_increase = True
return best_stats[0][0], cond_iter_increase
如果 cond_iter_increase = True,那么我们就可以在下一个问题中使用额外的一轮迭代。
# Submission loop: walk the competition iterator, pick a repetition budget
# for each problem, and hand the predicted answer back to the environment.
prob_count = 0
increase = False
if PRIVATE:
    for test, sample_submission in iter_test:
        prob_count += 1
        # Problems after the 25th get the larger budget (22 vs. 12).
        budget = 22 if prob_count > 25 else 12
        # NOTE(review): the accompanying write-up says a True flag allows an
        # extra iteration on the next problem, yet here a truthy `increase`
        # selects the smaller count (21 / 11) -- confirm which is intended.
        n_repetitions = budget - 1 if increase else budget
        problem_text = test['problem'].values[0]
        # `predict` returns the answer plus the flag consumed by the next problem.
        answer, increase = predict(problem_text, n_repetitions)
        sample_submission['answer'] = answer
        env.predict(sample_submission)
        print(test)
        print(sample_submission)
提交 notebook 在这里:https://www.kaggle.com/code/indranilbhattacharya/aimo-2024-private-36th-place-solution/notebook
这个提交虽然完成于一个月前,但我们是策略性地选择它的:我们估计可能会有许多团队获得相同的分数,因此选择了一个较早的提交。后来我们也尝试了一些其他想法。虽然我们认为其中一些仍有潜力,但我们无法在验证集和公共 LB 上获得稳定的结果。
def final_pred(problem, n_repetitions, n_repetitions_mcq):
    """Choose a final answer, re-prompting in multiple-choice form if unsure.

    Runs ``predict`` first. When its result is decisive (fewer than two
    candidate answers, or the first candidate has a frequency of at least 5)
    the answer ``best_stats[0][0]`` is returned directly. Otherwise the model
    is asked up to ``n_repetitions_mcq`` more times with the candidate
    answers listed in the prompt as a hint, and the votes from both rounds
    are combined.

    Args:
        problem: Problem statement text inserted into the prompt.
        n_repetitions: Repetition count forwarded to ``predict``.
        n_repetitions_mcq: Maximum number of multiple-choice generations.

    Returns:
        A ``(answer, cond_iter_increase)`` tuple. ``cond_iter_increase`` is
        passed through from ``predict``, except on the fallback path where
        ``(24, False)`` is returned.
    """
    # `occurances` is used as a sequence of (answer, frequency) pairs;
    # presumably sorted by descending frequency -- confirm against `predict`.
    # The fourth value returned by `predict` is not used here.
    occurances, best_stats, cond_iter_increase, _hard = predict(problem, n_repetitions)
    print(occurances)
    print("--"*10)
    print(best_stats)

    if not occurances and not best_stats:
        # Happens once the time limit has been exceeded; every remaining
        # problem is then predicted as 24.
        return 24, False

    if len(occurances) < 2:
        print("这个问题很简单,答案是:", best_stats[0][0])
        return best_stats[0][0], cond_iter_increase

    option1 = occurances[0][0]
    freq1 = occurances[0][1]
    if freq1 >= 5:
        print("这个问题很简单,答案是:", best_stats[0][0])
        return best_stats[0][0], cond_iter_increase

    all_options = [opt[0] for opt in occurances]
    # Running vote count, seeded with the frequencies from the first round.
    ans_dict = dict(occurances)
    print("所有选项是:", all_options)

    # `model`, `tokenizer` and `USE_PAST_KEY` are module-level names that are
    # only read here, so no `global` declaration is required.
    # NOTE(review): the prompt ends with a literal backslash followed by
    # "Solution:"; the runtime text is kept as-is (now with a valid escape),
    # but a newline may have been intended -- confirm.
    final_message = f"请解决以下问题:{problem}。提示:最终答案是一个非负整数模 1000,是以下整数之一:{all_options}。请逐步推理并检查哪一个是正确的,并将最终数值答案放在 \\boxed{{}} 中。\\Solution:"
    prompt = f"User: {final_message}"
    print(prompt)

    # The prompt never changes between rounds, so tokenize it once.
    model_inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    input_len = len(model_inputs['input_ids'][0])

    final_iterations = []
    for m in tqdm(range(n_repetitions_mcq)):
        torch.cuda.empty_cache()
        gc.collect()
        generation_output = model.generate(**model_inputs,
                                           max_new_tokens=TOTAL_TOKENS,
                                           return_dict_in_generate=USE_PAST_KEY,
                                           do_sample=True,
                                           temperature=0.7,
                                           top_p=1.0,
                                           num_return_sequences=1,
                                           stopping_criteria=stopping_criteria)
        # When `return_dict_in_generate` is falsy, `generate` returns the
        # token tensor itself rather than an object with `.sequences`.
        sequences = getattr(generation_output, "sequences", generation_output)
        output_ids = sequences[0]
        # Decode only the newly generated tokens, skipping the prompt.
        raw_output = tokenizer.decode(output_ids[input_len:], skip_special_tokens=True)
        print(f"\\FINAL ITERATION:\n{raw_output}\n")
        result_output = process_text_output(raw_output)
        print("来自最终迭代 MCQ 轮次的答案", m, ":", result_output)

        # -1 is treated as "no usable answer" and is not counted.
        if result_output == -1:
            continue
        final_iterations.append(result_output)
        ans_dict[result_output] = ans_dict.get(result_output, 0) + 1
        if ans_dict[result_output] >= 5:
            # Stop early as soon as any answer reaches 5 combined votes.
            print("MCQ 轮次找到了最初最频繁的解决方案")
            return result_output, cond_iter_increase

    # Combined votes from both rounds: accept the leader at 4 or more.
    # `max` returns the first maximal entry, matching a stable descending sort.
    combined_ans, combined_freq = max(ans_dict.items(), key=lambda item: item[1])
    if combined_freq >= 4:
        return combined_ans, cond_iter_increase

    # Otherwise look at the multiple-choice round on its own.
    sorted_answers_mcq = Counter(final_iterations).most_common()
    print("来自 MCQ 轮次")
    print(sorted_answers_mcq)

    if sorted_answers_mcq and sorted_answers_mcq[0][1] >= 2:
        final_try = sorted_answers_mcq[0][0]
        # The answer is returned either way; only the log message differs.
        if final_try in all_options:
            print("来自 MCQ 的最终答案是:", final_try)
        else:
            print("来自 MCQ 的最终答案不在选项中,是:", final_try)
        return final_try, cond_iter_increase

    print("MCQ 轮次没有帮助,答案是:", option1)
    return option1, cond_iter_increase