362. Google QUEST Q&A Labeling | google-quest-challenge
感谢 Google 作为主办方以及 Kaggle 团队!通过这次比赛,我真的学到了很多 NLP 技术,特别是 BERT。
class OptimPreds(object):
    """Map continuous predictions onto the discrete values seen in training.

    For every target column of the training frame, the sorted distinct
    values and their relative frequencies are recorded.  ``predict`` then
    bins rank-transformed predictions so that each distinct value receives
    a share of the predictions matching its training frequency.
    """

    def __init__(self, df_train, target_start=11):
        """Collect the value distribution of every target column.

        Args:
            df_train: Training DataFrame whose target columns start at
                position ``target_start``.
            target_start: Positional index of the first target column.
                Defaults to 11, the offset that was previously hard-coded.
        """
        # Keyed by the target's position among the target columns (0-based);
        # each entry is [sorted distinct values, matching relative frequencies].
        self.score_range_dict = {}
        for i, c in enumerate(df_train.columns[target_start:]):
            cnt = df_train[c].value_counts(normalize=True).sort_index()
            self.score_range_dict[i] = [
                cnt.index.values.tolist(),
                cnt.values.tolist(),
            ]

    def predict(self, preds, i):
        """Discretise ``preds`` for target ``i`` using the training distribution.

        Args:
            preds: Raw predictions for a single target column.
            i: Key into ``score_range_dict`` (position of the target column).

        Returns:
            The result of ``pd.cut``: one training label per prediction.
        """
        labels, freqs = self.score_range_dict[i]
        # Inner bin edges are the cumulative training frequencies; the last
        # cumulative value is dropped and the outer edges are opened to
        # +/-inf so that every input falls into some bin.
        inner_edges = np.cumsum(freqs)[:-1].tolist()
        bins = [-np.inf] + inner_edges + [np.inf]
        # NOTE(review): rank_average is defined outside this block; presumably
        # it rescales preds to normalised ranks comparable with the cumulative
        # frequencies used as edges -- confirm against its definition.
        return pd.cut(rank_average(preds), bins, labels=labels)
def optim_predict(pred, do_round=True,
                  target=(2, 5, 7, 9, 11, 12, 13, 14, 15, 16, 19)):
    """Discretise selected columns of ``pred`` in place and return it.

    Args:
        pred: 2-D array of predictions, indexed as ``pred[:, i]``.
            It is modified in place.
        do_round: Unused; kept so existing callers keep working.
        target: Column indices to post-process.  The default is an
            immutable tuple so it cannot be shared and mutated across calls.

    Returns:
        The same ``pred`` object, with the selected columns replaced by the
        output of ``optim.predict``.
    """
    # NOTE(review): relies on a module-level ``optim`` object defined outside
    # this block -- presumably an OptimPreds instance; confirm at the call site.
    selected = set(target)  # built once for O(1) membership tests
    for i in range(pred.shape[1]):
        if i in selected:
            pred[:, i] = optim.predict(pred[:, i], i)
    return pred