441. Tabular Playground Series - May 2021 | tabular-playground-series-may-2021
大家好 😊
这次比赛已经结束,我想分享一些想法……
这次我分享我的具体解决方案。你可以看到下面的工作流程:
在第一阶段,我使用下面这类代码来训练并得到各个模型:
def lmodelv(X_train, y_train, X_val, y_val, lgb_params):
    """Train a single LightGBM booster with early stopping.

    Args:
        X_train: Training features passed to ``lgb.Dataset``.
        y_train: Training labels.
        X_val: Validation features monitored during training.
        y_val: Validation labels.
        lgb_params: Parameter dict forwarded unchanged to ``lgb.train``.

    Returns:
        The booster returned by ``lgb.train``.
    """
    d_train = lgb.Dataset(X_train, label=y_train)
    d_valid = lgb.Dataset(X_val, label=y_val)
    # Both sets are evaluated each round, training set first.
    watchlist = [d_train, d_valid]
    # Early stopping is requested through a callback rather than the
    # ``early_stopping_rounds`` / ``verbose_eval`` keyword arguments: those
    # keywords were removed from ``lgb.train`` in LightGBM 4.0, while the
    # callback form works on both older and newer releases.
    # ``verbose=False`` keeps training silent, matching ``verbose_eval=0``.
    model = lgb.train(
        lgb_params,
        train_set=d_train,
        valid_sets=watchlist,
        callbacks=[lgb.early_stopping(EARLY_STOPPING_ROUNDS, verbose=False)],
    )
    return model
def training_lgbv(X_train, y_train, X_test, txt='lgb'):
    """Run stratified K-fold LightGBM training and save the predictions.

    For every fold a model is fitted with ``lmodelv`` using the module-level
    ``lgb_tune`` parameters. Out-of-fold predictions are written into the
    rows of the validation indices, and test predictions are averaged over
    the ``NUM_FOLDS`` folds. Both arrays are saved as ``.npy`` files under
    ``path + 'preds/'`` with the overall log loss embedded in the file name.

    Args:
        X_train: Training features; wrapped in a ``pandas.DataFrame`` so it
            can be sliced positionally.
        y_train: Training labels; must support ``.iloc`` indexing.
        X_test: Test features passed directly to ``model.predict``.
        txt: Tag inserted into the saved file names.

    Returns:
        Tuple ``(yv, yt)``: out-of-fold predictions for ``X_train`` and
        fold-averaged predictions for ``X_test``.
    """
    import os

    skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    # Build the DataFrame once instead of twice per fold.
    X_frame = pd.DataFrame(X_train)
    # Prediction buffers are hard-coded to 4 columns.
    # NOTE(review): presumably ``lgb_tune`` configures a 4-class objective - confirm.
    yv = np.zeros((len(X_train), 4))
    yt = np.zeros((len(X_test), 4))
    for fold, (idx_tr, idx_vl) in enumerate(skf.split(X_train, y_train)):
        X_tr, y_tr = X_frame.iloc[idx_tr], y_train.iloc[idx_tr]
        X_vl, y_vl = X_frame.iloc[idx_vl], y_train.iloc[idx_vl]
        model = lmodelv(X_tr, y_tr, X_vl, y_vl, lgb_tune)
        # Inference: out-of-fold predictions, then this fold's share of the
        # averaged test predictions.
        yv[idx_vl] = model.predict(X_vl)
        yt += model.predict(X_test) / NUM_FOLDS
        print(f"Found Metric in {fold}:{log_loss(y_vl,yv[idx_vl])}")
    # Compute the overall out-of-fold score once and reuse it for the log
    # line and both file names.
    score = log_loss(y_train, yv)
    print(f'Results of the training: {score}')
    # np.save does not create missing directories.
    os.makedirs(path + 'preds', exist_ok=True)
    np.save(path + f'preds/train_{txt}_{score}.npy', yv)
    np.save(path + f'preds/test_{txt}_{score}.npy', yt)
    return yv, yt
第一个函数用于定义并拟合单个模型,第二个函数用于完成交叉验证训练流程。我把它们拆成两个函数,是因为这样可以从两个层面分别优化算法:既可以调整模型本身,也可以调整训练流程。(例如微调,参见 https://www.kaggle.com/awwalmalhi/extreme-fine-tuning-lgbm-using-7-step-training)
类似地,我编写了用于