ML / lightgbm.sklearn: A Detailed Guide to LGBMClassifier: Overview, Concrete Examples, and Tuning Tips
Contents
LGBMClassifier: Overview, Concrete Examples, and Tuning Tips
Tuning Tips for LGBMClassifier
1. LightGBM is best suited to larger datasets
2. Prefer a smaller learning_rate and a larger num_iteration
3. Tuning tips for imbalanced samples
4. When tuning, split the parameter dictionary into two groups
Overview of LGBMClassifier
1. Parameters of all the weak learners
2. Detailed explanation of the function
LGBMClassifier: Overview, Concrete Examples, and Tuning Tips

Tuning Tips for LGBMClassifier

1. LightGBM is best suited to larger datasets
LightGBM works best on relatively large training sets; on smaller datasets it is prone to overfitting and may not be the best choice. A minimal usage sketch is shown below.
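As a starting point, here is a minimal usage sketch. The synthetic dataset, the train/test split, and the concrete parameter values (learning_rate=0.05, n_estimators=500, num_leaves=31) are illustrative choices, not values prescribed by this article:

from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic data purely for illustration
X, y = make_classification(n_samples=5000, n_features=20, n_informative=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# A smaller learning_rate paired with more boosting rounds, as suggested in the tuning tips
clf = LGBMClassifier(learning_rate=0.05, n_estimators=500, num_leaves=31, random_state=42)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric="logloss")

print(clf.score(X_test, y_test))        # mean accuracy on the held-out split
print(clf.predict_proba(X_test)[:3])    # per-class probabilities for a few samples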
The following excerpt comes from the body of LGBMClassifier.fit in lightgbm/sklearn.py (an older 2.x-era release); by this point in the method the target y has already been encoded into _y with the internal label encoder self._le:

    if self._n_classes > 2:
        # Switch to using a multiclass objective in the underlying LGBM instance
        ova_aliases = ("multiclassova", "multiclass_ova", "ova", "ovr")
        if self._objective not in ova_aliases and not callable(self._objective):
            self._objective = "multiclass"
        if eval_metric in ('logloss', 'binary_logloss'):
            eval_metric = "multi_logloss"
        elif eval_metric in ('error', 'binary_error'):
            eval_metric = "multi_error"
    else:
        if eval_metric in ('logloss', 'multi_logloss'):
            eval_metric = 'binary_logloss'
        elif eval_metric in ('error', 'multi_error'):
            eval_metric = 'binary_error'

    if eval_set is not None:
        if isinstance(eval_set, tuple):
            eval_set = [eval_set]
        for i, (valid_x, valid_y) in enumerate(eval_set):
            if valid_x is X and valid_y is y:
                eval_set[i] = (valid_x, _y)
            else:
                eval_set[i] = (valid_x, self._le.transform(valid_y))

    super(LGBMClassifier, self).fit(X, _y, sample_weight=sample_weight,
                                    init_score=init_score, eval_set=eval_set,
                                    eval_names=eval_names,
                                    eval_sample_weight=eval_sample_weight,
                                    eval_class_weight=eval_class_weight,
                                    eval_init_score=eval_init_score,
                                    eval_metric=eval_metric,
                                    early_stopping_rounds=early_stopping_rounds,
                                    verbose=verbose,
                                    feature_name=feature_name,
                                    categorical_feature=categorical_feature,
                                    callbacks=callbacks)
    return self

fit.__doc__ = LGBMModel.fit.__doc__
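A practical consequence of the branch above is that fit() quietly remaps generic metric names to the proper binary or multiclass variant. The sketch below uses an assumed 3-class synthetic dataset (all names and values are illustrative) to check that eval_metric="logloss" is recorded as multi_logloss and that the objective is switched to multiclass; because the eval_set here is the training data itself, the result key is "training", as in the `valid_x is X and valid_y is y` branch shown above:

from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification

# 3-class toy problem; 'logloss' should be remapped to 'multi_logloss' inside fit()
X, y = make_classification(n_samples=3000, n_features=10, n_informative=6,
                           n_classes=3, random_state=0)

clf = LGBMClassifier(n_estimators=50, random_state=0)
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")

print(list(clf.evals_result_["training"].keys()))  # expected: ['multi_logloss']
print(clf.objective_)                              # expected: 'multiclass'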
def predict(self, X, raw_score=False, num_iteration=None,
            pred_leaf=False, pred_contrib=False, **kwargs):
    """Docstring is inherited from the LGBMModel."""
    result = self.predict_proba(X, raw_score, num_iteration,
                                pred_leaf, pred_contrib, **kwargs)
    if raw_score or pred_leaf or pred_contrib:
        return result
    else:
        class_index = np.argmax(result, axis=1)
        return self._le.inverse_transform(class_index)

predict.__doc__ = LGBMModel.predict.__doc__
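Because predict() takes the argmax over predict_proba() and then calls self._le.inverse_transform, predictions come back in the same label space you trained on, including string labels. A small sketch under assumed string labels (the data and names are illustrative):

import numpy as np
from lightgbm import LGBMClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
# String class labels; the classifier encodes them internally with a label encoder
y = np.where(X[:, 0] + X[:, 1] > 0, "cat", "dog")

clf = LGBMClassifier(n_estimators=20).fit(X, y)

print(clf.predict(X[:5]))        # original string labels, e.g. ['cat' 'dog' ...]
print(clf.predict_proba(X[:5]))  # shape (5, 2); columns follow clf.classes_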
def predict_proba(self, X, raw_score=False, num_iteration=None,
                  pred_leaf=False, pred_contrib=False, **kwargs):
    """Return the predicted probability for each class for each sample.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        Input features matrix.
    raw_score : bool, optional (default=False)
        Whether to predict raw scores.
    num_iteration : int or None, optional (default=None)
        Limit number of iterations in the prediction.
        If None, if the best iteration exists, it is used; otherwise, all trees are used.
        If <= 0, all trees are used (no limits).
    """
    result = super(LGBMClassifier, self).predict(X, raw_score, num_iteration,
                                                 pred_leaf, pred_contrib, **kwargs)
    if self._n_classes > 2 or pred_leaf or pred_contrib:
        return result
    else:
        return np.vstack((1. - result, result)).transpose()
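In the binary case the underlying booster returns a single probability per sample (the probability of the positive class), and the else branch above stacks 1 - p and p into two columns so the output always has shape (n_samples, n_classes). A quick check on illustrative data:

import numpy as np
from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=8, random_state=1)
clf = LGBMClassifier(n_estimators=30, random_state=1).fit(X, y)

proba = clf.predict_proba(X)
print(proba.shape)                          # (1000, 2)
print(np.allclose(proba.sum(axis=1), 1.0))  # True: each row is [1 - p, p]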
@property
def classes_(self):
    """Get the class label array."""
    if self._classes is None:
        raise LGBMNotFittedError('No classes found. Need to call fit beforehand.')
    return self._classes

@property
def n_classes_(self):
    """Get the number of classes."""
    if self._n_classes is None:
        raise LGBMNotFittedError('No classes found. Need to call fit beforehand.')
    return self._n_classes
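Both properties guard against use before training: on an unfitted estimator they raise LGBMNotFittedError, which derives from scikit-learn's NotFittedError. A short illustrative check (the dataset and parameter values are assumptions):

from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification
from sklearn.exceptions import NotFittedError  # LGBMNotFittedError derives from this

clf = LGBMClassifier(n_estimators=20)
try:
    _ = clf.classes_  # not fitted yet, so this raises
except NotFittedError as err:
    print("before fit:", err)

X, y = make_classification(n_samples=500, n_features=6, random_state=0)
clf.fit(X, y)
print(clf.classes_)    # the original class labels, e.g. [0 1]
print(clf.n_classes_)  # 2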