20分钟
示例:
import lightgbm as lgb
import numpy as np
class DatasetTest:
    """Demonstrate attributes and methods of ``lgb.Dataset`` objects.

    Two datasets are created: one from a file path and one from an
    in-memory NumPy array with labels, weights and feature names.
    """

    def __init__(self):
        # Dataset referenced by a file path.
        self._matrix1 = lgb.Dataset('data/train.svm.txt')
        # Dataset built from a 4x3 integer array with per-row labels and
        # weights and one name per column.
        # NOTE(review): `silent` appears to have been deprecated in
        # LightGBM 3.3 and removed in 4.0 -- confirm against the installed
        # version before running this example.
        self._matrix2 = lgb.Dataset(
            data=np.arange(0, 12).reshape((4, 3)),
            label=[1, 2, 3, 4],
            weight=[0.5, 0.4, 0.3, 0.2],
            silent=False,
            feature_name=['a', 'b', 'c'],
        )

    def print(self, matrix: "lgb.Dataset") -> None:
        """Print the attributes of a matrix whose construction is not yet complete.

        The method name shadows the builtin only as a class attribute; the
        ``print`` calls inside method bodies still resolve to the builtin.

        :param matrix: object exposing ``data``, ``label``, ``weight``,
            ``init_score`` and ``group`` attributes.
        :return: None
        """
        for attr in ('data', 'label', 'weight', 'init_score', 'group'):
            # Wrap the operands in a tuple so a tuple-valued attribute is
            # printed as-is instead of being unpacked as format arguments.
            print('%s: %s' % (attr, getattr(matrix, attr)))

    def run_method(self, matrix: "lgb.Dataset") -> None:
        """Exercise a few methods of the matrix and print their results.

        :param matrix: object providing ``get_ref_chain`` and ``subset``.
        :return: None
        """
        print('get_ref_chain():', matrix.get_ref_chain(ref_limit=10))
        # get_ref_chain(): {<lightgbm.basic.Dataset object at 0x7f29cd762f28>}
        print('subset():', matrix.subset(used_indices=[0, 1]))
        # subset(): <lightgbm.basic.Dataset object at 0x7f29a4aeb518>

    def test(self) -> None:
        """Print the attributes of both datasets, then run the method demo
        on the in-memory one. Sample output is shown in the comments."""
        self.print(self._matrix1)
        # data: data/train.svm.txt
        # label: None
        # weight: None
        # init_score: None
        # group: None
        self.print(self._matrix2)
        # data: [[ 0 1 2]
        # [ 3 4 5]
        # [ 6 7 8]
        # [ 9 10 11]]
        # label: [1, 2, 3, 4]
        # weight: [0.5, 0.4, 0.3, 0.2]
        # init_score: None
        # group: None
        self.run_method(self._matrix2)
5. 你需要确保数据集的样本数足够大,从而满足一些限制条件(如:单个节点的最小样本数、单个桶的最小样本数等),否则会直接报错。
学员评价