CIFAR-10 数据集就不多介绍了,入门必备,下载地址: https://www.cs.toronto.edu/~kriz/cifar.html 官方提供三种形式的下载(Python 版、Matlab 版和二进制版):
可以看出,官方并不提供图片形式的下载,需要自己进行数据转换。虽然可以直接把数据读成 ndarray,但对初学者来说,直接读取图片可能更直观一些。
自己写了个转换程序(将bytes形式的文件转换为图片并分类存储):
def recover_cifar10(cifar10_dir, save_dir='./data/cifar10'):
    """Export the CIFAR-10 training batches as image files grouped by class.

    Reads ``batches.meta`` and ``data_batch_1`` .. ``data_batch_5`` from
    ``cifar10_dir`` and writes every sample to
    ``<save_dir>/<label name>/<file name stored in the batch>``.
    Only the five training batches are read; the test batch is not touched.

    Args:
        cifar10_dir: Directory containing the extracted "python version"
            of CIFAR-10.
        save_dir: Root directory for the exported images. Class
            sub-directories are created on demand.

    Returns:
        None. The images are written to disk as a side effect.
    """
    # NOTE: pickle can execute arbitrary code while loading. Only point this
    # function at batch files obtained from a trusted source.
    with open(join(cifar10_dir, 'batches.meta'), 'rb') as fo:
        meta_data = pickle.load(fo, encoding='bytes')
    # Keys and values are bytes because the files are loaded with
    # encoding='bytes'; decode the class names once up front.
    label_names = [bytes.decode(name) for name in meta_data[b'label_names']]

    def save_batch(path):
        """Write every sample of the batch file at ``path`` to disk."""
        with open(path, 'rb') as fo:
            batch_data = pickle.load(fo, encoding='bytes')
        data = batch_data[b'data']
        labels = batch_data[b'labels']
        filenames = batch_data[b'filenames']

        # Each row holds one image as three consecutive 32x32 colour planes
        # (R, G, B). Reshape the whole batch once and move the channel axis
        # last so every image is height x width x channel, which
        # Image.fromarray turns into an RGB image directly.
        images = data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

        for pixels, label, filename in zip(images, labels, filenames):
            cat_dir = join(save_dir, label_names[label])
            # exist_ok avoids the check-then-create race of
            # os.path.exists() followed by os.makedirs().
            os.makedirs(cat_dir, exist_ok=True)
            Image.fromarray(pixels).save(join(cat_dir, bytes.decode(filename)))

    for batch_index in range(1, 6):
        save_batch(join(cifar10_dir, 'data_batch_%d' % batch_index))
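有需要的可以拿走使用。注意:运行前需要先导入依赖(import os、import pickle、from os.path import join、from PIL import Image),并安装 numpy 和 Pillow。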