
import os
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
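# TFRecord basics: build tf.train.Example protos, write them to TFRecord
# files (plain and GZIP-compressed), then read and parse them back.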
# Build one protobuf list of each kind used below: bytes, float and int64.

# The book names are encoded to UTF-8 bytes before being put in a BytesList.
favorite_books = [
    book_name.encode('utf-8')
    for book_name in ('machine learning', 'cc150')
]
favorite_books_bytelist = tf.train.BytesList(value=favorite_books)
print(favorite_books_bytelist)

hours_floatlist = tf.train.FloatList(value=[15.5, 9.5, 70, 80])
print(hours_floatlist)

age_int64list = tf.train.Int64List(value=[42])
print(age_int64list)
# Wrap each typed list in a tf.train.Feature and collect them, keyed by
# feature name, into a single tf.train.Features message.
features = tf.train.Features(
    feature={
        "favorite_books": tf.train.Feature(
            bytes_list=favorite_books_bytelist,
        ),
        "hours": tf.train.Feature(
            float_list=hours_floatlist,
        ),
        "age": tf.train.Feature(
            int64_list=age_int64list,
        ),
    }
)
print(features)

# Build the Example proto from the features and print it.
example = tf.train.Example(features=features)
print(example)

# Serialized form of the Example; this is what gets written to the
# TFRecord files further down.
serialized_example = example.SerializeToString()
print(serialized_example)
# Write the serialized Example three times into an uncompressed TFRecord file.
output_dir = 'tfrecord_basic'
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: there is no
# window between the check and the creation, and missing parent directories
# are created as well.
os.makedirs(output_dir, exist_ok=True)

filename = "test.tfrecords"
filename_fullpath = os.path.join(output_dir, filename)

with tf.io.TFRecordWriter(filename_fullpath) as writer:
    # The loop index is unused; the same record is written on every pass.
    for _ in range(3):
        writer.write(serialized_example)
# Read the file back and print every record as-is (still serialized,
# no parsing is done here).
dataset = tf.data.TFRecordDataset(filenames=[filename_fullpath])
for serialized_example_tensor in dataset:
    print(serialized_example_tensor)
# Parsing schema: one entry per feature name written into the Example.
# "favorite_books" and "hours" are declared variable-length, "age" is a
# fixed-length feature with shape [].
expected_features = {
    "favorite_books": tf.io.VarLenFeature(dtype=tf.string),
    "hours": tf.io.VarLenFeature(dtype=tf.float32),
    "age": tf.io.FixedLenFeature([], dtype=tf.int64),
}

# Re-open the uncompressed file, parse each record against the schema and
# print the decoded book names.
dataset = tf.data.TFRecordDataset(filenames=[filename_fullpath])
for serialized_example_tensor in dataset:
    # NOTE: this rebinds the module-level name `example`.
    example = tf.io.parse_single_example(
        serialized=serialized_example_tensor,
        features=expected_features,
    )
    # The variable-length feature is converted with tf.sparse.to_dense
    # (default value b"") before it is iterated.
    books = tf.sparse.to_dense(
        example["favorite_books"],
        default_value=b"",
    )
    for book in books:
        print(book.numpy().decode("UTF-8"))
# Write the same serialized Example three times again, this time with GZIP
# compression enabled through TFRecordOptions.
# NOTE(review): the path gets a '.zip' suffix although the compression type
# is GZIP; the name is kept because the reader below uses the same variable.
filename_fullpath_zip = filename_fullpath + '.zip'
options = tf.io.TFRecordOptions(compression_type="GZIP")

with tf.io.TFRecordWriter(filename_fullpath_zip, options=options) as writer:
    for _ in range(3):
        writer.write(serialized_example)
# Read the GZIP-compressed file back; the dataset is given the same
# compression type that was used when writing it.
dataset_zip = tf.data.TFRecordDataset(
    filenames=[filename_fullpath_zip],
    compression_type="GZIP",
)
for serialized_example_tensor in dataset_zip:
    # Parse against the schema defined earlier in the script.
    example = tf.io.parse_single_example(
        serialized=serialized_example_tensor,
        features=expected_features,
    )
    # Convert the variable-length book feature with tf.sparse.to_dense
    # (default value b"") and print each entry decoded as UTF-8 text.
    books = tf.sparse.to_dense(
        example["favorite_books"],
        default_value=b"",
    )
    for book in books:
        print(book.numpy().decode("UTF-8"))