DeepLearning.AI
DeepLearning.AI makes these slides available for educational purposes. You may not use or
distribute these slides for commercial purposes. You may make copies of these slides and
use or distribute them for educational purposes as long as you cite DeepLearning.AI as the
source of the slides.
● Fast
● Flexible
● Easy-to-use
Basic mechanics

tf.data.Dataset → map(func) → tf.data.Dataset → batch(size) → tf.data.Dataset → ...
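A minimal sketch of this chaining (illustrative values, not from the slides): each transformation such as map(func) or batch(size) returns a new tf.data.Dataset, so calls can be strung together.

import tensorflow as tf

dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])
dataset = dataset.map(lambda x: x * 2)   # map(func): apply a function to every element
dataset = dataset.batch(3)               # batch(size): group elements into batches

for batch in dataset:
    print(batch.numpy())  # [2 4 6], then [ 8 10 12]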
Using an iterator to navigate
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4])
it = iter(dataset)
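Calling next() on the iterator pulls one element at a time (a small illustrative continuation):

print(next(it).numpy())  # 1
print(next(it).numpy())  # 2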
Index Description
0 Male
1 Female
2 Nonbinary
3 Trans
4 Unassigned
... ...
A raw table with the columns First, Last, Addr, Phone, Gender, Age stores the Gender and Age columns as integer codes:

Gender codes: 0 Male, 1 Female, 2 Nonbinary, 4 Unassigned
Age codes: 0 Infant, 1 Child, 2 Teen, 4 Adult
Bucketized column
The Iris dataset has all numeric data as its input features:
● SepalLength
● SepalWidth
● PetalLength
● PetalWidth
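Numeric features like these can be fed directly to numeric feature columns (a short sketch of the tf.feature_column API used throughout these slides):

# One numeric column per Iris input feature (defaults to a float32 scalar).
feature_columns = [
    tf.feature_column.numeric_column(key=name)
    for name in ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]
]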
Specifying data types
# First, convert the raw input to a numeric column (the source feature is assumed to be a year).
numeric_feature_column = tf.feature_column.numeric_column("Year")

# Then, bucketize the numeric column on the years 1960, 1980, and 2000.
bucketized_feature_column = tf.feature_column.bucketized_column(
    source_column=numeric_feature_column,
    boundaries=[1960, 1980, 2000])
Categorical identity column
Categorizing identity features
identity_feature_column = tf.feature_column.categorical_column_with_identity(
    key='my_feature_b',
    num_buckets=4)  # Values in [0, 4), i.e. 0 through 3
def input_fn():
    ...
    return ({'my_feature_a': [7, 9, 5, 2], 'my_feature_b': [3, 1, 2, 2]},
            [Label_values])
Categorical vocabulary column
Creating a categorical vocab column
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
    key=feature_name,
    vocabulary_list=["kitchenware", "electronics", "sports"])  # example values (assumed)

vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_file(
    key=feature_name,
    vocabulary_file="product_class.txt",
    vocabulary_size=3)
Hashed column
hash(raw_feature) % hash_bucket_size
hashed_feature_column = tf.feature_column.categorical_column_with_hash_bucket(
    key="some_feature",
    hash_bucket_size=100)  # number of hash buckets (value assumed)
latitude_bucket_fc = tf.feature_column.bucketized_column(
    tf.feature_column.numeric_column('latitude'),
    list(atlanta.latitude.edges))

longitude_bucket_fc = tf.feature_column.bucketized_column(
    tf.feature_column.numeric_column('longitude'),
    list(atlanta.longitude.edges))

crossed_lat_lon_fc = tf.feature_column.crossed_column(
    [latitude_bucket_fc, longitude_bucket_fc], hash_bucket_size=5000)  # bucket count assumed
Embedding column

embedding_dimensions = number_of_categories**0.25

# This means creating an embedding vector lookup table with one element for each category.
embedding_column = tf.feature_column.embedding_column(
    categorical_column=categorical_column,
    dimension=embedding_dimensions)
Feature Columns with Keras
from tensorflow.keras import layers

def demo(feature_column):
    feature_layer = layers.DenseFeatures(feature_column)
    ...
Data sources
# Download dataset
DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'
path = tf.keras.utils.get_file('mnist.npz', DATA_URL)
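A hedged continuation following the load-NumPy-data pattern: read the arrays out of the .npz archive and wrap them in a tf.data.Dataset (the x_train / y_train keys are assumed to match mnist.npz).

import numpy as np

with np.load(path) as data:
    train_examples = data['x_train']
    train_labels = data['y_train']

train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))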
Heart disease CSV columns: age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, target
df['thal'] = pd.Categorical(df['thal'])
df['thal'] = df.thal.cat.codes
df.head()
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target
target = df.pop('target')
dataset = tf.data.Dataset.from_tensor_slices((df.values, target.values))
import pathlib

DATA_URL = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
data_root_orig = tf.keras.utils.get_file(origin=DATA_URL,
                                         fname='flower_photos', untar=True)
data_root = pathlib.Path(data_root_orig)
import random
import IPython.display as display
all_image_paths = list(data_root.glob('*/*'))
all_image_paths = [str(path) for path in all_image_paths]
random.shuffle(all_image_paths)
image_count = len(all_image_paths)
image_count
image_path = random.choice(all_image_paths)
display.display(display.Image(image_path))
CSV Loading the structured dataset
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
df = pd.read_csv(train_file_path, sep=',')
df.head()
survived sex age n_siblings_spouses parch fare class deck embark_town alone
   age  n_siblings_spouses  parch     fare
0  22.0                  1      0   7.2500
1  38.0                  1      0  71.2833
2  26.0                  0      0   7.9250
3  35.0                  1      0  53.1000
CSV Leveraging feature columns

numeric_columns = []
for feature in NUMERIC_FEATURES:
    num_col = tf.feature_column.numeric_column(feature)
    numeric_columns.append(tf.feature_column.indicator_column(num_col))

>>> numeric_columns
[IndicatorColumn(categorical_column=NumericColumn(key='age', shape=(1,),
   default_value=None, dtype=tf.float32, normalizer_fn=None)),
 IndicatorColumn(categorical_column=NumericColumn(key='n_siblings_spouses', shape=(1,),
   default_value=None, dtype=tf.float32, normalizer_fn=None)),
 IndicatorColumn(categorical_column=NumericColumn(key='parch', shape=(1,),
   default_value=None, dtype=tf.float32, normalizer_fn=None)),
 IndicatorColumn(categorical_column=NumericColumn(key='fare', shape=(1,),
   default_value=None, dtype=tf.float32, normalizer_fn=None))]
CSV Categorical data
CATEGORIES = {
'sex': ['male', 'female'],
'class' : ['First', 'Second', 'Third'],
'deck' : ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
'embark_town' : ['Cherbourg', 'Southampton', 'Queenstown'],
'alone' : ['y', 'n']
}
cat_df = df[list(CATEGORIES.keys())]
cat_df.head()
CSV Categorical columns from raw data

categorical_columns = []
for feature, vocab in CATEGORIES.items():
    cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
    categorical_columns.append(tf.feature_column.indicator_column(cat_col))
CSV Categorical columns from raw data

>>> categorical_columns
[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='sex',
   vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1,
   num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='class',
   vocabulary_list=('First', 'Second', 'Third'), dtype=tf.string, default_value=-1,
   num_oov_buckets=0)),
 ...
Text Loading texts with TextLineDataset

DIRECTORY_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
FILE_NAME = 'cowper.txt'

file_path = tf.keras.utils.get_file(FILE_NAME,
                                    origin=DIRECTORY_URL + FILE_NAME)
lines_dataset = tf.data.TextLineDataset(file_path)
Text Inspecting texts
filenames = [tf_record_filename]
raw_dataset = tf.data.TFRecordDataset(filenames)

feature_description = {
    'feature1': tf.io.FixedLenFeature((), tf.string),
    'feature2': tf.io.FixedLenFeature((), tf.int64)
}
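To turn the raw serialized records into parsed features, the usual next step (a sketch following the TFRecord tutorial, not shown on the slides) is to map a parsing function over the dataset:

def _parse_function(example_proto):
    # Parse one serialized tf.train.Example using the schema above.
    return tf.io.parse_single_example(example_proto, feature_description)

parsed_dataset = raw_dataset.map(_parse_function)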
(Directory-layout diagram: images organized into class subfolders, for example Validation/Cats and Validation/Dogs, each holding numbered files such as 3.jpg through 10.jpg.)
Generators Keras ImageDataGenerator

def make_generator():
    # 'catsdogs' is assumed to be the root directory with one subfolder per class.
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1. / 255, rotation_range=20, zoom_range=[0.8, 1.2])
    train_generator = train_datagen.flow_from_directory(
        catsdogs, target_size=(224, 224), class_mode='categorical', batch_size=32)
    return train_generator

train_generator = tf.data.Dataset.from_generator(
    make_generator, (tf.float32, tf.uint8))
Datasets

● MNIST (NumPy)
● TF-Flowers (Images)
● TFRecords
● Titanic (CSV)
● Illiad (Text)
Numpy MNIST
X, y = next(iter(train_dataset))
input_shape = X.numpy().shape[1:]

model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss=...,
              metrics=[...])
csv_file = tf.keras.utils.get_file('heart.csv',
'https://storage.googleapis.com/applied-dl/heart.csv')
df = pd.read_csv(csv_file)
df['thal'] = pd.Categorical(df['thal'])
df['thal'] = df.thal.cat.codes
target = df.pop('target')
Pandas Identifying Heart Disease
age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
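The model below is trained on train_dataset, which is not defined on these slides; a minimal sketch, assuming the usual shuffle-and-batch step from the pandas-DataFrame tutorial:

# Shuffle the (features, target) dataset built earlier and batch it for training.
train_dataset = dataset.shuffle(len(df)).batch(1)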
model = tf.keras.Sequential([
tf.keras.layers.Dense(10, activation='relu'),
tf.keras.layers.Dense(10, activation='relu'),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(train_dataset, epochs=15)
Pandas Constructing the dataset for the Functional API
dict_slices = tf.data.Dataset.from_tensor_slices(
    (df.to_dict('list'), target.values)).batch(16)
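The functional model that consumes this dictionary-structured dataset is not shown on the slides; a hedged sketch of the usual pattern (one named Input per DataFrame column), which the fit call below then trains:

# One scalar input per column, keyed by the same names as dict_slices.
inputs = {key: tf.keras.layers.Input(shape=(), name=key) for key in df.keys()}
x = tf.stack(list(inputs.values()), axis=-1)
x = tf.keras.layers.Dense(10, activation='relu')(x)
output = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=output)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])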
Pandas Training the model (Functional)
model.fit(dict_slices, epochs=15)
Images Classifying species of flowers
Images Preprocessing and creating the dataset
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
# all_image_labels: one integer label per path, derived from the flower folder names.
label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)

def preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [192, 192])
    image /= 255.0  # normalize to [0,1] range
    return image

image_ds = path_ds.map(preprocess_image)
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
Images Training the model
BATCH_SIZE = 32
ds = image_label_ds.shuffle(
buffer_size=len(all_image_paths)).repeat().batch(BATCH_SIZE)
steps_per_epoch=tf.math.ceil(len(all_image_paths) / BATCH_SIZE).numpy()
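A hedged continuation showing how these pieces feed training; the model itself (an image classifier built elsewhere in the tutorial) is assumed here:

model.fit(ds, epochs=1, steps_per_epoch=int(steps_per_epoch))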
CSV Predicting survivors with Titanic
train_file_path = tf.keras.utils.get_file(
    "train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv")
test_file_path = tf.keras.utils.get_file(
    "eval.csv", "https://storage.googleapis.com/tf-datasets/titanic/eval.csv")

raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)
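get_dataset is a small helper that the slides do not show; a sketch, assuming the usual wrapper around tf.data.experimental.make_csv_dataset from the load-CSV tutorial (batch size and other arguments assumed):

def get_dataset(file_path, **kwargs):
    # Build a batched dataset of (features dict, label) pairs from the CSV file.
    return tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=5,           # artificially small so batches are easy to print
        label_name='survived',
        na_value="?",
        num_epochs=1,
        ignore_errors=True,
        **kwargs)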
CSV What happens after loading the CSV?
def show_batch(dataset):
    for batch, label in dataset.take(1):
        for key, value in batch.items():
            print("{:20s}: {}".format(key, value.numpy()))
>>> show_batch(get_dataset(train_file_path))
sex : [b'female' b'female' b'female' b'male' b'male']
age : [40. 28. 52. 50. 34.]
n_siblings_spouses : [0 0 1 0 1]
parch : [0 0 0 0 0]
fare : [13. 7.75 78.2667 13. 21. ]
class : [b'Second' b'Third' b'First' b'Second' b'Second']
deck : [b'unknown' b'unknown' b'D' b'unknown' b'unknown']
embark_town : [b'Southampton' b'Queenstown' b'Cherbourg' b'Southampton' ..]
alone : [b'y' b'y' b'n' b'y' b'n']
CSV Getting data from named columns
SELECT_COLUMNS =['survived','age','n_siblings_spouses','class','deck','alone']
temp_dataset = get_dataset(train_file_path, select_columns=SELECT_COLUMNS)
>>> show_batch(temp_dataset)
age : [60. 34. 28. 40. 28.]
n_siblings_spouses : [1 1 1 0 0]
class : [b'Second' b'Third' b'Third' b'First' b'Third']
deck : [b'unknown' b'unknown' b'unknown' b'B' b'unknown']
alone : [b'n' b'n' b'n' b'y' b'y']
CSV Extracting features
packed_dataset = temp_dataset.map(pack)
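The pack function is not defined on the slide; assuming all selected columns are numeric, the load-CSV tutorial's version simply stacks them into one tensor (sketch):

def pack(features, label):
    # Stack every column of the features dict into a single dense tensor.
    return tf.stack(list(features.values()), axis=-1), label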
CSV Packing numeric features

class PackNumericFeatures(object):
    def __init__(self, names):
        self.names = names

packed_train_data = raw_train_data.map(
    PackNumericFeatures(NUMERIC_FEATURES))
packed_test_data = raw_test_data.map(
    PackNumericFeatures(NUMERIC_FEATURES))
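The class is truncated on the slide; since it is passed to map as a callable, it needs a __call__ method. A sketch following the load-CSV tutorial:

class PackNumericFeatures(object):
    def __init__(self, names):
        self.names = names

    def __call__(self, features, labels):
        # Pop the named numeric columns, cast to float32, and stack them
        # into a single 'numeric' feature alongside the remaining columns.
        numeric_features = [features.pop(name) for name in self.names]
        numeric_features = [tf.cast(feat, tf.float32) for feat in numeric_features]
        numeric_features = tf.stack(numeric_features, axis=-1)
        features['numeric'] = numeric_features
        return features, labels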
CSV Showing packed features
>>> show_batch(packed_train_data)
sex : [b'male' b'male' ...]
class : [b'First' b'Third' ...]
deck : [b'unknown' b'unknown' ...]
embark_town : [b'Cherbourg' b'Southampton' ...]
alone : [b'n' b'y' ...]
numeric : [[28. 1. ...]
[49. 0. ...]
[27. 0. ...]
[0.83 0. ...]
[28. 0. ...]]
CSV Normalizing features

desc = pd.read_csv(train_file_path)[NUMERIC_FEATURES].describe()

normalizer = functools.partial(normalize_numeric_data,
                               mean=MEAN,
                               std=STD)

numeric_column = tf.feature_column.numeric_column(
    'numeric',
    normalizer_fn=normalizer,
    shape=[len(NUMERIC_FEATURES)])
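normalize_numeric_data, MEAN, and STD are not defined on the slides; a sketch of the standard-score normalization from the load-CSV tutorial, derived from the describe() statistics above:

import functools
import numpy as np

MEAN = np.array(desc.T['mean'])
STD = np.array(desc.T['std'])

def normalize_numeric_data(data, mean, std):
    # Center the packed numeric tensor and scale it to unit variance.
    return (data - mean) / std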
CSV Now for the categorical features
CATEGORIES = {
'sex': ['male', 'female'],
'class' : ['First', 'Second', 'Third'],
'deck' : ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
'embark_town' : ['Cherbourg', 'Southampton', 'Queenstown'],
'alone' : ['y', 'n']
}
cat_feature_col = tf.feature_column.categorical_column_with_vocabulary_list(
    key='class',
    vocabulary_list=['First', 'Second', 'Third'])

categorical_column = tf.feature_column.indicator_column(cat_feature_col)
CSV Training the model

dense_features = tf.keras.layers.DenseFeatures(categorical_columns + numeric_columns)

model = tf.keras.Sequential([
    dense_features,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

model.fit(packed_train_data, epochs=20)
Text Identifying translators of a work

DIRECTORY_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
FILE_NAMES = ['cowper.txt', 'derby.txt', 'butler.txt']

labeled_data_sets = []
for i, file_name in enumerate(FILE_NAMES):
    file_path = tf.keras.utils.get_file(file_name, origin=DIRECTORY_URL + file_name)
    lines_dataset = tf.data.TextLineDataset(file_path)
    labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i))
    labeled_data_sets.append(labeled_dataset)
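labeler is not defined on the slide; in the load-text tutorial it pairs each line with the index of its source file as an int64 label (sketch):

def labeler(example, index):
    # Attach the translator index to every line from that file.
    return example, tf.cast(index, tf.int64)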
Text Preparing the dataset
all_labeled_data = labeled_data_sets[0]
for labeled_dataset in labeled_data_sets[1:]:
    all_labeled_data = all_labeled_data.concatenate(labeled_dataset)
all_labeled_data = all_labeled_data.shuffle(buffer_size=50000)
import tensorflow_datasets as tfds

tokenizer = tfds.features.text.Tokenizer()

vocabulary_set = set()
for text_tensor, _ in all_labeled_data:
    some_tokens = tokenizer.tokenize(text_tensor.numpy())
    vocabulary_set.update(some_tokens)

vocab_size = len(vocabulary_set)

>>> vocab_size
17178
https://www.coursera.org/learn/natural-language-processing-tensorflow
Text Encode an example
# Encode an example
encoded_text = encoder.encode(original_text)
all_encoded_data = all_labeled_data.map(encode_map_fn)
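encoder and encode_map_fn are not shown on the slides; a sketch following the load-text tutorial, using the (since-deprecated) tfds.features.text.TokenTextEncoder built from the vocabulary above:

encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)

def encode(text_tensor, label):
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label

def encode_map_fn(text, label):
    # Wrap the eager encode() so it can run inside the tf.data pipeline.
    encoded_text, label = tf.py_function(
        encode, inp=[text, label], Tout=(tf.int64, tf.int64))
    encoded_text.set_shape([None])
    label.set_shape([])
    return encoded_text, label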
Text Prepare the dataset
BUFFER_SIZE = 50000
BATCH_SIZE = 64
TAKE_SIZE = 5000
train_data = all_encoded_data.skip(TAKE_SIZE).shuffle(BUFFER_SIZE)
train_data = train_data.padded_batch(BATCH_SIZE, padded_shapes=([-1],[]))
test_data = all_encoded_data.take(TAKE_SIZE)
test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([-1],[]))
Text Training the model
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, 64),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
tf.keras.Sequential([
tf.keras.layers.Dense(units, activation='relu') for units in [64, 64]
]),
tf.keras.layers.Dense(3, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_data, epochs=3)