Exp 8 Machine Translation
English-to-French translation with a sequence-to-sequence (encoder-decoder) LSTM model.
1. Importing the libraries (a minimal import cell is sketched after this outline)
2. Reading the data
   1. Initialize the lists
   2. Read the file
   3. Process each line
3. Preparing the input data for the encoder
   1. Collect and store the input sentences
   2. Tokenize the sentences into sequences of integers
   3. Determine the maximum length of the input sequences
   4. Pad the sequences so they all have the same length
   5. Create the input token dictionary and determine the number of tokens
4. Preparing the input data for the decoder
   1. Collect the output lines, with special start/end tokens added
   2. Tokenize the output lines
   3. Determine the maximum output sequence length
   4. Pad the output sequences
   5. Create the output token dictionary and determine the number of tokens
5. Define the encoder and the decoder
   1. Input layer
   2. Embedding layer
   3. LSTM layer
   4. Output layer
6. Define the model
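The import cell for step 1 is not reproduced in this section; a minimal set consistent with the calls used below (NumPy for the one-hot arrays, Keras for the model) would be:

import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense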
batch_size = 64      # number of sequence pairs per training batch
epochs = 25          # number of training epochs
latent_dim = 256     # dimensionality of the LSTM hidden and cell states
num_samples = 10000  # number of sentence pairs to train on
data_path = r'D:\KMIT\NLP_Lab\Experiments\Dataset\Exp7_MT/eng-fra.txt'
lines = []
with open(data_path, 'r', encoding="utf8") as f:  # context manager closes the file
    for l in f:
        lines.append(l)
lines[:20]
['Go.\tVa !\n',
'Run!\tCours\u202f!\n',
'Run!\tCourez\u202f!\n',
'Wow!\tÇa alors\u202f!\n',
'Fire!\tAu feu !\n',
"Help!\tÀ l'aide\u202f!\n",
'Jump.\tSaute.\n',
'Stop!\tÇa suffit\u202f!\n',
'Stop!\tStop\u202f!\n',
'Stop!\tArrête-toi !\n',
'Wait!\tAttends !\n',
'Wait!\tAttendez !\n',
'I see.\tJe comprends.\n',
"I try.\tJ'essaye.\n",
"I won!\tJ'ai gagné !\n",
"I won!\tJe l'ai emporté !\n",
'Oh no!\tOh non !\n',
'Attack!\tAttaque !\n',
'Attack!\tAttaquez !\n',
'Cheers!\tSanté !\n']
len(lines)
135842
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()
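The "Process each line" step from the outline is not shown here; a minimal sketch, assuming the tab-separated format of eng-fra.txt seen above, with the tab and newline characters used as the decoder's start- and end-of-sequence markers:

for line in lines[:num_samples]:
    # each line is "english<TAB>french\n"
    input_text, target_text = line.rstrip('\n').split('\t')[:2]
    # '\t' marks start-of-sequence, '\n' marks end-of-sequence for the decoder
    target_text = '\t' + target_text + '\n'
    input_texts.append(input_text)
    target_texts.append(target_text)
    # collect every character seen on each side
    input_characters.update(input_text)
    target_characters.update(target_text)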
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
input_characters
[' ',
'!',
'"',
'$',
'&',
"'",
',',
'-',
'.',
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
':',
'?',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'Y',
'Z',
'a',
'b',
'c',
'd',
'e',
'f',
'g',
'h',
'i',
'j',
'k',
'l',
'm',
'n',
'o',
'p',
'q',
'r',
's',
't',
'u',
'v',
'w',
'x',
'y',
'z',
'’']
#input_token_index
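The construction of the lookup tables is not shown above; a minimal sketch mapping each character to its index in the sorted lists (the names input_token_index and target_token_index follow the commented fragment above and are assumed by the array-filling sketch further below):

input_token_index = dict((char, i) for i, char in enumerate(input_characters))
target_token_index = dict((char, i) for i, char in enumerate(target_characters))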
Summary of the process:
1) Turn the sentences into three NumPy arrays: encoder_input_data, decoder_input_data, and decoder_target_data.
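The array shapes below depend on max_encoder_seq_length and max_decoder_seq_length, whose definitions are not shown in this section; the usual choice, assuming the input_texts and target_texts lists built earlier, is:

max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)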
Initializing 3D arrays
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')
print(encoder_input_data.shape)
print(decoder_input_data.shape)
print(decoder_target_data.shape)
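The loop that fills these arrays is not shown; a minimal sketch of the usual character-level one-hot filling, assuming the input_token_index and target_token_index tables sketched above:

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        # one-hot encode the t-th character of the i-th input sentence
        encoder_input_data[i, t, input_token_index[char]] = 1.0
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.0
        if t > 0:
            # the target is the decoder input shifted left by one timestep
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.0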
encoder_input_data[:,:,50]
array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 1., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)
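In this slice, index 50 corresponds to the character 'e' under the enumeration of input_characters above, so each 1.0 marks a timestep at which 'e' occurs in that input sentence.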
The encoder's final hidden and cell states (state_h and state_c) summarize the source sentence and are used to initialize the decoder.
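The cell that defines the encoder and decoder is not shown above; a minimal sketch in the style of the standard Keras character-level seq2seq example, using the imports sketched earlier. Because the inputs are already one-hot 3D arrays, the LSTMs are fed directly and no embedding layer is needed here (the outline's embedding step would apply to an integer-token variant):

# Encoder: reads the one-hot English sequence; only its final states are kept
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]  # fixed-size summary of the input sentence

# Decoder: predicts the next French character, conditioned on the encoder states
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Full training model: [source, shifted target] -> next-character distribution
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

Training uses teacher forcing: the decoder receives the ground-truth previous character (decoder_input_data) and learns to predict decoder_target_data, the same sequence shifted one step ahead, which is exactly how the arrays above were filled.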
# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit([encoder_input_data, decoder_input_data],
          decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
Epoch 1/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 55s 213ms/step - accuracy: 0.6764 - loss: 1.5467 - val_accuracy: 0.6634 - val_loss: 1.2065
Epoch 2/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 51s 203ms/step - accuracy: 0.7281 - loss: 0.9781 - val_accuracy: 0.7167 - val_loss: 0.9704
Epoch 3/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 198ms/step - accuracy: 0.7712 - loss: 0.7919 - val_accuracy: 0.7466 - val_loss: 0.8657
Epoch 4/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 52s 208ms/step - accuracy: 0.7904 - loss: 0.7153 - val_accuracy: 0.7616 - val_loss: 0.8103
Epoch 5/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 51s 202ms/step - accuracy: 0.8031 - loss: 0.6692 - val_accuracy: 0.7795 - val_loss: 0.7631
Epoch 6/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 50s 199ms/step - accuracy: 0.8150 - loss: 0.6262 - val_accuracy: 0.7862 - val_loss: 0.7326
Epoch 7/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 196ms/step - accuracy: 0.8240 - loss: 0.5967 - val_accuracy: 0.7945 - val_loss: 0.7018
Epoch 8/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 50s 198ms/step - accuracy: 0.8316 - loss: 0.5722 - val_accuracy: 0.7993 - val_loss: 0.6796
Epoch 9/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 196ms/step - accuracy: 0.8369 - loss: 0.5511 - val_accuracy: 0.8043 - val_loss: 0.6602
Epoch 10/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 194ms/step - accuracy: 0.8427 - loss: 0.5293 - val_accuracy: 0.8082 - val_loss: 0.6432
Epoch 11/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 47s 188ms/step - accuracy: 0.8469 - loss: 0.5144 - val_accuracy: 0.8125 - val_loss: 0.6304
Epoch 12/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 41s 164ms/step - accuracy: 0.8502 - loss: 0.5001 - val_accuracy: 0.8158 - val_loss: 0.6197
Epoch 13/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 196ms/step - accuracy: 0.8553 - loss: 0.4836 - val_accuracy: 0.8206 - val_loss: 0.6035
Epoch 14/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 652s 3s/step - accuracy: 0.8589 - loss: 0.4720 - val_accuracy: 0.8244 - val_loss: 0.5906
Epoch 15/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 48s 194ms/step - accuracy: 0.8625 - loss: 0.4610 - val_accuracy: 0.8270 - val_loss: 0.5818
Epoch 16/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 197ms/step - accuracy: 0.8655 - loss: 0.4502 - val_accuracy: 0.8290 - val_loss: 0.5748
Epoch 17/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 48s 193ms/step - accuracy: 0.8680 - loss: 0.4411 - val_accuracy: 0.8309 - val_loss: 0.5674
Epoch 18/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 195ms/step - accuracy: 0.8700 - loss: 0.4343 - val_accuracy: 0.8338 - val_loss: 0.5591
Epoch 19/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 196ms/step - accuracy: 0.8729 - loss: 0.4250 - val_accuracy: 0.8361 - val_loss: 0.5537
Epoch 20/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 194ms/step - accuracy: 0.8748 - loss: 0.4193 - val_accuracy: 0.8374 - val_loss: 0.5482
Epoch 21/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 195ms/step - accuracy: 0.8774 - loss: 0.4099 - val_accuracy: 0.8399 - val_loss: 0.5403
Epoch 22/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 196ms/step - accuracy: 0.8793 - loss: 0.4029 - val_accuracy: 0.8422 - val_loss: 0.5343
Epoch 23/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 48s 192ms/step - accuracy: 0.8819 - loss: 0.3945 - val_accuracy: 0.8423 - val_loss: 0.5335
Epoch 24/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 195ms/step - accuracy: 0.8839 - loss: 0.3877 - val_accuracy: 0.8443 - val_loss: 0.5276
Epoch 25/25
250/250 ━━━━━━━━━━━━━━━━━━━━ 49s 195ms/step - accuracy: 0.8856 - loss: 0.3823 - val_accuracy: 0.8448 - val_loss: 0.5259
<keras.src.callbacks.history.History at 0x14ff8fb39d0>
model.save('eng2french.h5')