Siamese Network Assignment

This document discusses training a Siamese neural network for audio-visual matching with triplet loss. It loads audio and image embeddings, builds training/test/validation splits, trains the model for up to 500 epochs with early stopping, and reports the 1:2 identification accuracy on the validation set.


# %%

! pip install scikit-learn

# %%
! pip install unidecode matplotlib

# %%
from classes import *
from functions import *
import pickle
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import random
import torch
from torch import optim  # optim.Adam is used below; the wildcard imports above may also provide it
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

# %%
from unidecode import unidecode

# %%
audio_files = os.listdir('datasets/audio')

with open('datasets/audio_embeddings.pickle', 'rb') as file:
    audio_embeddings = pickle.load(file)

audio_names = [key.split('/')[0] for key in audio_embeddings.keys()]

audio_df = pd.DataFrame([{'name': key.split('/')[0], 'audio_embedding': value}
                         for key, value in audio_embeddings.items()])

audio_df['name'] = audio_df['name'].apply(unidecode)

# L2-normalize the audio embeddings so distances are on a common scale
audio_df['audio_embedding'] = audio_df['audio_embedding'].apply(lambda x: x / np.linalg.norm(x))

# %%
with open('datasets/image_embeddings.pickle', 'rb') as file:
    image_embeddings = pickle.load(file)

image_df = pd.DataFrame([{'name': key.split('/')[0], 'image_embedding': value}
                         for key, value in image_embeddings.items()])

image_df['name'] = image_df['name'].apply(unidecode)

# %%
# An inner join keeps only names that have both an image and an audio embedding;
# an outer join would introduce NaN rows that break triplet construction below.
matches_df = pd.merge(image_df, audio_df, on='name', how='inner')

matches_df.columns = ['name', 'anchor', 'positive']

# %%
# For each (anchor, positive) pair, draw a random row with a different name and
# use its audio embedding as the negative example; re-draw on name collisions.
coincidences = 0
negatives = []
for i, row in matches_df.iterrows():
    while True:
        sample = matches_df.sample(n=1)
        sample.reset_index(inplace=True, drop=True)
        if sample['name'][0] != row['name']:
            negatives.append(sample['positive'][0])
            break
        else:
            coincidences += 1

matches_df['negative'] = negatives

# %%
train_set, test_set, _, _2 = train_test_split(matches_df, matches_df['name'],
                                              test_size=0.2)
test_set, validation_set, _, _2 = train_test_split(test_set, test_set['name'],
                                                   test_size=0.5)

# %%

# %% [markdown]
# ### Siamese Network Model With Triplet Loss Training
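
# %% [markdown]
# The `triplet_loss` function used in training comes from the local `functions`
# module, which is not shown in this notebook. The standard triplet loss pulls an
# anchor toward its positive and away from its negative by at least the margin:
# $L = \max(d(a, p) - d(a, n) + \text{margin},\ 0)$. A minimal sketch, assuming
# the real implementation follows this standard form (the `forward_image` /
# `forward_audio` tower names are illustrative, not taken from `classes.py`):

# %%
# Hypothetical sketch of triplet_loss -- the actual version lives in functions.py
import torch
import torch.nn.functional as F

def triplet_loss_sketch(model, anchor, positive, negative, margin=1.0):
    a = model.forward_image(anchor)    # embed the image anchors
    p = model.forward_audio(positive)  # embed the matching audio clips
    n = model.forward_audio(negative)  # embed the mismatched audio clips
    d_ap = F.pairwise_distance(a, p)   # anchor-positive distances
    d_an = F.pairwise_distance(a, n)   # anchor-negative distances
    return torch.clamp(d_ap - d_an + margin, min=0).mean()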

# %%
siamese_model = SiameseNetwork([512, 192], [[256, 512, 256], [256, 512, 256]], 256)
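
# %% [markdown]
# `SiameseNetwork` is defined in the local `classes` module and is not shown. A
# plausible sketch, assuming the constructor arguments are the per-tower input
# dims (512-d image, 192-d audio), the hidden-layer sizes of each tower, and the
# shared output dim (256):

# %%
# Hypothetical sketch -- the real class lives in classes.py
import torch.nn as nn

class SiameseNetworkSketch(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dim):
        super().__init__()
        towers = []
        for in_dim, hidden in zip(input_dims, hidden_dims):
            layers, prev = [], in_dim
            for h in hidden:
                layers += [nn.Linear(prev, h), nn.ReLU()]
                prev = h
            layers.append(nn.Linear(prev, output_dim))  # project into the shared space
            towers.append(nn.Sequential(*layers))
        self.image_tower, self.audio_tower = towers

    def forward_image(self, x):
        return self.image_tower(x)

    def forward_audio(self, x):
        return self.audio_tower(x)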

# %%
training_triplet_dataset = TripletDataset(train_set)
testing_triplet_dataset = TripletDataset(test_set)
validation_triplet_dataset = TripletDataset(validation_set)

# %%
train_triplet_dataloader = DataLoader(training_triplet_dataset, batch_size=32,
shuffle=True)
test_triplet_dataloader = DataLoader(testing_triplet_dataset, batch_size=32,
shuffle=True)
validation_triplet_dataloader = DataLoader(validation_triplet_dataset,
batch_size=32, shuffle=True)
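
# %% [markdown]
# `TripletDataset` also comes from `classes` and is not shown. A minimal sketch,
# assuming it wraps the anchor/positive/negative columns of a split DataFrame and
# yields one float tensor per column:

# %%
# Hypothetical sketch -- the real class lives in classes.py
import numpy as np
import torch
from torch.utils.data import Dataset

class TripletDatasetSketch(Dataset):
    def __init__(self, df):
        # stack each column of per-row embeddings into a single (N, dim) tensor
        self.anchor = torch.tensor(np.stack(df['anchor'].values), dtype=torch.float32)
        self.positive = torch.tensor(np.stack(df['positive'].values), dtype=torch.float32)
        self.negative = torch.tensor(np.stack(df['negative'].values), dtype=torch.float32)

    def __len__(self):
        return len(self.anchor)

    def __getitem__(self, idx):
        return self.anchor[idx], self.positive[idx], self.negative[idx]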

# %% [markdown]
# ##### Training with Early Stopping

# %%
# optimizer = optim.SGD(siamese_model.parameters(), lr=0.1)
optimizer = optim.Adam(siamese_model.parameters(), lr=0.0005)
epochs = 500

# %%
training_losses = []
testing_losses = []
early_stopping_indicators = 0
for epoch in range(epochs):

    total_loss = 0.0
    total_testing_loss = 0.0
    training_batches = 0
    testing_batches = 0
    for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
        training_batches += 1
        optimizer.zero_grad()
        loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                            negative_batch, margin=1.0)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # validation_losses.append(validation_loss)

    # evaluate on the test split without tracking gradients
    with torch.no_grad():
        for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
            testing_batches += 1
            testing_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                                        negative_batch, margin=1.0)
            total_testing_loss += testing_loss.item()

    total_training_loss_per_batch = total_loss / training_batches
    training_losses.append(total_training_loss_per_batch)
    total_testing_loss_per_batch = total_testing_loss / testing_batches
    testing_losses.append(total_testing_loss_per_batch)
    print(f"epoch: {epoch + 1} Training Loss per batch: "
          f"{total_training_loss_per_batch}, Testing Loss per batch: "
          f"{total_testing_loss_per_batch}\n")
    # stop when the mean test loss of the last 3 epochs exceeds the mean of the
    # 3 epochs before them, i.e. the test loss has started rising
    if (epoch > 8) and (np.mean(testing_losses[-6:-3]) < np.mean(testing_losses[-3:])):
        print('Early stopping')
        break

# %%
epoch_list = [i+1 for i in range(epoch + 1)]

# %%
plt.plot(epoch_list, training_losses, label='Training Loss')
plt.plot(epoch_list, testing_losses, label='Testing Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# %%
from classes import *

# %% [markdown]
# ### 1:2 Identification Accuracy
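
# %% [markdown]
# `identification_accuracy` comes from the local `functions` module. In a 1:2
# identification trial, each image anchor is paired with its true audio clip and
# one impostor clip, and the trial counts as correct when the anchor is closer to
# the true clip in the shared embedding space. A minimal sketch, reusing the same
# hypothetical `forward_image` / `forward_audio` tower names as above:

# %%
# Hypothetical sketch -- the actual version lives in functions.py
import torch
import torch.nn.functional as F

def identification_accuracy_sketch(model, dataloader):
    correct, total = 0, 0
    with torch.no_grad():
        for anchor, positive, negative in dataloader:
            a = model.forward_image(anchor)
            p = model.forward_audio(positive)
            n = model.forward_audio(negative)
            # correct when the anchor is nearer the true audio than the impostor
            correct += (F.pairwise_distance(a, p) < F.pairwise_distance(a, n)).sum().item()
            total += len(anchor)
    return correct / total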

# %%
ia = identification_accuracy(siamese_model, validation_triplet_dataloader)

# %%
print(f"1:2 Identification Accuracy for validation set is {ia*100}%")
