Breast Cancer Detection using VGG16 and Xception with Multihead Attention Mechanism

This notebook details a breast cancer detection project using VGG16 and Xception models enhanced with a multi-head attention mechanism. It covers data preprocessing, including random oversampling to balance the class distribution, and training of modified VGG16 and Xception models on breast ultrasound images. Each model is evaluated with accuracy and loss curves, a classification report, and a confusion matrix; both reach an overall accuracy of 89% on the test set.

import os
import numpy as np
import pandas as pd

base_path = '/kaggle/input/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'
categories = ['benign', 'malignant', 'normal']

data = []

for category in categories:
    category_path = os.path.join(base_path, category)
    for filename in os.listdir(category_path):
        # BUSI stores a segmentation mask alongside each scan with a
        # '_mask' suffix; keep only the raw scans.
        if filename.endswith('.png') and '_mask' not in filename:
            file_path = os.path.join(category_path, filename)
            data.append({'image_path': file_path, 'label': category})

df = pd.DataFrame(data)

df.head()
image_path label
0 /kaggle/input/breast-ultrasound-images-dataset... benign
1 /kaggle/input/breast-ultrasound-images-dataset... benign
2 /kaggle/input/breast-ultrasound-images-dataset... benign
3 /kaggle/input/breast-ultrasound-images-dataset... benign
4 /kaggle/input/breast-ultrasound-images-dataset... benign

df.tail()

image_path label
775 /kaggle/input/breast-ultrasound-images-dataset... normal
776 /kaggle/input/breast-ultrasound-images-dataset... normal
777 /kaggle/input/breast-ultrasound-images-dataset... normal
778 /kaggle/input/breast-ultrasound-images-dataset... normal
779 /kaggle/input/breast-ultrasound-images-dataset... normal

df.shape

(780, 2)

df.columns

Index(['image_path', 'label'], dtype='object')

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 image_path 780 non-null object
1 label 780 non-null object
dtypes: object(2)
memory usage: 12.3+ KB

import seaborn as sns


import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
sns.set_style("whitegrid")

ax = sns.countplot(data=df, x='label', palette='viridis')

for p in ax.patches:
    ax.annotate(f'{int(p.get_height())}',
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center',
                va='center',
                xytext=(0, 10),
                textcoords='offset points')
plt.title("Count of Each Category")
plt.xlabel("Category")
plt.ylabel("Count")

plt.show()
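Since the rendered figures are not reproduced here, the raw counts are easy to print directly; for the BUSI dataset these should be 437 benign, 210 malignant, and 133 normal scans (780 total), which is the imbalance the oversampling step below corrects. A minimal check:

# Print the class counts that the bar chart annotates.
print(df['label'].value_counts())
# Expected for BUSI: benign 437, malignant 210, normal 133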

label_counts = df['label'].value_counts()

plt.figure(figsize=(8, 6))
plt.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%',
startangle=140, colors=['#ff9999','#66b3ff','#99ff99'])

plt.title("Distribution of Categories")

plt.show()
import cv2

num_images = 5

fig, axes = plt.subplots(len(categories), num_images, figsize=(15, 10))
fig.suptitle("5 Sample Images from Each Category", fontsize=16)

for i, category in enumerate(categories):
    sample_images = df[df['label'] == category].sample(num_images,
                                                       random_state=42)
    for j, image_path in enumerate(sample_images['image_path']):
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        axes[i, j].imshow(img)
        axes[i, j].axis('off')
        if j == 0:
            axes[i, j].set_ylabel(category, fontsize=14)

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

df['category_encoded'] = label_encoder.fit_transform(df['label'])

df = df[['image_path', 'category_encoded']]
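LabelEncoder assigns integer codes in sorted order, so the mapping here is benign → 0, malignant → 1, normal → 2; this is worth keeping in mind when reading "Class 0/1/2" in the reports and charts below. A quick way to confirm it:

# Recover the label -> code mapping produced by LabelEncoder.
mapping = dict(zip(label_encoder.classes_,
                   label_encoder.transform(label_encoder.classes_)))
print(mapping)  # {'benign': 0, 'malignant': 1, 'normal': 2}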

from imblearn.over_sampling import RandomOverSampler

ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(df[['image_path']],
df['category_encoded'])

df_resampled = pd.DataFrame(X_resampled, columns=['image_path'])
df_resampled['category_encoded'] = y_resampled

print("\nClass distribution after oversampling:")


print(df_resampled['category_encoded'].value_counts())

Class distribution after oversampling:
category_encoded
0    437
1    437
2    437
Name: count, dtype: int64

df_resampled

                                             image_path  category_encoded
0     /kaggle/input/breast-ultrasound-images-dataset...                 0
1     /kaggle/input/breast-ultrasound-images-dataset...                 0
2     /kaggle/input/breast-ultrasound-images-dataset...                 0
3     /kaggle/input/breast-ultrasound-images-dataset...                 0
4     /kaggle/input/breast-ultrasound-images-dataset...                 0
...                                                 ...               ...
1306  /kaggle/input/breast-ultrasound-images-dataset...                 2
1307  /kaggle/input/breast-ultrasound-images-dataset...                 2
1308  /kaggle/input/breast-ultrasound-images-dataset...                 2
1309  /kaggle/input/breast-ultrasound-images-dataset...                 2
1310  /kaggle/input/breast-ultrasound-images-dataset...                 2

[1311 rows x 2 columns]
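RandomOverSampler balances the classes by duplicating rows of the minority classes, so the resampled frame contains repeated image paths rather than new images. A small sanity check (a sketch, assuming the df_resampled frame above):

# 780 unique originals were resampled to 1311 rows, so oversampling should
# have introduced 1311 - 780 = 531 duplicate rows.
n_duplicates = df_resampled['image_path'].duplicated().sum()
print(f"Duplicate rows introduced by oversampling: {n_duplicates}")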

import time
import shutil
import pathlib
import itertools
from PIL import Image

import cv2
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten,
                                     Dense, Activation, Dropout, BatchNormalization)
from tensorflow.keras import regularizers

import warnings
warnings.filterwarnings("ignore")

print('check')

check

df_resampled['category_encoded'] = df_resampled['category_encoded'].astype(str)

train_df_new, temp_df_new = train_test_split(
    df_resampled,
    train_size=0.8,
    shuffle=True,
    random_state=42,
    stratify=df_resampled['category_encoded']
)

valid_df_new, test_df_new = train_test_split(
    temp_df_new,
    test_size=0.5,
    shuffle=True,
    random_state=42,
    stratify=temp_df_new['category_encoded']
)
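The two splits above implement an 80/10/10 train/validation/test partition, stratified so each subset keeps the balanced class ratio. Printing the sizes (a minimal check) should match the generator counts reported below:

# Expected: 1048 train, 131 validation, 132 test (80/10/10 of 1311 rows).
print(f"Train: {len(train_df_new)}, "
      f"Valid: {len(valid_df_new)}, "
      f"Test: {len(test_df_new)}")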

from tensorflow.keras.preprocessing.image import ImageDataGenerator

batch_size = 16
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)

train_gen_new = tr_gen.flow_from_dataframe(
    train_df_new,
    x_col='image_path',
    y_col='category_encoded',
    target_size=img_size,
    class_mode='sparse',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

valid_gen_new = ts_gen.flow_from_dataframe(
    valid_df_new,
    x_col='image_path',
    y_col='category_encoded',
    target_size=img_size,
    class_mode='sparse',
    color_mode='rgb',
    shuffle=True,
    batch_size=batch_size
)

test_gen_new = ts_gen.flow_from_dataframe(
    test_df_new,
    x_col='image_path',
    y_col='category_encoded',
    target_size=img_size,
    class_mode='sparse',
    color_mode='rgb',
    shuffle=False,  # keep order so test_gen_new.classes aligns with predictions
    batch_size=batch_size
)

Found 1048 validated image filenames belonging to 3 classes.
Found 131 validated image filenames belonging to 3 classes.
Found 132 validated image filenames belonging to 3 classes.
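Because class_mode='sparse', the generators yield integer class ids rather than one-hot vectors, which is what the sparse_categorical_crossentropy loss used later expects; rescale=1./255 likewise maps pixel values into [0, 1]. A quick inspection of one batch (a sketch; the exact labels depend on shuffling):

# Pull one batch and check shapes, pixel range, and label format.
images, labels = next(train_gen_new)
print(images.shape)                # (16, 224, 224, 3)
print(images.min(), images.max())  # within [0.0, 1.0] after rescaling
print(labels[:8])                  # integer class ids, e.g. [0. 2. 1. ...]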

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
print("Using GPU")
else:
print("Using CPU")

Using GPU

early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                               restore_best_weights=True)

from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (GlobalAveragePooling2D, Dense, Dropout,
                                     BatchNormalization, GaussianNoise, Input,
                                     MultiHeadAttention)
from tensorflow.keras.optimizers import Adam

def create_vgg16_modified_model(input_shape):
    inputs = Input(shape=input_shape)
    base_model = VGG16(weights='imagenet', input_tensor=inputs,
                       include_top=False)

    # Freeze all but the last four layers of the backbone.
    for layer in base_model.layers[:-4]:
        layer.trainable = False

    x = base_model.output

    # Self-attention over the spatial grid of the final feature map.
    attention_output = MultiHeadAttention(num_heads=8,
                                          key_dim=x.shape[-1])(x, x)
    x = GaussianNoise(0.25)(attention_output)
    x = GlobalAveragePooling2D()(x)

    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = GaussianNoise(0.25)(x)
    x = Dropout(0.3)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)

    outputs = Dense(3, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    return model
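The attention block deserves a brief note: VGG16's convolutional output for 224x224 inputs is a (None, 7, 7, 512) feature map, and Keras' MultiHeadAttention accepts inputs of rank greater than 3, attending over the spatial axes by default and preserving the input shape. A minimal standalone sketch with a stand-in tensor, not the notebook's data:

import tensorflow as tf

# Stand-in for VGG16's final conv feature map on a 224x224 image.
feat = tf.random.normal((1, 7, 7, 512))

# Same configuration as the model above: self-attention over the 7x7 grid.
mha = tf.keras.layers.MultiHeadAttention(num_heads=8, key_dim=512)
out = mha(feat, feat)
print(out.shape)  # (1, 7, 7, 512): spatial layout and channels are preserved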

input_shape = (224, 224, 3)

cnn_modified_model = create_vgg16_modified_model(input_shape)

cnn_modified_model.compile(optimizer=Adam(learning_rate=0.0001),
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
58889256/58889256 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step

history = cnn_modified_model.fit(
    train_gen_new,
    validation_data=valid_gen_new,
    epochs=5,  # note: with patience=5 and only 5 epochs, early stopping cannot trigger
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/5
[XLA/ptxas compilation log messages omitted]
66/66 ━━━━━━━━━━━━━━━━━━━━ 99s 876ms/step - accuracy: 0.5235 - loss: 1.0582 - val_accuracy: 0.4885 - val_loss: 2.3655
Epoch 2/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 13s 173ms/step - accuracy: 0.7861 - loss: 0.5245 - val_accuracy: 0.3664 - val_loss: 3.5323
Epoch 3/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 13s 175ms/step - accuracy: 0.8943 - loss: 0.3154 - val_accuracy: 0.6870 - val_loss: 1.0176
Epoch 4/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 13s 176ms/step - accuracy: 0.9167 - loss: 0.2553 - val_accuracy: 0.4046 - val_loss: 2.2229
Epoch 5/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 13s 179ms/step - accuracy: 0.9352 - loss: 0.1700 - val_accuracy: 0.8321 - val_loss: 0.5656

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
test_labels = test_gen_new.classes
predictions = cnn_modified_model.predict(test_gen_new)
predicted_classes = np.argmax(predictions, axis=1)

9/9 ━━━━━━━━━━━━━━━━━━━━ 12s 1s/step
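Because the test generator was created with shuffle=False, test_gen_new.classes lines up index-for-index with the prediction rows, so the overall accuracy can be checked directly (a minimal sketch; the value should match the 0.89 in the report below):

# Fraction of test images whose argmax prediction matches the true label.
test_acc = (predicted_classes == test_labels).mean()
print(f"Test accuracy: {test_acc:.2%}")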

report = classification_report(test_labels, predicted_classes,
                               target_names=list(test_gen_new.class_indices.keys()))
print(report)

              precision    recall  f1-score   support

           0       1.00      0.68      0.81        44
           1       0.91      0.98      0.95        44
           2       0.80      1.00      0.89        44

    accuracy                           0.89       132
   macro avg       0.90      0.89      0.88       132
weighted avg       0.90      0.89      0.88       132

conf_matrix = confusion_matrix(test_labels, predicted_classes)

plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
xticklabels=list(test_gen_new.class_indices.keys()),
yticklabels=list(test_gen_new.class_indices.keys()))
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (GlobalAveragePooling2D, Dense, Dropout,
                                     BatchNormalization, GaussianNoise, Input,
                                     MultiHeadAttention)
from tensorflow.keras.optimizers import Adam

def create_xception_modified_model(input_shape):
    inputs = Input(shape=input_shape)
    base_model = Xception(weights='imagenet', input_tensor=inputs,
                          include_top=False)

    # Freeze all but the last four layers of the backbone.
    for layer in base_model.layers[:-4]:
        layer.trainable = False

    x = base_model.output

    # Self-attention over the spatial grid of the final feature map.
    attention_output = MultiHeadAttention(num_heads=8,
                                          key_dim=x.shape[-1])(x, x)
    x = GaussianNoise(0.25)(attention_output)
    x = GlobalAveragePooling2D()(x)

    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = GaussianNoise(0.25)(x)
    x = Dropout(0.3)(x)

    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)

    outputs = Dense(3, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    return model
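One detail worth noting (based on Keras' documented behaviour rather than anything stated in the notebook): Xception's default input size is 299x299, but with include_top=False it accepts other spatial sizes, so reusing 224x224 to match the VGG16 pipeline is valid; the final feature map then has 2048 channels, which becomes the key_dim of the attention layer. A standalone shape check, using weights=None to avoid a download:

import tensorflow as tf

# Xception without its classification head on 224x224 inputs.
base = tf.keras.applications.Xception(weights=None, include_top=False,
                                      input_shape=(224, 224, 3))
print(base.output_shape)  # expected: (None, 7, 7, 2048)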

input_shape = (224, 224, 3)

cnn_modified_model_xception = create_xception_modified_model(input_shape)

cnn_modified_model_xception.compile(optimizer=Adam(learning_rate=0.0001),
                                    loss='sparse_categorical_crossentropy',
                                    metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
83683744/83683744 ━━━━━━━━━━━━━━━━━━━━ 3s 0us/step
history = cnn_modified_model_xception.fit(
    train_gen_new,
    validation_data=valid_gen_new,
    epochs=5,
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/5
[XLA/ptxas compilation log messages omitted]
66/66 ━━━━━━━━━━━━━━━━━━━━ 112s 1s/step - accuracy: 0.6036 - loss: 0.9194 - val_accuracy: 0.4962 - val_loss: 0.9124
Epoch 2/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 22s 309ms/step - accuracy: 0.8305 - loss: 0.4508 - val_accuracy: 0.8397 - val_loss: 0.4801
Epoch 3/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 22s 315ms/step - accuracy: 0.9268 - loss: 0.2183 - val_accuracy: 0.9084 - val_loss: 0.3146
Epoch 4/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 21s 297ms/step - accuracy: 0.9476 - loss: 0.1515 - val_accuracy: 0.8168 - val_loss: 0.4342
Epoch 5/5
66/66 ━━━━━━━━━━━━━━━━━━━━ 22s 306ms/step - accuracy: 0.9766 - loss: 0.0723 - val_accuracy: 0.9084 - val_loss: 0.2720

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
test_labels = test_gen_new.classes
predictions = cnn_modified_model_xception.predict(test_gen_new)
predicted_classes = np.argmax(predictions, axis=1)

9/9 ━━━━━━━━━━━━━━━━━━━━ 15s 2s/step

report = classification_report(test_labels, predicted_classes,
                               target_names=list(test_gen_new.class_indices.keys()))
print(report)

              precision    recall  f1-score   support

           0       1.00      0.68      0.81        44
           1       0.78      0.98      0.87        44
           2       0.94      1.00      0.97        44

    accuracy                           0.89       132
   macro avg       0.91      0.89      0.88       132
weighted avg       0.91      0.89      0.88       132

conf_matrix = confusion_matrix(test_labels, predicted_classes)

plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
xticklabels=list(test_gen_new.class_indices.keys()),
yticklabels=list(test_gen_new.class_indices.keys()))
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
classes = ['Class 0', 'Class 1', 'Class 2']
vgg16_precision = [1.00, 0.91, 0.80]
vgg16_recall = [0.68, 0.98, 1.00]
vgg16_f1_score = [0.81, 0.95, 0.89]

xception_precision = [1.00, 0.78, 0.94]
xception_recall = [0.68, 0.98, 1.00]
xception_f1_score = [0.81, 0.87, 0.97]

x = np.arange(len(classes))
width = 0.13  # narrow bars so all six fit side by side without overlapping

fig, ax = plt.subplots(figsize=(10, 6))

# VGG16 bars occupy the left half of each class group.
ax.bar(x - 2.5 * width, vgg16_precision, width, label='VGG16 Precision',
       color='blue', alpha=0.7)
ax.bar(x - 1.5 * width, vgg16_recall, width, label='VGG16 Recall',
       color='green', alpha=0.7)
ax.bar(x - 0.5 * width, vgg16_f1_score, width, label='VGG16 F1-Score',
       color='red', alpha=0.7)

# Xception bars occupy the right half of each class group.
ax.bar(x + 0.5 * width, xception_precision, width, label='Xception Precision',
       color='darkblue', alpha=0.7)
ax.bar(x + 1.5 * width, xception_recall, width, label='Xception Recall',
       color='darkgreen', alpha=0.7)
ax.bar(x + 2.5 * width, xception_f1_score, width, label='Xception F1-Score',
       color='darkred', alpha=0.7)

ax.set_xlabel('Classes')
ax.set_ylabel('Scores')
ax.set_title('Precision, Recall, and F1-Score Comparison: VGG16 vs Xception')
ax.set_xticks(x)
ax.set_xticklabels(classes)
ax.legend()

plt.tight_layout()
plt.show()
Copyright @ThinkAI - A Machine Learning Community