Breast Cancer Detection using VGG16
import os
import numpy as np
import pandas as pd

base_path = '/kaggle/input/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT'
categories = ['benign', 'malignant', 'normal']
# Collect (image_path, label) pairs per category, skipping the ground-truth mask files
data = []
for category in categories:
    category_dir = os.path.join(base_path, category)
    for file_name in sorted(os.listdir(category_dir)):
        if 'mask' not in file_name:
            data.append({'image_path': os.path.join(category_dir, file_name),
                         'label': category})
df = pd.DataFrame(data)
df.head()
image_path label
0 /kaggle/input/breast-ultrasound-images-dataset... benign
1 /kaggle/input/breast-ultrasound-images-dataset... benign
2 /kaggle/input/breast-ultrasound-images-dataset... benign
3 /kaggle/input/breast-ultrasound-images-dataset... benign
4 /kaggle/input/breast-ultrasound-images-dataset... benign
df.tail()
image_path label
775 /kaggle/input/breast-ultrasound-images-dataset... normal
776 /kaggle/input/breast-ultrasound-images-dataset... normal
777 /kaggle/input/breast-ultrasound-images-dataset... normal
778 /kaggle/input/breast-ultrasound-images-dataset... normal
779 /kaggle/input/breast-ultrasound-images-dataset... normal
df.shape
(780, 2)
df.columns
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 image_path 780 non-null object
1 label 780 non-null object
dtypes: object(2)
memory usage: 12.3+ KB
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(8, 6))
sns.set_style("whitegrid")
ax = sns.countplot(x='label', data=df)
for p in ax.patches:
    ax.annotate(f'{int(p.get_height())}',
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center',
                va='center',
                xytext=(0, 10),
                textcoords='offset points')
plt.title("Count of Each Category")
plt.xlabel("Category")
plt.ylabel("Count")
plt.show()
label_counts = df['label'].value_counts()
plt.figure(figsize=(8, 6))
plt.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%',
startangle=140, colors=['#ff9999','#66b3ff','#99ff99'])
plt.title("Distribution of Categories")
plt.show()
import cv2

# Show a few sample images per category (rows = categories, columns = samples)
num_images = 5
fig, axes = plt.subplots(len(categories), num_images, figsize=(15, 9))
for i, category in enumerate(categories):
    sample_paths = df[df['label'] == category]['image_path'].head(num_images).values
    for j, image_path in enumerate(sample_paths):
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        axes[i, j].imshow(img)
        axes[i, j].axis('off')
        if j == 0:
            axes[i, j].set_ylabel(category, fontsize=14)
plt.show()
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler

label_encoder = LabelEncoder()
df['category_encoded'] = label_encoder.fit_transform(df['label'])
df = df[['image_path', 'category_encoded']]

# Balance the three classes by randomly oversampling the minority classes
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(df[['image_path']], df['category_encoded'])
df_resampled = X_resampled.copy()
df_resampled['category_encoded'] = y_resampled
df_resampled
                                             image_path  category_encoded
0     /kaggle/input/breast-ultrasound-images-dataset...                 0
1     /kaggle/input/breast-ultrasound-images-dataset...                 0
2     /kaggle/input/breast-ultrasound-images-dataset...                 0
3     /kaggle/input/breast-ultrasound-images-dataset...                 0
4     /kaggle/input/breast-ultrasound-images-dataset...                 0
...                                                 ...               ...
1306  /kaggle/input/breast-ultrasound-images-dataset...                 2
1307  /kaggle/input/breast-ultrasound-images-dataset...                 2
1308  /kaggle/input/breast-ultrasound-images-dataset...                 2
1309  /kaggle/input/breast-ultrasound-images-dataset...                 2
1310  /kaggle/input/breast-ultrasound-images-dataset...                 2

[1311 rows x 2 columns]
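As a quick sanity check (not shown in the original notebook), the class counts after oversampling can be inspected; with 1311 rows and three classes, RandomOverSampler should have brought every class up to the size of the largest one (437, i.e. 1311 / 3):

# Sanity check (added here): each class should now have the same number of samples
print(df_resampled['category_encoded'].value_counts())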
import time
import shutil
import pathlib
import itertools
from PIL import Image
import cv2
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers
import warnings
warnings.filterwarnings("ignore")
print ('check')
check
df_resampled['category_encoded'] = df_resampled['category_encoded'].astype(str)
batch_size = 16
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
tr_gen = ImageDataGenerator(rescale=1./255)
ts_gen = ImageDataGenerator(rescale=1./255)
# Train / validation / test split of the resampled dataframe.
# (An 80/10/10 stratified split is assumed here; the original notebook does not show the exact ratios.)
train_df_new, temp_df = train_test_split(df_resampled, train_size=0.8, random_state=42,
                                         stratify=df_resampled['category_encoded'])
valid_df_new, test_df_new = train_test_split(temp_df, test_size=0.5, random_state=42,
                                             stratify=temp_df['category_encoded'])

train_gen_new = tr_gen.flow_from_dataframe(
train_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='sparse',
color_mode='rgb',
shuffle=True,
batch_size=batch_size
)
valid_gen_new = ts_gen.flow_from_dataframe(
valid_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='sparse',
color_mode='rgb',
shuffle=True,
batch_size=batch_size
)
test_gen_new = ts_gen.flow_from_dataframe(
test_df_new,
x_col='image_path',
y_col='category_encoded',
target_size=img_size,
class_mode='sparse',
color_mode='rgb',
shuffle=False,
batch_size=batch_size
)
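To confirm the generators yield what the model expects, one batch can be pulled and inspected (a quick check added here, not part of the original notebook); with the settings above, each batch should be 16 RGB images of shape 224x224x3, rescaled to [0, 1], with sparse integer labels:

batch_images, batch_labels = next(train_gen_new)
print(batch_images.shape)   # expected: (16, 224, 224, 3)
print(batch_labels[:5])     # sparse class indices (0, 1, or 2)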
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
print("Using GPU")
else:
print("Using CPU")
Using GPU
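ModelCheckpoint is imported above but its configuration is never shown; a minimal sketch of how it could be wired in alongside early stopping is given below. The file name and monitored metric are assumptions, not taken from the original notebook:

# Hypothetical callback setup; 'best_model.keras' and the monitored metric are assumptions
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy',
                             save_best_only=True, verbose=1)
# It could then be passed as callbacks=[early_stopping, checkpoint] in model.fit(...)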
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, GlobalAveragePooling2D, Dense, Dropout,
                                     BatchNormalization, GaussianNoise, MultiHeadAttention)

def create_vgg16_modified_model(input_shape, num_classes=3):
    inputs = Input(shape=input_shape)
    base_model = VGG16(weights='imagenet', input_tensor=inputs, include_top=False)
    x = base_model.output
    # Multi-head self-attention over the VGG16 feature maps
    attention_output = MultiHeadAttention(num_heads=8, key_dim=x.shape[-1])(x, x)
    x = GaussianNoise(0.25)(attention_output)
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = GaussianNoise(0.25)(x)
    x = Dropout(0.3)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    # Final softmax head over the three classes (benign, malignant, normal)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

cnn_modified_model = create_vgg16_modified_model(img_shape)
cnn_modified_model.compile(optimizer=Adam(learning_rate=0.0001),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# Early stopping on validation loss (patience value assumed; not shown in the original)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = cnn_modified_model.fit(
train_gen_new,
validation_data=valid_gen_new,
epochs=5,
callbacks=[early_stopping],
verbose=1
)
Epoch 1/5
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
test_labels = test_gen_new.classes
predictions = cnn_modified_model.predict(test_gen_new)
predicted_classes = np.argmax(predictions, axis=1)

conf_matrix = confusion_matrix(test_labels, predicted_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
xticklabels=list(test_gen_new.class_indices.keys()),
yticklabels=list(test_gen_new.class_indices.keys()))
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
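The per-class precision, recall, and F1 values used in the comparison chart at the end presumably come from a classification report; a sketch of how to print it for this model is below (the same call applies to the Xception model further down):

# Per-class precision / recall / F1 for the VGG16-based model
print(classification_report(test_labels, predicted_classes,
                            target_names=list(test_gen_new.class_indices.keys())))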
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, GaussianNoise, Input, MultiHeadAttention
from tensorflow.keras.optimizers import Adam
def create_xception_modified_model(input_shape, num_classes=3):
    inputs = Input(shape=input_shape)
    base_model = Xception(weights='imagenet', input_tensor=inputs, include_top=False)
    x = base_model.output
    # Multi-head self-attention over the Xception feature maps
    attention_output = MultiHeadAttention(num_heads=8, key_dim=x.shape[-1])(x, x)
    x = GaussianNoise(0.25)(attention_output)
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = GaussianNoise(0.25)(x)
    x = Dropout(0.3)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    # Final softmax head over the three classes (benign, malignant, normal)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

cnn_modified_model_xception = create_xception_modified_model(img_shape)
cnn_modified_model_xception.compile(optimizer=Adam(learning_rate=0.0001),
                                    loss='sparse_categorical_crossentropy',
                                    metrics=['accuracy'])
history = cnn_modified_model_xception.fit(train_gen_new, validation_data=valid_gen_new,
                                          epochs=5, callbacks=[early_stopping], verbose=1)
Epoch 1/5
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
test_labels = test_gen_new.classes
predictions = cnn_modified_model_xception.predict(test_gen_new)
predicted_classes = np.argmax(predictions, axis=1)

conf_matrix = confusion_matrix(test_labels, predicted_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
xticklabels=list(test_gen_new.class_indices.keys()),
yticklabels=list(test_gen_new.class_indices.keys()))
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
classes = ['Class 0', 'Class 1', 'Class 2']
vgg16_precision = [1.00, 0.91, 0.80]
vgg16_recall = [0.68, 0.98, 1.00]
vgg16_f1_score = [0.81, 0.95, 0.89]
# NOTE: the Xception per-class scores are not reproduced here; fill them in from
# its classification report to complete the side-by-side comparison.

x = np.arange(len(classes))
width = 0.25

# Grouped bar layout (assumed): one group of bars per class
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x - width, vgg16_precision, width, label='VGG16 Precision')
ax.bar(x, vgg16_recall, width, label='VGG16 Recall')
ax.bar(x + width, vgg16_f1_score, width, label='VGG16 F1-Score')

ax.set_xlabel('Classes')
ax.set_ylabel('Scores')
ax.set_title('Precision, Recall, and F1-Score Comparison: VGG16 vs Xception')
ax.set_xticks(x)
ax.set_xticklabels(classes)
ax.legend()
plt.tight_layout()
plt.show()
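Rather than hard-coding the scores, the per-class metrics could be computed directly from each model's predictions. The sketch below assumes test_labels and predicted_classes for the model of interest are still in scope; precision_recall_fscore_support is a scikit-learn utility, not something used in the original notebook:

from sklearn.metrics import precision_recall_fscore_support

# Per-class precision, recall, and F1 arrays for one model's test predictions
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, predicted_classes,
                                                           labels=[0, 1, 2])
print(precision, recall, f1)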
Copyright @ThinkAI - A Machine Learning Community