0% found this document useful (0 votes)
85 views

22

The document discusses loading and preprocessing breast cancer histopathology images for machine learning. Image files are gathered from directories and sorted into two classes - cancer and no cancer. The images are resized and converted to arrays along with their class labels. The data is split into training and test sets, encoded, and fed into a deep neural network model for classification. The model consists of 10 dense layers and is trained for 10 epochs, achieving around 66.96% accuracy on the test set.

Uploaded by

Arpita Das
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
85 views

22

The document discusses loading and preprocessing breast cancer histopathology images for machine learning. Image files are gathered from directories and sorted into two classes - cancer and no cancer. The images are resized and converted to arrays along with their class labels. The data is split into training and test sets, encoded, and fed into a deep neural network model for classification. The model consists of 10 dense layers and is trained for 10 epochs, achieving around 66.96% accuracy on the test set.

Uploaded by

Arpita Das
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 7

import pandas as pd

import numpy as np
import os
from glob import glob
import random
import matplotlib.pylab as plt
../input/breast-histopathology-images
mypaths=[]
for name in glob('../input/breast-histopathology-
images/IDC_regular_ps50_idx5/*',recursive=True):
mypaths.append(name)
print(mypaths[:5])
['../input/breast-histopathology-images/IDC_regular_ps50_idx5/10295',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10304',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12868',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10274',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12818']
len(mypaths)
279
mypaths[:5]
['../input/breast-histopathology-images/IDC_regular_ps50_idx5/10295',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10304',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12868',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10274',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12818']
mp=mypaths[60:100]
imagePatches=[]
for i in mp:
imagePatches+=glob(i+'/*/*.png', recursive=True)
len(imagePatches)
41604
# Two arrays holding images by class type

class0 = [] # 0 = no cancer
class1 = [] # 1 = cancer

for filename in imagePatches:


if filename.endswith("class0.png"):
class0.append(filename)
else:
class1.append(filename)
print(len(class0))
print("hi")
len(class1)
29338
hi
12266
sampled_class0 = random.sample(class0, len(class1))
sampled_class1 = random.sample(class1, len(class1))
len(sampled_class1)
12266
print(len(sampled_class0))
12266
from matplotlib.image import imread
import cv2

def get_image_arrays(data, label):


c=0
img_arrays = []
for i in data:
if i.endswith('.png'):
img = cv2.imread(i ,cv2.IMREAD_GRAYSCALE)
img_sized = cv2.resize(img, (50, 50), interpolation=cv2.INTER_LINEAR)
img_arrays.append([img_sized, label])
c+=1
return img_arrays,c
class0_array,c0 = get_image_arrays(sampled_class0, 0)
class1_array,c1 = get_image_arrays(sampled_class1[:int(len(sampled_class1)/2)], 1)
print('done')
done
print(c0)
print(c1)
12266
6133
class0_array[0]
[array([[123, 147, 148, ..., 123, 117, 109],
[105, 119, 148, ..., 120, 107, 126],
[110, 127, 106, ..., 144, 160, 185],
...,
[232, 219, 213, ..., 132, 161, 164],
[230, 230, 215, ..., 141, 160, 167],
[229, 231, 227, ..., 129, 136, 148]], dtype=uint8),
0]

combined_data = np.concatenate((class0_array, class1_array))


#random.seed(200)
#random.shuffle(combined_data)
<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested
sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different
lengths or shapes) is deprecated. If you meant to do this, you must specify
'dtype=object' when creating the ndarray
len(combined_data)
18399
combined_data[0]
array([array([[123, 147, 148, ..., 123, 117, 109],
[105, 119, 148, ..., 120, 107, 126],
[110, 127, 106, ..., 144, 160, 185],
...,
[232, 219, 213, ..., 132, 161, 164],
[230, 230, 215, ..., 141, 160, 167],
[229, 231, 227, ..., 129, 136, 148]], dtype=uint8),
0], dtype=object)
X = []
y = []

for features,label in combined_data:


X.append(features)
y.append(label)
c0=0
c1=0
for i in y:
if i==0:
c0+=1
else:
c1+=1
print(c0)
print(c1)
12266
6133
len(y)
18399
X[400]
array([[185, 165, 172, ..., 183, 177, 178],
[158, 168, 176, ..., 169, 178, 193],
[153, 177, 172, ..., 176, 161, 192],
...,
[177, 173, 169, ..., 174, 176, 168],
[164, 199, 174, ..., 187, 172, 165],
[171, 177, 185, ..., 186, 181, 169]], dtype=uint8)
y[40]
0
X = np.array(X).reshape(-1, 50*50)
X.shape
(18399, 2500)
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40,
random_state=70)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
(11039, 2500) (7360, 2500) (11039, 2) (7360, 2)
cn=0
cp=0
for i in y_test:
if i[0]>i[1]:
cn+=1
else:
cp+=1
print(cp)
print(cn)
2432
4928
X_train[0].shape
(2500,)
import tensorflow as tf
from tensorflow import keras
len(X_train)
11039
#plt.matshow(X_train[0])
y_test[0]
array([1., 0.], dtype=float32)
X_train[0]
array([238, 234, 227, ..., 132, 110, 169], dtype=uint8)
model=keras.Sequential([
keras.layers.Dense(1250,input_shape=(2500,),activation='sigmoid'),
keras.layers.Dense(625,input_shape=(1250,),activation='sigmoid'),
keras.layers.Dense(300,input_shape=(625,),activation='sigmoid'),
keras.layers.Dense(150,input_shape=(300,),activation='sigmoid'),
keras.layers.Dense(75,input_shape=(150,),activation='sigmoid'),
keras.layers.Dense(37,input_shape=(75,),activation='sigmoid'),
keras.layers.Dense(18,input_shape=(37,),activation='sigmoid'),
keras.layers.Dense(9,input_shape=(18,),activation='sigmoid'),
keras.layers.Dense(4,input_shape=(9,),activation='sigmoid'),
keras.layers.Dense(2,input_shape=(4,),activation='sigmoid')
])
model.compile(
optimizer=tf.optimizers.Adam(lr=0.01),
loss='binary_crossentropy',
metrics=['accuracy','Recall','Precision']
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 1250) 3126250
_________________________________________________________________
dense_1 (Dense) (None, 625) 781875
_________________________________________________________________
dense_2 (Dense) (None, 300) 187800
_________________________________________________________________
dense_3 (Dense) (None, 150) 45150
_________________________________________________________________
dense_4 (Dense) (None, 75) 11325
_________________________________________________________________
dense_5 (Dense) (None, 37) 2812
_________________________________________________________________
dense_6 (Dense) (None, 18) 684
_________________________________________________________________
dense_7 (Dense) (None, 9) 171
_________________________________________________________________
dense_8 (Dense) (None, 4) 40
_________________________________________________________________
dense_9 (Dense) (None, 2) 10
=================================================================
Total params: 4,156,117
Trainable params: 4,156,117
Non-trainable params: 0
_________________________________________________________________
history=model.fit(X_train,y_train, validation_data=(X_test, y_test),epochs=10)
Epoch 1/10
345/345 [==============================] - 13s 33ms/step - loss: 0.6407 - accuracy:
0.6631 - recall: 0.6428 - precision: 0.6618 - val_loss: 0.6350 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 2/10
345/345 [==============================] - 10s 29ms/step - loss: 0.6400 - accuracy:
0.6625 - recall: 0.6625 - precision: 0.6625 - val_loss: 0.6345 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 3/10
345/345 [==============================] - 10s 29ms/step - loss: 0.6390 - accuracy:
0.6640 - recall: 0.6640 - precision: 0.6640 - val_loss: 0.6348 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 5/10
345/345 [==============================] - 10s 29ms/step - loss: 0.6428 - accuracy:
0.6589 - recall: 0.6589 - precision: 0.6589 - val_loss: 0.6346 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 6/10
345/345 [==============================] - 10s 30ms/step - loss: 0.6397 - accuracy:
0.6624 - recall: 0.6624 - precision: 0.6624 - val_loss: 0.6345 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 7/10
345/345 [==============================] - 10s 30ms/step - loss: 0.6394 - accuracy:
0.6628 - recall: 0.6628 - precision: 0.6628 - val_loss: 0.6345 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 8/10
345/345 [==============================] - 10s 30ms/step - loss: 0.6416 - accuracy:
0.6596 - recall: 0.6596 - precision: 0.6596 - val_loss: 0.6346 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 9/10
345/345 [==============================] - 10s 30ms/step - loss: 0.6396 - accuracy:
0.6630 - recall: 0.6630 - precision: 0.6630 - val_loss: 0.6346 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
Epoch 10/10
345/345 [==============================] - 10s 30ms/step - loss: 0.6398 - accuracy:
0.6627 - recall: 0.6627 - precision: 0.6627 - val_loss: 0.6351 - val_accuracy:
0.6696 - val_recall: 0.6696 - val_precision: 0.6696
#X_train=X_train/255
#X_test=X_test/255
#model.fit(X_train,y_train,epochs=100)
e=model.evaluate(X_test,y_test)
230/230 [==============================] - 2s 9ms/step - loss: 0.6351 - accuracy:
0.6696 - recall: 0.6696 - precision: 0.6696
y_test
array([[1., 0.],
[1., 0.],
[1., 0.],
...,
[1., 0.],
[1., 0.],
[1., 0.]], dtype=float32)
y_predicted=model.predict(X_test)
X_test[0]
array([144, 148, 137, ..., 118, 126, 136], dtype=uint8)
print(model.summary())
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 1250) 3126250
_________________________________________________________________
dense_1 (Dense) (None, 625) 781875
_________________________________________________________________
dense_2 (Dense) (None, 300) 187800
_________________________________________________________________
dense_3 (Dense) (None, 150) 45150
_________________________________________________________________
dense_4 (Dense) (None, 75) 11325
_________________________________________________________________
dense_5 (Dense) (None, 37) 2812
_________________________________________________________________
dense_6 (Dense) (None, 18) 684
_________________________________________________________________
dense_7 (Dense) (None, 9) 171
_________________________________________________________________
dense_8 (Dense) (None, 4) 40
_________________________________________________________________
dense_9 (Dense) (None, 2) 10
=================================================================
Total params: 4,156,117
Trainable params: 4,156,117
Non-trainable params: 0
_________________________________________________________________
None
import matplotlib.pyplot as plt
#plotting the Accuracy of test and training sets
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

y_pred=model.predict(X_test)
Y_pred=[]
for i in y_pred:
if i[0]>i[1]:
Y_pred.append(0)
else:
Y_pred.append(1)
Y_test=[]
for i in y_test:
if i[0]>i[1]:
Y_test.append(0)
else:
Y_test.append(1)
from sklearn.metrics import classification_report, confusion_matrix
print('Confusion Matrix')
print(confusion_matrix(Y_test, Y_pred))
print('Classification Report')
print(classification_report(Y_test, Y_pred, target_names=['Negative','Positive']))
Confusion Matrix
[[4928 0]
[2432 0]]
Classification Report
precision recall f1-score support

Negative 0.67 1.00 0.80 4928


Positive 0.00 0.00 0.00 2432

accuracy 0.67 7360


macro avg 0.33 0.50 0.40 7360
weighted avg 0.45 0.67 0.54 7360

/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0
in labels with no predicted samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0
in labels with no predicted samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0
in labels with no predicted samples. Use `zero_division` parameter to control this
behavior.
_warn_prf(average, modifier, msg_start, len(result))
confusionmatrix=confusion_matrix(Y_test, Y_pred)
classes=[0,1]
con_mat_df = pd.DataFrame(confusion_matrix(Y_test, Y_pred),
index = classes,
columns = classes)
con_mat_df
0 1
0 4928 0
1 2432 0
import seaborn as sns
figure = plt.figure(figsize=(6, 6))
sns.heatmap(con_mat_df, annot=True,cmap=plt.cm.cool,fmt='d')
plt.tight_layout()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

l=[[1501,88],[125,176]]
cm=np.array(l)
cm
array([[1501, 88],
[ 125, 176]])
classes=[0,1]
con_mat_df = pd.DataFrame(cm,
index = classes,
columns = classes)
import seaborn as sns
figure = plt.figure(figsize=(6, 6))
sns.heatmap(con_mat_df, annot=True,cmap=plt.cm.cool,fmt='d')
plt.tight_layout()
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

You might also like