Ddos Dataset: Import As Import As Import As Import As From Import
Ddos Dataset: Import As Import As Import As Import As From Import
DDos Dataset
In [1]: import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
drive.mount('/content/drive')
Mounted at /content/drive
drive.mount('/content/drive',force_remount=True)
Mounted at /content/drive
In [4]: df =pd.read_csv('/content/drive/MyDrive/DDoS/compiled.csv')
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882:
DtypeWarning: Columns (85) have mixed types.Specify dtype option on import or
set low_memory=False.
In [5]: df.shape
In [6]: # df.to_csv('/content/drive/MyDrive/DDoS/compiled.csv',index=False)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 1/51
6/20/22, 12:53 PM Copy_of_DDoS
In [7]: np.array(df.dtypes)
In [8]: df.head()
Out[8]:
Unnamed: Source Destination Destination
Flow ID Source IP Protocol Timestam
0 Port IP Port
172.16.0.5-
2018-12-0
0 12368 192.168.50.1- 172.16.0.5 550 192.168.50.1 1068 17
11:06:24.33969
550-1068-17
172.16.0.5-
2018-12-0
1 24112 192.168.50.1- 172.16.0.5 939 192.168.50.1 62932 17
11:06:21.1350
939-62932-17
172.16.0.5-
2018-12-0
2 23589 192.168.50.1- 172.16.0.5 564 192.168.50.1 32767 17
11:06:08.77624
564-32767-17
172.16.0.5-
2018-12-0
3 11258 192.168.50.1- 172.16.0.5 564 192.168.50.1 42118 17
11:06:19.0182
564-42118-17
172.16.0.5-
2018-12-0
4 9526 192.168.50.1- 172.16.0.5 559 192.168.50.1 10300 17
11:06:11.8384
559-10300-17
5 rows × 88 columns
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 2/51
6/20/22, 12:53 PM Copy_of_DDoS
In [9]: np.array(df.columns)
Out[9]: array(['Unnamed: 0', 'Flow ID', ' Source IP', ' Source Port',
' Destination IP', ' Destination Port', ' Protocol', ' Timestamp',
' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets',
' Fwd Packet Length Max', ' Fwd Packet Length Min',
' Fwd Packet Length Mean', ' Fwd Packet Length Std',
' Bwd Packet Length Mean', ' Bwd Packet Length Std',
' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total',
' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min',
'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max',
' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags',
' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length',
' Min Packet Length', ' Max Packet Length', ' Packet Length Mean',
' Packet Length Std', ' Packet Length Variance', 'FIN Flag Count',
' SYN Flag Count', ' RST Flag Count', ' PSH Flag Count',
' ACK Flag Count', ' URG Flag Count', ' CWE Flag Count',
' ECE Flag Count', ' Down/Up Ratio', ' Average Packet Size',
' Avg Fwd Segment Size', ' Avg Bwd Segment Size',
'Bwd Avg Bulk Rate', 'Subflow Fwd Packets', ' Subflow Fwd Bytes',
' Active Std', ' Active Max', ' Active Min', 'Idle Mean',
' Idle Std', ' Idle Max', ' Idle Min', 'SimillarHTTP', ' Inbound',
print(df[col].value_counts(dropna=False,normalize=True).head())
df[col].replace(np.inf,-1,inplace=True)
return df
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 3/51
6/20/22, 12:53 PM Copy_of_DDoS
if(df[col].dtype != object):
print("*"*20)
print("Column: ",col)
IsInt = False
mx = df[col].max()
mn = df[col].min()
if not np.isfinite(df[col]).all():
df = Pre_Process_data(df,col)
asint = df[col].fillna(0).astype(np.int64)
result = (df[col]-asint)
result = result.sum()
IsInt = True
if IsInt:
if mn>=0:
if mx<255:
df[col] = df[col].astype(np.uint8)
elif mx<65535:
df[col] = df[col].astype(np.uint16)
elif mx<4294967295:
df[col] = df[col].astype(np.uint32)
else:
df[col] = df[col].astype(np.uint64)
else:
df[col] = df[col].astype(np.int8)
df[col] = df[col].astype(np.int16)
df[col] = df[col].astype(np.int32)
df[col] = df[col].astype(np.int64)
else:
df[col] = df[col].astype(np.float32)
print("*"*20)
return df
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 4/51
6/20/22, 12:53 PM Copy_of_DDoS
In [12]: df = reduce_mem_usage(df)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 5/51
6/20/22, 12:53 PM Copy_of_DDoS
********************
Column: Unnamed: 0
********************
********************
********************
********************
********************
********************
********************
********************
********************
Column: Protocol
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
0.000000e+00 0.162598
2.944000e+09 0.112522
4.580000e+08 0.102990
1.472000e+09 0.039172
2.290000e+08 0.020842
********************
********************
2.000000e+06 0.487212
1.000000e+06 0.108950
inf 0.033670
4.166667e+04 0.019860
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 7/51
6/20/22, 12:53 PM Copy_of_DDoS
6.666667e+05 0.014640
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 8/51
6/20/22, 12:53 PM Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 9/51
6/20/22, 12:53 PM Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 10/51
6/20/22, 12:53 PM Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 11/51
6/20/22, 12:53 PM Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
Column: Init_Win_bytes_forward
********************
********************
Column: Init_Win_bytes_backward
********************
********************
Column: act_data_pkt_fwd
********************
********************
Column: min_seg_size_forward
********************
********************
********************
********************
********************
********************
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 12/51
6/20/22, 12:53 PM Copy_of_DDoS
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
********************
Column: Inbound
********************
********************
Visualisation
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 13/51
6/20/22, 12:53 PM Copy_of_DDoS
In [13]: data_ = df
data = df
df[' Label'].value_counts()
sizes
Out[14]: array([39995, 39990, 39985, 39980, 39900, 39854, 39789, 39637, 39225,
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 14/51
6/20/22, 12:53 PM Copy_of_DDoS
explode = (0.3,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.3,0.2,0.1)
plt.rcParams.update({'font.size': 22})
plt.figure(figsize=(10,10))
plt.axis('equal')
plt.show()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 15/51
6/20/22, 12:53 PM Copy_of_DDoS
In [16]: plt.figure(figsize=(40,20))
gt = g1.twinx()
In [17]: plt.figure(figsize=(40,20))
gt = g1.twinx()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 16/51
6/20/22, 12:53 PM Copy_of_DDoS
In [18]: plt.figure(figsize=(40,16))
gt = g1.twinx()
In [19]: plt.figure(figsize=(40,16))
gt = g1.twinx()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 17/51
6/20/22, 12:53 PM Copy_of_DDoS
In [20]: plt.figure(figsize=(20,16))
gt = g1.twinx()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 18/51
6/20/22, 12:53 PM Copy_of_DDoS
In [21]: plt.figure(figsize=(20,16))
gt = g1.twinx()
df = df.drop(['Flow ID',' Source IP',' Source Port', ' Destination IP',' Desti
nation Port',' Timestamp','Fwd Packets/s','Flow Bytes/s','SimillarHTTP',' Labe
l'],axis=1)
X = StandardScaler().fit_transform(df)
X_norm = preprocessing.normalize(X)
In [24]: X_norm.shape
Out[25]: 1 395654
0 4346
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 19/51
6/20/22, 12:53 PM Copy_of_DDoS
In [26]: f = plt.figure(figsize=(20,15))
plt.matshow(df.corr(),fignum=f.number)
plt.xticks(range(df.shape[1]),df.columns,fontsize=10,rotation=90)
plt.yticks(range(df.shape[1]),df.columns,fontsize=10)
cb = plt.colorbar()
cb.ax.tick_params(labelsize=14)
In [26]:
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 20/51
6/20/22, 12:53 PM Copy_of_DDoS
total = 0
count=0
for i in df.columns:
for j in df.columns:
if i != j :
corr, _ = stats.pearsonr(data_[i],data_[j])
total=total+1
if corr>0.9:
count = count+1
print(count,total)
print(count/total)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 21/51
6/20/22, 12:53 PM Copy_of_DDoS
/usr/local/lib/python3.7/dist-packages/scipy/stats/stats.py:3508: PearsonRCon
stantInputWarning: An input array is constant; the correlation coefficent is
not defined.
warnings.warn(PearsonRConstantInputWarning())
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 22/51
6/20/22, 12:53 PM Copy_of_DDoS
Person correlation between Flow Duration and Fwd IAT Total :1.000
Person correlation between Total Fwd Packets and Subflow Fwd Packets :1.000
Person correlation between Total Backward Packets and Subflow Bwd Packets :
1.000
Person correlation between Total Length of Fwd Packets and Subflow Fwd Bytes
:1.000
Person correlation between Total Length of Bwd Packets and Subflow Bwd Byte
s :1.000
Person correlation between Fwd Packet Length Max and Fwd Packet Length Min
:0.993
Person correlation between Fwd Packet Length Max and Fwd Packet Length Mean
:0.997
Person correlation between Fwd Packet Length Max and Min Packet Length :0.9
92
Person correlation between Fwd Packet Length Max and Max Packet Length :0.9
77
Person correlation between Fwd Packet Length Max and Packet Length Mean :0.
997
Person correlation between Fwd Packet Length Max and Average Packet Size :
0.993
Person correlation between Fwd Packet Length Max and Avg Fwd Segment Size :
0.997
Person correlation between Fwd Packet Length Min and Fwd Packet Length Max
:0.993
Person correlation between Fwd Packet Length Min and Fwd Packet Length Mean
:0.997
Person correlation between Fwd Packet Length Min and Min Packet Length :1.0
00
Person correlation between Fwd Packet Length Min and Max Packet Length :0.9
65
Person correlation between Fwd Packet Length Min and Packet Length Mean :0.
997
Person correlation between Fwd Packet Length Min and Average Packet Size :
0.996
Person correlation between Fwd Packet Length Min and Avg Fwd Segment Size :
0.997
Person correlation between Fwd Packet Length Mean and Fwd Packet Length Max
:0.997
Person correlation between Fwd Packet Length Mean and Fwd Packet Length Min
:0.997
Person correlation between Fwd Packet Length Mean and Min Packet Length :0.
997
Person correlation between Fwd Packet Length Mean and Max Packet Length :0.
970
Person correlation between Fwd Packet Length Mean and Packet Length Mean :
0.999
Person correlation between Fwd Packet Length Mean and Average Packet Size :
0.996
Person correlation between Fwd Packet Length Mean and Avg Fwd Segment Size
:1.000
Person correlation between Bwd Packet Length Mean and Avg Bwd Segment Size
:1.000
Person correlation between Flow IAT Mean and Flow IAT Std :0.984
Person correlation between Flow IAT Mean and Flow IAT Max :0.954
Person correlation between Flow IAT Mean and Fwd IAT Mean :0.991
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 23/51
6/20/22, 12:53 PM Copy_of_DDoS
Person correlation between Flow IAT Mean and Fwd IAT Std :0.975
Person correlation between Flow IAT Mean and Fwd IAT Max :0.954
Person correlation between Flow IAT Mean and Idle Mean :0.952
Person correlation between Flow IAT Mean and Idle Max :0.954
Person correlation between Flow IAT Std and Flow IAT Mean :0.984
Person correlation between Flow IAT Std and Flow IAT Max :0.969
Person correlation between Flow IAT Std and Fwd IAT Mean :0.986
Person correlation between Flow IAT Std and Fwd IAT Std :0.998
Person correlation between Flow IAT Std and Fwd IAT Max :0.969
Person correlation between Flow IAT Std and Idle Mean :0.978
Person correlation between Flow IAT Std and Idle Max :0.969
Person correlation between Flow IAT Std and Idle Min :0.926
Person correlation between Flow IAT Max and Flow IAT Mean :0.954
Person correlation between Flow IAT Max and Flow IAT Std :0.969
Person correlation between Flow IAT Max and Fwd IAT Mean :0.968
Person correlation between Flow IAT Max and Fwd IAT Std :0.974
Person correlation between Flow IAT Max and Fwd IAT Max :1.000
Person correlation between Flow IAT Max and Idle Mean :0.968
Person correlation between Flow IAT Max and Idle Max :0.998
Person correlation between Flow IAT Min and Fwd IAT Min :0.999
Person correlation between Fwd IAT Total and Flow Duration :1.000
Person correlation between Fwd IAT Mean and Flow IAT Mean :0.991
Person correlation between Fwd IAT Mean and Flow IAT Std :0.986
Person correlation between Fwd IAT Mean and Flow IAT Max :0.968
Person correlation between Fwd IAT Mean and Fwd IAT Std :0.985
Person correlation between Fwd IAT Mean and Fwd IAT Max :0.969
Person correlation between Fwd IAT Mean and Idle Mean :0.963
Person correlation between Fwd IAT Mean and Idle Max :0.968
Person correlation between Fwd IAT Std and Flow IAT Mean :0.975
Person correlation between Fwd IAT Std and Flow IAT Std :0.998
Person correlation between Fwd IAT Std and Flow IAT Max :0.974
Person correlation between Fwd IAT Std and Fwd IAT Mean :0.985
Person correlation between Fwd IAT Std and Fwd IAT Max :0.974
Person correlation between Fwd IAT Std and Idle Mean :0.984
Person correlation between Fwd IAT Std and Idle Max :0.973
Person correlation between Fwd IAT Std and Idle Min :0.933
Person correlation between Fwd IAT Max and Flow IAT Mean :0.954
Person correlation between Fwd IAT Max and Flow IAT Std :0.969
Person correlation between Fwd IAT Max and Flow IAT Max :1.000
Person correlation between Fwd IAT Max and Fwd IAT Mean :0.969
Person correlation between Fwd IAT Max and Fwd IAT Std :0.974
Person correlation between Fwd IAT Max and Idle Mean :0.968
Person correlation between Fwd IAT Max and Idle Max :0.998
Person correlation between Fwd IAT Min and Flow IAT Min :0.999
Person correlation between Bwd IAT Total and Bwd IAT Max :0.919
Person correlation between Bwd IAT Mean and Bwd IAT Std :0.995
Person correlation between Bwd IAT Mean and Bwd IAT Max :0.958
Person correlation between Bwd IAT Std and Bwd IAT Mean :0.995
Person correlation between Bwd IAT Std and Bwd IAT Max :0.976
Person correlation between Bwd IAT Max and Bwd IAT Total :0.919
Person correlation between Bwd IAT Max and Bwd IAT Mean :0.958
Person correlation between Bwd IAT Max and Bwd IAT Std :0.976
Person correlation between Fwd PSH Flags and RST Flag Count :1.000
Person correlation between Fwd Header Length and Fwd Header Length.1 :1.000
Person correlation between Min Packet Length and Fwd Packet Length Max :0.9
92
Person correlation between Min Packet Length and Fwd Packet Length Min :1.0
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 24/51
6/20/22, 12:53 PM Copy_of_DDoS
00
Person correlation between Min Packet Length and Fwd Packet Length Mean :0.
997
Person correlation between Min Packet Length and Max Packet Length :0.964
Person correlation between Min Packet Length and Packet Length Mean :0.997
Person correlation between Min Packet Length and Average Packet Size :0.996
Person correlation between Min Packet Length and Avg Fwd Segment Size :0.99
7
Person correlation between Max Packet Length and Fwd Packet Length Max :0.9
77
Person correlation between Max Packet Length and Fwd Packet Length Min :0.9
65
Person correlation between Max Packet Length and Fwd Packet Length Mean :0.
970
Person correlation between Max Packet Length and Min Packet Length :0.964
Person correlation between Max Packet Length and Packet Length Mean :0.975
Person correlation between Max Packet Length and Average Packet Size :0.969
Person correlation between Max Packet Length and Avg Fwd Segment Size :0.97
0
Person correlation between Packet Length Mean and Fwd Packet Length Max :0.
997
Person correlation between Packet Length Mean and Fwd Packet Length Min :0.
997
Person correlation between Packet Length Mean and Fwd Packet Length Mean :
0.999
Person correlation between Packet Length Mean and Min Packet Length :0.997
Person correlation between Packet Length Mean and Max Packet Length :0.975
Person correlation between Packet Length Mean and Average Packet Size :0.99
7
Person correlation between Packet Length Mean and Avg Fwd Segment Size :0.9
99
Person correlation between RST Flag Count and Fwd PSH Flags :1.000
Person correlation between Average Packet Size and Fwd Packet Length Max :
0.993
Person correlation between Average Packet Size and Fwd Packet Length Min :
0.996
Person correlation between Average Packet Size and Fwd Packet Length Mean :
0.996
Person correlation between Average Packet Size and Min Packet Length :0.996
Person correlation between Average Packet Size and Max Packet Length :0.969
Person correlation between Average Packet Size and Packet Length Mean :0.99
7
Person correlation between Average Packet Size and Avg Fwd Segment Size :0.
996
Person correlation between Avg Fwd Segment Size and Fwd Packet Length Max :
0.997
Person correlation between Avg Fwd Segment Size and Fwd Packet Length Min :
0.997
Person correlation between Avg Fwd Segment Size and Fwd Packet Length Mean
:1.000
Person correlation between Avg Fwd Segment Size and Min Packet Length :0.99
7
Person correlation between Avg Fwd Segment Size and Max Packet Length :0.97
0
Person correlation between Avg Fwd Segment Size and Packet Length Mean :0.9
99
Person correlation between Avg Fwd Segment Size and Average Packet Size :0.
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 25/51
6/20/22, 12:53 PM Copy_of_DDoS
996
Person correlation between Avg Bwd Segment Size and Bwd Packet Length Mean
:1.000
Person correlation between Fwd Header Length.1 and Fwd Header Length :1.000
Person correlation between Subflow Fwd Packets and Total Fwd Packets :1.000
Person correlation between Subflow Fwd Bytes and Total Length of Fwd Packets
:1.000
Person correlation between Subflow Bwd Packets and Total Backward Packets :
1.000
Person correlation between Subflow Bwd Bytes and Total Length of Bwd Packet
s :1.000
Person correlation between Idle Mean and Flow IAT Mean :0.952
Person correlation between Idle Mean and Flow IAT Std :0.978
Person correlation between Idle Mean and Flow IAT Max :0.968
Person correlation between Idle Mean and Fwd IAT Mean :0.963
Person correlation between Idle Mean and Fwd IAT Std :0.984
Person correlation between Idle Mean and Fwd IAT Max :0.968
Person correlation between Idle Max and Flow IAT Mean :0.954
Person correlation between Idle Max and Flow IAT Std :0.969
Person correlation between Idle Max and Flow IAT Max :0.998
Person correlation between Idle Max and Fwd IAT Mean :0.968
Person correlation between Idle Max and Fwd IAT Std :0.973
Person correlation between Idle Max and Fwd IAT Max :0.998
Person correlation between Idle Min and Flow IAT Std :0.926
Person correlation between Idle Min and Fwd IAT Std :0.933
148 6006
0.024642024642024644
mean_vec = np.mean(X,axis=0)
cov_mat = (X-mean_vec).T.dot((X-mean_vec))/(X.shape[0]-1)
Covarience matrix
0.05059552]
0.08154645]
-0.03082452]
...
-0.03363851]
-0.06401936]
1.0000025 ]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 26/51
6/20/22, 12:53 PM Copy_of_DDoS
Hypothesis Testing
alpha = 0.05
print(" P value is "+str(p))
if p > alpha :
else:
P value is 4.0030002619486493e-66
alpha = 0.05
print(" P value is "+str(p))
if p > alpha :
else:
P value is 1.0
alpha = 0.05
print(" P value is "+str(p))
if p > alpha :
else:
P value is 0.0
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 27/51
6/20/22, 12:53 PM Copy_of_DDoS
Out[32]:
Protocol 0 6 17
0 76 72324 327121
1 0 479 0
T Test
In [33]: from scipy.stats import ttest_ind
print(score)
Ttest_indResult(statistic=0.11945494497236958, pvalue=0.9049149630747436)
X_std = StandardScaler().fit_transform(df)
mean_vec = np.mean(X,axis= 0)
cov_mat = (X-mean_vec).T.dot((X-mean_vec))/(X.shape[0]-1)
Covarience matrix
0.05059552]
0.08154645]
-0.03082452]
...
-0.03363851]
-0.06401936]
1.0000025 ]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 28/51
6/20/22, 12:53 PM Copy_of_DDoS
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 29/51
6/20/22, 12:53 PM Copy_of_DDoS
Eigen Vector
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]
...
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]
0. +0.j 0. +0.j]]
Eigen Values
[ 1.42033356e+01+0.00000000e+00j 7.72649536e+00+0.00000000e+00j
6.97276476e+00+0.00000000e+00j 3.44372591e+00+0.00000000e+00j
3.36559607e+00+0.00000000e+00j 3.06290611e+00+0.00000000e+00j
2.73200811e+00+0.00000000e+00j 2.16244583e+00+0.00000000e+00j
2.04284143e+00+0.00000000e+00j 1.98999267e+00+0.00000000e+00j
1.92553259e+00+0.00000000e+00j 1.87241124e+00+0.00000000e+00j
1.57834156e+00+0.00000000e+00j 1.31971871e+00+0.00000000e+00j
1.20821059e+00+0.00000000e+00j 6.46386672e-01+0.00000000e+00j
7.93066087e-01+0.00000000e+00j 1.04245104e+00+0.00000000e+00j
1.00743115e+00+0.00000000e+00j 9.98571864e-01+0.00000000e+00j
9.40046537e-01+0.00000000e+00j 8.64781483e-01+0.00000000e+00j
8.86464711e-01+0.00000000e+00j 5.41688756e-01+0.00000000e+00j
4.35892964e-01+0.00000000e+00j 3.35441715e-01+0.00000000e+00j
2.69710392e-01+0.00000000e+00j 4.63365483e-01+0.00000000e+00j
3.84410884e-01+0.00000000e+00j 2.14326099e-01+0.00000000e+00j
2.07266827e-01+0.00000000e+00j 8.51670083e-02+0.00000000e+00j
6.95002450e-02+0.00000000e+00j 5.28876705e-02+0.00000000e+00j
3.53399844e-02+0.00000000e+00j 3.39899391e-02+0.00000000e+00j
2.15243164e-02+0.00000000e+00j 1.62337338e-02+0.00000000e+00j
1.43799765e-02+0.00000000e+00j 1.03590900e-02+0.00000000e+00j
6.89319475e-03+0.00000000e+00j 3.29125760e-03+0.00000000e+00j
2.65339198e-03+0.00000000e+00j 2.00362749e-03+0.00000000e+00j
1.30937508e-03+0.00000000e+00j 1.38314696e-03+0.00000000e+00j
1.35270413e-03+0.00000000e+00j 1.09949040e-03+0.00000000e+00j
8.90704310e-04+0.00000000e+00j 6.29806195e-04+0.00000000e+00j
5.03692830e-04+0.00000000e+00j 3.76318337e-04+0.00000000e+00j
2.14122590e-04+0.00000000e+00j 3.15230787e-04+0.00000000e+00j
1.20002518e-04+0.00000000e+00j 9.35033083e-05+0.00000000e+00j
1.66480243e-05+0.00000000e+00j 7.56703066e-06+0.00000000e+00j
1.54407558e-16+0.00000000e+00j 7.59839399e-17+6.76312141e-17j
7.59839399e-17-6.76312141e-17j 9.16402231e-17+0.00000000e+00j
-5.44726130e-17+1.81967420e-17j -5.44726130e-17-1.81967420e-17j
-8.25377906e-19+0.00000000e+00j 2.01016260e-17+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j
0.00000000e+00+0.00000000e+00j 0.00000000e+00+0.00000000e+00j]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 30/51
6/20/22, 12:53 PM Copy_of_DDoS
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 31/51
6/20/22, 12:53 PM Copy_of_DDoS
eig_pairs.sort(key=lambda x:[0],reverse=True)
for i, j in enumerate(eig_pairs):
print(i,j[0])
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 32/51
6/20/22, 12:53 PM Copy_of_DDoS
0 14.203335635459023
1 7.72649536453649
2 6.972764758518834
3 3.4437259145677888
4 3.3655960738401918
5 3.0629061126132973
6 2.732008106736874
7 2.162445831484391
8 2.0428414264157237
9 1.989992674733237
10 1.9255325868241058
11 1.872411238759887
12 1.5783415578920403
13 1.3197187145205351
14 1.2082105909293108
15 0.6463866715041711
16 0.7930660872085342
17 1.0424510364507815
18 1.0074311525518724
19 0.9985718638465273
20 0.9400465373681084
21 0.8647814834581833
22 0.8864647110533768
23 0.5416887563792396
24 0.4358929638916721
25 0.3354417151908467
26 0.2697103922117089
27 0.46336548291789303
28 0.38441088408307755
29 0.21432609904951572
30 0.20726682701012103
31 0.0851670083082053
32 0.06950024499568283
33 0.05288767048344181
34 0.035339984422644515
35 0.03398993913758611
36 0.02152431638474884
37 0.01623373382139512
38 0.014379976511664358
39 0.010359090026185399
40 0.006893194750722876
41 0.0032912575989725934
42 0.0026533919781355636
43 0.002003627487092827
44 0.0013093750777122082
45 0.00138314696307466
46 0.001352704129549723
47 0.0010994903982587496
48 0.0008907043103310611
49 0.0006298061948454105
50 0.0005036928300685212
51 0.0003763183374628182
52 0.00021412258994108418
53 0.0003152307866004254
54 0.00012000251762704192
55 9.350330830589868e-05
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 33/51
6/20/22, 12:53 PM Copy_of_DDoS
56 1.6648024281554622e-05
57 7.5670306649343e-06
58 1.5440755813161445e-16
59 1.0172286002046191e-16
60 1.0172286002046191e-16
61 9.164022310312424e-17
62 5.743158529421716e-17
63 5.743158529421716e-17
64 8.253779064973264e-19
65 2.0101625965361303e-17
66 0.0
67 0.0
68 0.0
69 0.0
70 0.0
71 0.0
72 0.0
73 0.0
74 0.0
75 0.0
76 0.0
77 0.0
In [37]: eig_pairs[0][1]
Explained Variance
In [38]: tot=sum(eig_vals)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 34/51
6/20/22, 12:53 PM Copy_of_DDoS
plt.figure(figsize=(30,30))
plt.bar(range(78),var_exp,alpha=0.5,align="center",label="individual explain
ed variance")
plt.xlabel("Principal Component")
plt.legend(loc="best")
plt.tight_layout()
/usr/local/lib/python3.7/dist-packages/matplotlib/transforms.py:789: ComplexW
arning: Casting complex values to real discards the imaginary part
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 35/51
6/20/22, 12:53 PM Copy_of_DDoS
In [40]: matrix_w=np.hstack((eig_pairs[0][1].reshape(78,1),eig_pairs[1][1].reshape(78,1
)))
print("Matrix W: \n",matrix_w)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 36/51
6/20/22, 12:53 PM Copy_of_DDoS
Matrix W:
[[-1.36443195e-01+0.j -6.22649911e-02+0.j]
[ 1.85219855e-01+0.j 2.75809848e-02+0.j]
[-2.22020551e-01+0.j 3.69437371e-03+0.j]
[-9.58716391e-04+0.j 2.68948940e-03+0.j]
[-5.31782329e-02+0.j 2.93186133e-01+0.j]
[ 2.64032641e-02+0.j 2.46285459e-03+0.j]
[-2.03674472e-02+0.j 2.70705165e-01+0.j]
[ 1.59768349e-01+0.j 9.77303616e-02+0.j]
[ 1.60797920e-01+0.j 8.02322353e-02+0.j]
[ 1.62009163e-01+0.j 8.43531223e-02+0.j]
[-1.76346755e-02+0.j 1.23578115e-01+0.j]
[-3.93283262e-02+0.j 3.23065708e-01+0.j]
[-5.17699479e-03+0.j 4.97207168e-02+0.j]
[-3.51036243e-02+0.j 2.90787160e-01+0.j]
[-4.01112340e-02+0.j 2.93186523e-01+0.j]
[ 9.05963286e-02+0.j -9.91649673e-03+0.j]
[-2.30173147e-01+0.j -2.48931229e-02+0.j]
[-2.32136421e-01+0.j -1.55999806e-02+0.j]
[-2.37368521e-01+0.j 6.69290113e-03+0.j]
[-4.04002103e-03+0.j 9.35728608e-04+0.j]
[-2.21882711e-01+0.j 3.16978406e-03+0.j]
[-2.34409551e-01+0.j -2.02868225e-02+0.j]
[-2.33253515e-01+0.j -1.11587362e-02+0.j]
[-2.37167823e-01+0.j 5.91666077e-03+0.j]
[-3.98763123e-03+0.j 8.68998842e-04+0.j]
[-9.46949890e-02+0.j 9.76610626e-02+0.j]
[-8.65356604e-02+0.j 3.31227630e-02+0.j]
[-8.89126885e-02+0.j 4.09135207e-02+0.j]
[-9.42693534e-02+0.j 6.42026251e-02+0.j]
[-4.06034182e-02+0.j 4.41208073e-03+0.j]
[-1.14390720e-02+0.j 1.76059773e-02+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-4.67306418e-03+0.j 3.33247609e-04+0.j]
[ 2.59700180e-04+0.j -1.17091172e-03+0.j]
[-7.03104829e-03+0.j -3.50336382e-04+0.j]
[ 1.60830839e-01+0.j 7.96550072e-02+0.j]
[ 1.49144245e-01+0.j 1.61194665e-01+0.j]
[ 1.61128010e-01+0.j 9.31440822e-02+0.j]
[-3.29054579e-02+0.j 2.78601320e-01+0.j]
[-2.52655579e-02+0.j 3.05088181e-01+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-1.69732333e-03+0.j 8.65537371e-04+0.j]
[-1.14390720e-02+0.j 1.76059773e-02+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-1.84399501e-01+0.j -3.28368343e-02+0.j]
[-1.43509487e-02+0.j 2.27161780e-02+0.j]
[-9.28137823e-03+0.j 1.66573978e-02+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-2.59693125e-02+0.j 6.79906605e-02+0.j]
[ 1.58066480e-01+0.j 8.85095299e-02+0.j]
[ 1.62009163e-01+0.j 8.43531223e-02+0.j]
[-3.51036243e-02+0.j 2.90787160e-01+0.j]
[-4.67306418e-03+0.j 3.33247609e-04+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 37/51
6/20/22, 12:53 PM Copy_of_DDoS
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[ 0.00000000e+00+0.j 0.00000000e+00+0.j]
[-9.58716391e-04+0.j 2.68948940e-03+0.j]
[ 2.64032641e-02+0.j 2.46285459e-03+0.j]
[-5.31782329e-02+0.j 2.93186133e-01+0.j]
[-2.03674472e-02+0.j 2.70705165e-01+0.j]
[-1.42732184e-01+0.j 7.40240838e-02+0.j]
[-1.44776217e-02+0.j 4.07840410e-02+0.j]
[ 1.57499410e-02+0.j -2.33859330e-03+0.j]
[-2.13709539e-02+0.j -3.97657590e-03+0.j]
[-3.40706178e-02+0.j 4.25100042e-02+0.j]
[-4.45546699e-02+0.j 3.16233496e-02+0.j]
[-4.66047215e-02+0.j 4.08394136e-02+0.j]
[-1.70703123e-02+0.j 3.08096393e-02+0.j]
[-2.33357423e-01+0.j 7.66424312e-03+0.j]
[-1.95649654e-01+0.j -1.47155175e-02+0.j]
[-2.36660758e-01+0.j 1.15920396e-03+0.j]
[-2.15315572e-01+0.j 1.45766138e-02+0.j]
[ 2.79204962e-02+0.j -1.08012869e-01+0.j]]
In [41]: Y=X_std.dot(matrix_w)
...,
[ -1.93651008+0.j, -1.22422617+0.j],
[ -2.03393674+0.j, -1.26868622+0.j],
[-11.84870487+0.j, -1.02759442+0.j]])
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 38/51
6/20/22, 12:53 PM Copy_of_DDoS
pca=PCA().fit(X_std)
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlim(0,78,1)
plt.xlabel("Number of components")
In [43]: sklearn_pca=PCA(n_components=30)
Y_sklearn=sklearn_pca.fit_transform(X_std)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 39/51
6/20/22, 12:53 PM Copy_of_DDoS
In [44]: pca=PCA(n_components=2)
principalComponents=pca.fit_transform(X_norm)
plt.figure(figsize=(16,16))
g1=sns.scatterplot(principalComponents[:,0],principalComponents[:,1],s=100,hue
=data_[" Label"],cmap="Spectral",alpha=0.7)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 40/51
6/20/22, 12:53 PM Copy_of_DDoS
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarni
ng: Pass the following variables as keyword args: x, y. From version 0.12, th
e only valid positional argument will be `data`, and passing other arguments
without an explicit keyword will result in an error or misinterpretation.
FutureWarning
/usr/local/lib/python3.7/dist-packages/google/colab/_event_manager.py:28: Use
rWarning: Creating legend with loc="best" can be slow with large amounts of d
ata.
func(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/IPython/core/pylabtools.py:125: UserWa
rning: Creating legend with loc="best" can be slow with large amounts of dat
a.
fig.canvas.print_figure(bytes_io, **kw)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 41/51
6/20/22, 12:53 PM Copy_of_DDoS
y=LabelEncoder().fit_transform(y)
oversample=SMOTE()
X,y=oversample.fit_resample(X,y)
counter=Counter(y)
for k, v in counter.items():
per=v/len(y)*100
pyplot.bar(counter.keys(),counter.values())
pyplot.show()
In [46]: y.shape
Out[46]: (479940,)
In [47]: X.shape
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 42/51
6/20/22, 12:53 PM Copy_of_DDoS
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state
=1)
model=LogisticRegression(max_iter=440000)
ovr=OneVsRestClassifier(model)
ovr.fit(X_train,y_train)
y_pred=ovr.predict(X_test)
print(classification_report(y_test,y_pred))
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 43/51
6/20/22, 12:53 PM Copy_of_DDoS
classifier=DecisionTreeClassifier()
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
print(classification_report(y_test,y_pred))
In [ ]: print(accuracy_score(y_test,y_pred))
0.7284327207567612
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 44/51
6/20/22, 12:53 PM Copy_of_DDoS
classifier.fit(X_train,y_train)
Y_pred = classifier.predict(X_test)
print(classification_report(y_test,Y_pred))
In [ ]: print(confusion_matrix(y_test,y_pred))
[[9727 2 1 0 4 0 0 1 0 1 5 0]
[ 7 45 1 3 9951 2 0 0 4 0 0 11]
[ 0 9 0 2 0 3800 6341 1 1 0 2 0]
[ 0 0 0 0 0 0 0 0 1 8060 1922 0]
[ 0 1 0 1 3 1 0 1 2 0 0 9945]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 45/51
6/20/22, 12:53 PM Copy_of_DDoS
In [ ]: skplt.metrics.plot_confusion_matrix(y_test,y_pred,figsize=(16,16))
Feature Scaling
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 46/51
6/20/22, 12:53 PM Copy_of_DDoS
import pandas as pd
x=df.iloc[:,1:3].values
min_max_scaler=preprocessing.MinMaxScaler(feature_range=(0,1))
x_after_min_max_scaler=min_max_scaler.fit_transform(x)
Standardisation=preprocessing.StandardScaler()
x_after_Standardisation=Standardisation.fit_transform(x)
[ 17 1]
[ 17 44]
...
[ 6 1]
[ 6 1]
[ 6 112584179]]
[1.00000000e+00 8.33333646e-09]
[1.00000000e+00 3.66666804e-07]
...
[3.52941176e-01 8.33333646e-09]
[3.52941176e-01 8.33333646e-09]
[3.52941176e-01 9.38201843e-01]]
after standardisation :
[[ 0.47191611 -0.22334861]
[ 0.47191611 -0.22334861]
[ 0.47191611 -0.22334626]
...
[-2.11674636 -0.22334861]
[-2.11674636 -0.22334861]
[-2.11674636 5.91918411]]
import pandas as pd
In [60]: y.shape
Out[60]: (479940,)
In [61]: print(X.shape)
(479940, 78)
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 47/51
6/20/22, 12:53 PM Copy_of_DDoS
X_train,X_test,y_train,y_test =train_test_split(X,y,test_size=0.20)
scaler =StandardScaler()
scaler.fit(X_train)
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)
extra_tree =ExtraTreeClassifier(random_state=0)
cls=BaggingClassifier(extra_tree,random_state=0).fit(X_train,y_train)
cls.score(X_test,y_test)
Out[65]: 0.7575738633995917
In [66]: y_pred=cls.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
[[7988 1 0 0 0 0 0 0 0 0 0 0]
[ 5 31 1 0 7844 1 0 0 3 1 1 14]
[ 0 4 0 1 1 2560 5379 0 1 0 1 0]
[ 1 0 0 0 0 0 0 0 0 6624 1465 0]
[ 0 0 0 0 0 0 0 0 0 0 0 8139]]
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 48/51
6/20/22, 12:53 PM Copy_of_DDoS
X,y=make_classification(n_samples=1000,n_features=11,n_informative=3,n_redunda
nt=0,n_repeated=0,n_classes=2,random_state=0,shuffle=False)
forest=ExtraTreesClassifier(n_estimators=250,random_state=0)
forest.fit(X,y)
importances =forest.feature_importances_
axis=0)
indices=np.argsort(importances)[::-1]
print("Feature ranking:")
# for f in range(X.shape[1]):
plt.figure()
plt.title("Feature importances")
plt.bar(range(X.shape[1]),importances[indices],
color="r",yerr=std[indices],align="center")
plt.xticks(range(X.shape[1]),indices)
plt.xlim([-1,x.shape[1]])
plt.show()
Feature ranking:
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 49/51
6/20/22, 12:53 PM Copy_of_DDoS
def get_dataset():
X,y=make_classification(n_samples=1000,n_features=20,n_informative=15,n_redu
ndant=5,random_state=4)
return X,y
def get_models():
models=dict()
n_trees=[10,50,100,500,1000,5000]
for n in n_trees:
models[str(n)]=ExtraTreesClassifier(n_estimators=n)
return models
def evaluate_model(model,X,y):
cv=RepeatedStratifiedKFold(n_splits=10,n_repeats=3,random_state=1)
scores=cross_val_score(model,X,y,scoring="accuracy",cv=cv,n_jobs=-1)
return scores
X,y=get_dataset()
models =get_models()
results,names =list(),list()
scores=evaluate_model(model,X,y)
results.append(scores)
names.append(name)
pyplot.boxplot(results,labels=names,showmeans=True)
pyplot.show()
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 50/51
6/20/22, 12:53 PM Copy_of_DDoS
/usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executo
r.py:705: UserWarning: A worker stopped while some jobs were given to the exe
cutor. This can be caused by a too short worker timeout or by a memory leak.
In [ ]:
localhost:8888/nbconvert/html/Downloads/Copy_of_DDoS.ipynb?download=false 51/51