DL Lab2
DL Lab2
In [1]: # Aim: Study the pandas, matplotlib and seaborn libraries for data visualization
23
33
13
[1 2 3 4 5 6 7 8 9]
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]
[0. 0. 0. 0. 0.]
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
[1. 1. 1. 1. 1.]
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
Random
In [3]: print(np.random.rand())
print(np.random.rand(5,5))
print(np.random.randn(5,5))
print(np.random.randint(1,5))
print(np.random.randint(1,5,[5,5]))
0.4811542495722224
[[0.81600037 0.68605224 0.42616075 0.45677208 0.95968575]
[0.93088032 0.33064225 0.84204006 0.46428493 0.86596586]
[0.50502598 0.49812489 0.85992254 0.53302527 0.03854323]
[0.84521242 0.28999238 0.43269991 0.63489068 0.37950397]
[0.36523548 0.3142058 0.78013427 0.9078802 0.56435694]]
[[ 1.03521258 -0.76082373 0.82305377 -0.53192129 0.43914877]
[-1.15339486 0.35871618 1.3517111 2.46412224 1.29976723]
[-0.12778346 -0.61315538 0.53263178 1.44915474 0.20682747]
[ 0.0195015 -1.3908235 0.31817992 0.52484251 -1.13534305]
[-0.79089533 1.04359843 0.36620065 0.34072973 -0.84415667]]
3
[[1 1 4 3 3]
[4 2 3 2 4]
[4 4 3 4 4]
[3 3 1 4 3]
[2 1 4 2 3]]
Array Methods
In [4]: data = [[1,4,3,5],[5,5,7,9]]
arr = np.array(data)
In [5]: arr
In [6]: print(arr.dtype)
print(arr.shape)
int32
(2, 4)
In [7]: a = np.random.rand(100)
b = np.random.randn(10)
In [8]: a
Out[8]: array([0.0667193 , 0.24996791, 0.18991164, 0.68245125, 0.69585948,
0.38049153, 0.38155562, 0.9862294 , 0.69594282, 0.77594545,
0.630243 , 0.79207447, 0.40428319, 0.21133484, 0.86509552,
0.50544835, 0.0150724 , 0.62275523, 0.98706215, 0.06596928,
0.08103634, 0.69451973, 0.2302759 , 0.96136337, 0.89402532,
0.63739668, 0.63607801, 0.84750941, 0.01950963, 0.62206543,
0.81532596, 0.07627552, 0.45184363, 0.19838202, 0.52181347,
0.82862354, 0.93121004, 0.23603411, 0.64326166, 0.54478935,
0.18237217, 0.6095559 , 0.76317213, 0.40650933, 0.93058601,
0.02814542, 0.11439223, 0.55123581, 0.68336245, 0.42704194,
0.89916515, 0.71809763, 0.94386994, 0.4633378 , 0.03994971,
0.87034481, 0.85223003, 0.2285538 , 0.98959497, 0.56410605,
0.07144686, 0.12718115, 0.02560205, 0.77798617, 0.0258084 ,
0.12678993, 0.21281529, 0.12848544, 0.34730102, 0.09806135,
0.46669377, 0.28006002, 0.02748664, 0.03385199, 0.27847907,
0.49061412, 0.02777332, 0.81850325, 0.31134813, 0.87079945,
0.92933361, 0.45399394, 0.58539194, 0.0412505 , 0.57143901,
0.36690098, 0.93040279, 0.35587989, 0.49084104, 0.59497039,
0.5924753 , 0.40145439, 0.35390004, 0.51806492, 0.06584004,
0.54030551, 0.62285484, 0.42619897, 0.41632971, 0.7137144 ])
In [9]: b
In [10]: a.min()
Out[10]: 0.015072397162660622
In [11]: a.max()
Out[11]: 0.989594972948553
In [12]: a.mean()
Out[12]: 0.4786000487777759
In [13]: b.min(),b.max()
In [14]: b.mean()
Out[14]: 0.25195704290761534
In [15]: c = np.random.rand(4)
c
In [16]: c.argmin()
Out[16]: 0
In [17]: c.argmax()
Out[17]: 1
In [18]: d = c.reshape(2,2)
print(d.shape)
d
(2, 2)
Out[18]: array([[0.10000233, 0.8027446 ],
[0.69281857, 0.46018424]])
In [19]: c.reshape(4)
In [20]: c.reshape(2,2)
In [21]: c.flatten()
In [22]: e = np.array([0,1])
In [23]: e
In [24]: f = np.array([0.])
f
Out[24]: array([0.])
In [25]: e.dtype
Out[25]: dtype('int32')
In [26]: f.dtype
Out[26]: dtype('float64')
In [28]: a
In [29]: a[:]
Out[29]: array([0, 1, 2, 3, 4])
In [30]: a[2]
Out[30]: 2
In [31]: a[2:4]
In [32]: a[-1]
Out[32]: 4
In [33]: a[2:]
In [34]: a[:2]
In [35]: a[:2]=10
In [36]: a
In [37]: a = np.arange(10)
In [38]: a
In [39]: b = a[2:7]  # basic slicing returns a view: b shares its data buffer with a
In [40]: b
In [41]: b[2:5]=10  # writes through the view, so a[4:7] is set to 10 as well
In [42]: b
In [43]: a
In [44]: c = a.copy()  # .copy() allocates a separate array; c no longer shares data with a
In [45]: c
Out[45]: array([ 0, 1, 2, 3, 10, 10, 10, 7, 8, 9])
In [46]: c[4:7]=0
In [47]: c
2D Arrays
In [48]: a = np.random.rand(5,5)
In [49]: a
In [50]: a[0]
In [51]: a[:,0]
In [52]: a[1:4,1:4]=10
In [53]: a
In [55]: a
Out[55]: array([[-0.52943346, -1.3563547 , -0.24950645, 0.36018632, 1.2761073 ],
[ 1.79306115, -0.40990395, -0.96592178, -1.05521176, 1.35609131],
[-1.22044469, 0.67669323, -1.79852855, -1.97324236, -1.08481568],
[-0.39202164, 0.46372706, -0.56059791, 0.6952029 , 0.47310958],
[-1.23361362, -1.61172041, -0.27054277, -0.83542174, 1.12427138]])
In [56]: a>0
In [57]: a[a>0]=10
a
Operator
In [58]: a = np.array([0,2,4,6,8])
b = np.array([1,3,5,7,9])
c=a+b
c
In [59]: d = b - 1
d
In [60]: c = d==a
c
In [61]: c.any()
Out[61]: True
In [62]: d = np.array([0,5,6,9,9])
In [63]: c = d==a
c
In [64]: c.any()
Out[64]: True
In [65]: c.all()
Out[65]: False
Functions
In [66]: a = np.arange(0,1,0.1)
In [67]: a
Out[67]: array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
In [68]: np.square(a)
Out[68]: array([0. , 0.01, 0.04, 0.09, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81])
In [69]: np.sin(a)
In [70]: np.exp(a)
C = np.dot(A, B)
print(C)
[[19 22]
[43 50]]
print("Eigenvalues:")
print(eigenvalues)
print("\nEigenvectors:")
print(eigenvectors)
Eigenvalues:
[-0.37228132 5.37228132]
Eigenvectors:
[[-0.82456484 -0.41597356]
[ 0.56576746 -0.90937671]]
In [73]: # Define a 2x3 matrix
A = np.array([[1, 2, 3], [4, 5, 6]])
# Compute the singular value decomposition of A.
# - U: left singular vectors as columns (2x2 here).
# - S: singular values as a 1-D array in descending order, not a diagonal matrix.
# - V: NOTE np.linalg.svd returns the right-singular-vector factor already
#   transposed (NumPy's docs call it `Vh`), so the ROWS of `V` are the right
#   singular vectors and A is rebuilt from U, diag(S) and V without another transpose.
U, S, V = np.linalg.svd(A)
print("U:")
print(U)
print("\nS:")
print(S)
print("\nV:")
print(V)
U:
[[-0.3863177 0.92236578]
[-0.92236578 -0.3863177 ]]
S:
[9.508032 0.77286964]
V:
[[-0.42866713 -0.56630692 -0.7039467 ]
[-0.80596391 -0.11238241 0.58119908]
[ 0.40824829 -0.81649658 0.40824829]]
Pandas
In [74]: import pandas as pd
Series
In [75]: pd.Series([6,2,3])
Out[75]: 0 6
1 2
2 3
dtype: int64
In [76]: pd.Series(data=[6,2,3],index=['abc','def','ghi'])
Out[76]: abc 6
def 2
ghi 3
dtype: int64
In [77]: a = pd.Series(data=[6,2,3],index=['abc','def','ghi'])
a['abc']
Out[77]: 6
In [78]: pd.Series([6,2,3],['abc','def','ghi'])
Out[78]: abc 6
def 2
ghi 3
dtype: int64
In [79]: pd.Series(np.array([6,2,3]),['abc','def','ghi'])
Out[79]: abc 6
def 2
ghi 3
dtype: int32
In [81]: pd.Series(dict)  # NOTE(review): `dict` is presumably a mapping assigned in a cell not shown here (the result is indexed abc/def/ghi); the name shadows the built-in `dict` - consider renaming it where it is defined
Out[81]: abc 6
def 2
ghi 3
dtype: int64
In [82]: a = [2,3,5]
In [83]: b = pd.Series([sum,min,max])
In [84]: b
In [85]: b[0](a)
Out[85]: 10
In [86]: b[1](a)
Out[86]: 2
In [87]: b[2](a)
Out[87]: 5
In [88]: a = {'a':5,'b':3,'c':10,'d':20}
b = {'a':2,'b':3,'c':10}
In [89]: pd.Series(a)+pd.Series(b)
Out[89]: a 7.0
b 6.0
c 20.0
d NaN
dtype: float64
Dataframe
In [90]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]])
Out[90]: 0 1 2
0 1 2 3
1 4 5 6
2 7 8 9
In [91]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],columns=['y1','y2','y3'])
Out[91]: y1 y2 y3
0 1 2 3
1 4 5 6
2 7 8 9
In [92]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],index=['x1','x2','x3'])
Out[92]: 0 1 2
x1 1 2 3
x2 4 5 6
x3 7 8 9
In [93]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],index=['x1','x2','x3'],columns=['y1'
Out[93]: y1 y2 y3
x1 1 2 3
x2 4 5 6
x3 7 8 9
In [94]: a = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]],['x1','x2','x3'],['y1','y2','y3'])
a
Out[94]: y1 y2 y3
x1 1 2 3
x2 4 5 6
x3 7 8 9
Indexing
In [95]: a['y1']
Out[95]: x1 1
x2 4
x3 7
Name: y1, dtype: int64
In [96]: a.y1
Out[96]: x1 1
x2 4
x3 7
Name: y1, dtype: int64
In [97]: a.loc['x1']
Out[97]: y1 1
y2 2
y3 3
Name: x1, dtype: int64
In [98]: a.iloc[0]
Out[98]: y1 1
y2 2
y3 3
Name: x1, dtype: int64
In [99]: a.loc['x1','y1']
Out[99]: 1
In [100… a.iloc[0,0]
Out[100… 1
In [101… a.loc[['x1','x3'],['y1','y3']]
Out[101… y1 y3
x1 1 3
x3 7 9
In [102… a.iloc[[0,1],[0,1]]
Out[102… y1 y2
x1 1 2
x2 4 5
In [103… a
Out[103… y1 y2 y3
x1 1 2 3
x2 4 5 6
x3 7 8 9
In [104… a['sum']=a['y1']+a['y2']+a['y3']
a
Out[104… y1 y2 y3 sum
x1 1 2 3 6
x2 4 5 6 15
x3 7 8 9 24
In [105… a.drop('y3',axis=1)
Out[105… y1 y2 sum
x1 1 2 6
x2 4 5 15
x3 7 8 24
In [106… a.drop('x2',axis=0)
Out[106… y1 y2 y3 sum
x1 1 2 3 6
x3 7 8 9 24
In [107… a>5
Out[107… y1 y2 y3 sum
In [108… a[a>5]
Out[108… y1 y2 y3 sum
Out[109… y1 y2 y3 sum
x3 7 8 9 24
Out[110… y1 y2 y3 sum
x2 4 5 6 15
In [111… a
Out[111… y1 y2 y3 sum
x1 1 2 3 6
x2 4 5 6 15
x3 7 8 9 24
In [112… a.reset_index()
0 x1 1 2 3 6
1 x2 4 5 6 15
2 x3 7 8 9 24
In [113… a.set_index('sum')
Out[113… y1 y2 y3
sum
6 1 2 3
15 4 5 6
24 7 8 9
In [114… a
Out[114… y1 y2 y3 sum
x1 1 2 3 6
x2 4 5 6 15
x3 7 8 9 24
In [115… b = a.set_index('sum')
In [116… b
Out[116… y1 y2 y3
sum
6 1 2 3
15 4 5 6
24 7 8 9
In [117… a.set_index('sum',inplace=True)
In [118… a
Out[118… y1 y2 y3
sum
6 1 2 3
15 4 5 6
24 7 8 9
Multi Index
In [119… first = [1,1,1,1,2,2,2,2]
second = [1,1,2,2,1,1,2,2]
third = [1,2,1,2,1,2,1,2]
ind = list(zip(first,second,third))
ind = pd.MultiIndex.from_tuples(ind)
In [120… ind
In [121… a = pd.DataFrame(np.random.rand(8,3),index=ind,columns=['x1','x2','x3'])
a
Out[121… x1 x2 x3
In [122… a.loc[1]
Out[122… x1 x2 x3
In [123… a.loc[1].loc[2]
Out[123… x1 x2 x3
In [124… a.loc[1].loc[2].loc[1]
Out[124… x1 0.713340
x2 0.347137
x3 0.122599
Name: 1, dtype: float64
In [126… a
Out[126… x1 x2 x3
Cleaning of data
In [127… a = pd.DataFrame(np.random.randn(5,5),'x1 x2 x3 x4 x5'.split(),'y1 y2 y3 y4 y5'.
a
Out[127… y1 y2 y3 y4 y5
In [128… c = a[a>-1]
c
Out[128… y1 y2 y3 y4 y5
In [129… c.dropna()
Out[129… y1 y2 y3 y4 y5
Out[130… y1 y2 y3 y4 y5
In [131… b.iloc[1,4]=0
b.iloc[3,3]=0
b
Out[131… y1 y2 y3 y4 y5
In [132… c = a/b
c
Out[132… y1 y2 y3 y4 y5
In [133… c.replace([np.inf,-np.inf],np.nan,inplace=True)
c
Out[133… y1 y2 y3 y4 y5
In [134… c.dropna()
Out[134… y1 y2 y3 y4 y5
In [135… c = a/b
c
Out[135… y1 y2 y3 y4 y5
In [136… c.replace([np.inf,-np.inf],np.nan).dropna()
Out[136… y1 y2 y3 y4 y5
In [137… c
Out[137… y1 y2 y3 y4 y5
In [138… c = a/b
c.replace([np.inf,-np.inf],np.nan,inplace=True)
In [139… c
Out[139… y1 y2 y3 y4 y5
In [140… c.dropna(axis=1)
Out[140… y1 y2 y3
In [141… c.fillna(c.mean())
Out[141… y1 y2 y3 y4 y5
In [143… a
0 0 STD1 SUB1 AA 10
1 1 STD1 SUB2 AB 9
2 2 STD1 SUB3 BB 8
3 3 STD2 SUB1 BB 8
4 4 STD2 SUB2 AA 10
5 5 STD2 SUB3 AA 10
6 6 STD3 SUB1 BB 8
7 7 STD3 SUB2 AB 9
8 8 STD3 SUB3 AA 10
In [144… a.to_csv('demo',index=False)
In [145… b = a.groupby('student')
In [146… b
In [147… b.describe()
Out[147… Unnamed: 4
student
In [148… a.groupby('subject').describe()
Out[148… student
count mean std min 25% 50% 75% max count mean std mi
subject
STD1 3.0 1.0 1.0 0.0 0.5 1.0 1.5 2.0 3.0 9.000000 1.000000 8.
STD2 3.0 4.0 1.0 3.0 3.5 4.0 4.5 5.0 3.0 9.333333 1.154701 8.
STD3 3.0 7.0 1.0 6.0 6.5 7.0 7.5 8.0 3.0 9.000000 1.000000 8.
In [149… a.groupby('grade').describe()
Out[149… student
count mean std min 25% 50% 75% max count mean std min
grade
SUB1 3.0 3.0 3.0 0.0 1.5 3.0 4.5 6.0 3.0 8.666667 1.154701 8.0
SUB2 3.0 4.0 3.0 1.0 2.5 4.0 5.5 7.0 3.0 9.333333 0.577350 9.0
SUB3 3.0 5.0 3.0 2.0 3.5 5.0 6.5 8.0 3.0 9.333333 1.154701 8.0
In [150… a = pd.DataFrame(np.random.randint(1,4,[3,3]))
a
Out[150… 0 1 2
0 3 2 1
1 2 3 2
2 3 2 2
In [151… b = pd.DataFrame(np.random.randint(3,6,[3,3]))
b
Out[151… 0 1 2
0 4 4 4
1 3 5 4
2 3 5 4
In [152… c = pd.DataFrame(np.random.randint(5,8,[3,3]))
c
Out[152… 0 1 2
0 5 7 7
1 6 6 5
2 7 5 5
In [153… d = pd.concat([a,b,c])
d
Out[153… 0 1 2
0 3 2 1
1 2 3 2
2 3 2 2
0 4 4 4
1 3 5 4
2 3 5 4
0 5 7 7
1 6 6 5
2 7 5 5
In [154… e = pd.concat([a,b,c],axis=1)
e
Out[154… 0 1 2 0 1 2 0 1 2
0 3 2 1 4 4 4 5 7 7
1 2 3 2 3 5 4 6 6 5
2 3 2 2 3 5 4 7 5 5
In [155… a = pd.read_csv("STUDENT_DATA.csv")
# NOTE(review): in the frame displayed below the labels look shifted by one column:
# 'student' holds 0..8, 'subject' holds STD1..STD3, 'grade' holds SUB1..SUB3,
# 'marks' holds the letter grades and the numeric marks land in 'Unnamed: 4'.
# Presumably the CSV header row does not name the leading row-number column -
# TODO confirm against the file and pass explicit `names=` (or fix the header)
# so that later groupby/set_index calls operate on the intended columns.
a
Out[155… student subject grade marks Unnamed: 4
0 0 STD1 SUB1 AA 10
1 1 STD1 SUB2 AB 9
2 2 STD1 SUB3 BB 8
3 3 STD2 SUB1 BB 8
4 4 STD2 SUB2 AA 10
5 5 STD2 SUB3 AA 10
6 6 STD3 SUB1 BB 8
7 7 STD3 SUB2 AB 9
8 8 STD3 SUB3 AA 10
In [156… b = a.set_index('student')
b
student
0 STD1 SUB1 AA 10
1 STD1 SUB2 AB 9
2 STD1 SUB3 BB 8
3 STD2 SUB1 BB 8
4 STD2 SUB2 AA 10
5 STD2 SUB3 AA 10
6 STD3 SUB1 BB 8
7 STD3 SUB2 AB 9
8 STD3 SUB3 AA 10
In [157… a = pd.DataFrame({'STD1':['AA','AB','BB']},index=['SUB1','SUB2','SUB3'])
a
Out[157… STD1
SUB1 AA
SUB2 AB
SUB3 BB
In [158… b = pd.DataFrame({'STD2':['BB','AA','AA']},index=['SUB1','SUB2','SUB3'])
b
Out[158… STD2
SUB1 BB
SUB2 AA
SUB3 AA
In [159… a.join(b)
SUB1 AA BB
SUB2 AB AA
SUB3 BB AA
In [161… x = np.linspace(0,15,100)
y = np.sin(x)
In [162… plt.plot(x,y,'b')
plt.xlabel('time')
plt.ylabel('amplitude')
plt.title('Sine Wave')
plt.show()
In [163… z = np.cos(x)
In [164… plt.subplot(2,1,1)
plt.plot(x,y,'--b')
plt.ylabel('amplitude')
plt.title('Sine Wave')
plt.subplot(2,1,2)
plt.plot(x,y,'-*r')
plt.xlabel('time')
plt.ylabel('amplitude')
plt.title('Cos Wave')
plt.show()
ax = fig.add_axes([0,0,1,1])
In [167… plt.hist(data)
Out[167… (array([ 3., 13., 50., 121., 211., 270., 182., 105., 36., 9.]),
array([-3.54012283, -2.88125418, -2.22238553, -1.56351688, -0.90464823,
-0.24577958, 0.41308907, 1.07195772, 1.73082638, 2.38969503,
3.04856368]),
<BarContainer object of 10 artists>)
In [168… x = np.linspace(0,5,100)
fig, axes = plt.subplots(1, 2, figsize=(10,4))
ax2 = ax1.twinx()
ax2.plot(x, np.exp(x), lw=2, color="red")
ax2.set_ylabel(r"volume $(m^3)$", fontsize=18, color="red")
for label in ax2.get_yticklabels():
label.set_color("red")
In [174… tips.head()
In [175… sns.distplot(tips['total_bill'])
C:\Users\aksha\AppData\Local\Temp\ipykernel_13848\4271412032.py:1: UserWarning:
Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).
For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
sns.distplot(tips['total_bill'])
Out[175… <Axes: xlabel='total_bill', ylabel='Density'>