0% found this document useful (0 votes)
18 views

DL Lab2

Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views

DL Lab2

Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 38

U21EC082 - Jinhal Maheshwari

In [1]: # Aim : Study Pandas library, matplotlib library and seaborn library for data visualization

In [2]: import numpy as np


a = np.array([1,2,3])
b = np.array([[11,12,13],[21,22,23],[31,32,33]])
print(b[1,2])
print(b[2,2])
print(b[0][2])
print(np.arange(1,10,1))
print(np.linspace(1,10,10))
print(np.zeros(5))
print(np.zeros((5,5)))
print(np.ones((5)))
print(np.eye(3))

23
33
13
[1 2 3 4 5 6 7 8 9]
[ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]
[0. 0. 0. 0. 0.]
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
[1. 1. 1. 1. 1.]
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]

Random
In [3]: print(np.random.rand())
print(np.random.rand(5,5))
print(np.random.randn(5,5))
print(np.random.randint(1,5))
print(np.random.randint(1,5,[5,5]))
0.4811542495722224
[[0.81600037 0.68605224 0.42616075 0.45677208 0.95968575]
[0.93088032 0.33064225 0.84204006 0.46428493 0.86596586]
[0.50502598 0.49812489 0.85992254 0.53302527 0.03854323]
[0.84521242 0.28999238 0.43269991 0.63489068 0.37950397]
[0.36523548 0.3142058 0.78013427 0.9078802 0.56435694]]
[[ 1.03521258 -0.76082373 0.82305377 -0.53192129 0.43914877]
[-1.15339486 0.35871618 1.3517111 2.46412224 1.29976723]
[-0.12778346 -0.61315538 0.53263178 1.44915474 0.20682747]
[ 0.0195015 -1.3908235 0.31817992 0.52484251 -1.13534305]
[-0.79089533 1.04359843 0.36620065 0.34072973 -0.84415667]]
3
[[1 1 4 3 3]
[4 2 3 2 4]
[4 4 3 4 4]
[3 3 1 4 3]
[2 1 4 2 3]]

Array Methods
In [4]: data = [[1,4,3,5],[5,5,7,9]]
arr = np.array(data)

In [5]: arr

Out[5]: array([[1, 4, 3, 5],


[5, 5, 7, 9]])

In [6]: print(arr.dtype)
print(arr.shape)

int32
(2, 4)

In [7]: a = np.random.rand(100)
b = np.random.randn(10)

In [8]: a
Out[8]: array([0.0667193 , 0.24996791, 0.18991164, 0.68245125, 0.69585948,
0.38049153, 0.38155562, 0.9862294 , 0.69594282, 0.77594545,
0.630243 , 0.79207447, 0.40428319, 0.21133484, 0.86509552,
0.50544835, 0.0150724 , 0.62275523, 0.98706215, 0.06596928,
0.08103634, 0.69451973, 0.2302759 , 0.96136337, 0.89402532,
0.63739668, 0.63607801, 0.84750941, 0.01950963, 0.62206543,
0.81532596, 0.07627552, 0.45184363, 0.19838202, 0.52181347,
0.82862354, 0.93121004, 0.23603411, 0.64326166, 0.54478935,
0.18237217, 0.6095559 , 0.76317213, 0.40650933, 0.93058601,
0.02814542, 0.11439223, 0.55123581, 0.68336245, 0.42704194,
0.89916515, 0.71809763, 0.94386994, 0.4633378 , 0.03994971,
0.87034481, 0.85223003, 0.2285538 , 0.98959497, 0.56410605,
0.07144686, 0.12718115, 0.02560205, 0.77798617, 0.0258084 ,
0.12678993, 0.21281529, 0.12848544, 0.34730102, 0.09806135,
0.46669377, 0.28006002, 0.02748664, 0.03385199, 0.27847907,
0.49061412, 0.02777332, 0.81850325, 0.31134813, 0.87079945,
0.92933361, 0.45399394, 0.58539194, 0.0412505 , 0.57143901,
0.36690098, 0.93040279, 0.35587989, 0.49084104, 0.59497039,
0.5924753 , 0.40145439, 0.35390004, 0.51806492, 0.06584004,
0.54030551, 0.62285484, 0.42619897, 0.41632971, 0.7137144 ])

In [9]: b

Out[9]: array([-1.47429956, 1.13147032, 1.23224329, -0.05600973, 1.22352228,


0.60733283, 1.32561277, -0.6887697 , -1.0138324 , 0.23230032])

In [10]: a.min()

Out[10]: 0.015072397162660622

In [11]: a.max()

Out[11]: 0.989594972948553

In [12]: a.mean()

Out[12]: 0.4786000487777759

In [13]: b.min(),b.max()

Out[13]: (-1.4742995574503355, 1.3256127718969688)

In [14]: b.mean()

Out[14]: 0.25195704290761534

In [15]: c = np.random.rand(4)
c

Out[15]: array([0.10000233, 0.8027446 , 0.69281857, 0.46018424])

In [16]: c.argmin()

Out[16]: 0

In [17]: c.argmax()
Out[17]: 1

In [18]: d = c.reshape(2,2)
print(d.shape)
d

(2, 2)
Out[18]: array([[0.10000233, 0.8027446 ],
[0.69281857, 0.46018424]])

In [19]: c.reshape(4)

Out[19]: array([0.10000233, 0.8027446 , 0.69281857, 0.46018424])

In [20]: c.reshape(2,2)

Out[20]: array([[0.10000233, 0.8027446 ],


[0.69281857, 0.46018424]])

In [21]: c.flatten()

Out[21]: array([0.10000233, 0.8027446 , 0.69281857, 0.46018424])

In [22]: e = np.array([0,1])

In [23]: e

Out[23]: array([0, 1])

In [24]: f = np.array([0.])
f

Out[24]: array([0.])

In [25]: e.dtype

Out[25]: dtype('int32')

In [26]: f.dtype

Out[26]: dtype('float64')

Indexing and broadcasting


1d
In [27]: a = np.arange(5)

In [28]: a

Out[28]: array([0, 1, 2, 3, 4])

In [29]: a[:]
Out[29]: array([0, 1, 2, 3, 4])

In [30]: a[2]

Out[30]: 2

In [31]: a[2:4]

Out[31]: array([2, 3])

In [32]: a[-1]

Out[32]: 4

In [33]: a[2:]

Out[33]: array([2, 3, 4])

In [34]: a[:2]

Out[34]: array([0, 1])

In [35]: a[:2]=10

In [36]: a

Out[36]: array([10, 10, 2, 3, 4])

In [37]: a = np.arange(10)

In [38]: a

Out[38]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]: b = a[2:7]

In [40]: b

Out[40]: array([2, 3, 4, 5, 6])

In [41]: b[2:5]=10

In [42]: b

Out[42]: array([ 2, 3, 10, 10, 10])

In [43]: a

Out[43]: array([ 0, 1, 2, 3, 10, 10, 10, 7, 8, 9])

In [44]: c = a.copy()

In [45]: c
Out[45]: array([ 0, 1, 2, 3, 10, 10, 10, 7, 8, 9])

In [46]: c[4:7]=0

In [47]: c

Out[47]: array([0, 1, 2, 3, 0, 0, 0, 7, 8, 9])

2d
In [48]: a = np.random.rand(5,5)

In [49]: a

Out[49]: array([[4.13194693e-01, 3.42367648e-01, 8.23871339e-01, 1.62693171e-01,


3.33730761e-01],
[8.94658016e-01, 4.22719533e-01, 1.86298826e-01, 5.21480828e-01,
1.21599174e-02],
[7.67741406e-06, 7.04318562e-01, 1.82695434e-01, 7.82246802e-01,
9.06395802e-01],
[9.42145059e-01, 8.85820142e-01, 9.61163006e-01, 1.46818575e-01,
7.73911990e-01],
[4.82852952e-01, 1.27025177e-01, 1.22871447e-01, 4.62138706e-01,
3.34508235e-01]])

In [50]: a[0]

Out[50]: array([0.41319469, 0.34236765, 0.82387134, 0.16269317, 0.33373076])

In [51]: a[:,0]

Out[51]: array([4.13194693e-01, 8.94658016e-01, 7.67741406e-06, 9.42145059e-01,


4.82852952e-01])

In [52]: a[1:4,1:4]=10

In [53]: a

Out[53]: array([[4.13194693e-01, 3.42367648e-01, 8.23871339e-01, 1.62693171e-01,


3.33730761e-01],
[8.94658016e-01, 1.00000000e+01, 1.00000000e+01, 1.00000000e+01,
1.21599174e-02],
[7.67741406e-06, 1.00000000e+01, 1.00000000e+01, 1.00000000e+01,
9.06395802e-01],
[9.42145059e-01, 1.00000000e+01, 1.00000000e+01, 1.00000000e+01,
7.73911990e-01],
[4.82852952e-01, 1.27025177e-01, 1.22871447e-01, 4.62138706e-01,
3.34508235e-01]])

logical operator to filter


In [54]: a = np.random.randn(5,5)

In [55]: a
Out[55]: array([[-0.52943346, -1.3563547 , -0.24950645, 0.36018632, 1.2761073 ],
[ 1.79306115, -0.40990395, -0.96592178, -1.05521176, 1.35609131],
[-1.22044469, 0.67669323, -1.79852855, -1.97324236, -1.08481568],
[-0.39202164, 0.46372706, -0.56059791, 0.6952029 , 0.47310958],
[-1.23361362, -1.61172041, -0.27054277, -0.83542174, 1.12427138]])

In [56]: a>0

Out[56]: array([[False, False, False, True, True],


[ True, False, False, False, True],
[False, True, False, False, False],
[False, True, False, True, True],
[False, False, False, False, True]])

In [57]: a[a>0]=10
a

Out[57]: array([[-0.52943346, -1.3563547 , -0.24950645, 10. , 10. ],


[10. , -0.40990395, -0.96592178, -1.05521176, 10. ],
[-1.22044469, 10. , -1.79852855, -1.97324236, -1.08481568],
[-0.39202164, 10. , -0.56059791, 10. , 10. ],
[-1.23361362, -1.61172041, -0.27054277, -0.83542174, 10. ]])

Operator
In [58]: a = np.array([0,2,4,6,8])
b = np.array([1,3,5,7,9])
c=a+b
c

Out[58]: array([ 1, 5, 9, 13, 17])

In [59]: d = b - 1
d

Out[59]: array([0, 2, 4, 6, 8])

In [60]: c = d==a
c

Out[60]: array([ True, True, True, True, True])

In [61]: c.any()

Out[61]: True

In [62]: d = np.array([0,5,6,9,9])

In [63]: c = d==a
c

Out[63]: array([ True, False, False, False, False])

In [64]: c.any()
Out[64]: True

In [65]: c.all()

Out[65]: False

functions
In [66]: a = np.arange(0,1,0.1)

In [67]: a

Out[67]: array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [68]: np.square(a)

Out[68]: array([0. , 0.01, 0.04, 0.09, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81])

In [69]: np.sin(a)

Out[69]: array([0. , 0.09983342, 0.19866933, 0.29552021, 0.38941834,


0.47942554, 0.56464247, 0.64421769, 0.71735609, 0.78332691])

In [70]: np.exp(a)

Out[70]: array([1. , 1.10517092, 1.22140276, 1.34985881, 1.4918247 ,


1.64872127, 1.8221188 , 2.01375271, 2.22554093, 2.45960311])

In [71]: A = np.array([[1, 2], [3, 4]])


B = np.array([[5, 6], [7, 8]])

C = np.dot(A, B)

print(C)

[[19 22]
[43 50]]

In [72]: # Define a matrix


A = np.array([[1, 2], [3, 4]])

# Compute eigenvalues and eigenvectors


eigenvalues, eigenvectors = np.linalg.eig(A)

print("Eigenvalues:")
print(eigenvalues)

print("\nEigenvectors:")
print(eigenvectors)

Eigenvalues:
[-0.37228132 5.37228132]

Eigenvectors:
[[-0.82456484 -0.41597356]
[ 0.56576746 -0.90937671]]
In [73]: # Define a matrix
A = np.array([[1, 2, 3], [4, 5, 6]])

# Compute SVD
U, S, V = np.linalg.svd(A)

print("U:")
print(U)

print("\nS:")
print(S)

print("\nV:")
print(V)

U:
[[-0.3863177 0.92236578]
[-0.92236578 -0.3863177 ]]

S:
[9.508032 0.77286964]

V:
[[-0.42866713 -0.56630692 -0.7039467 ]
[-0.80596391 -0.11238241 0.58119908]
[ 0.40824829 -0.81649658 0.40824829]]

Pandas
In [74]: import pandas as pd

Series
In [75]: pd.Series([6,2,3])

Out[75]: 0 6
1 2
2 3
dtype: int64

In [76]: pd.Series(data=[6,2,3],index=['abc','def','ghi'])

Out[76]: abc 6
def 2
ghi 3
dtype: int64

In [77]: a = pd.Series(data=[6,2,3],index=['abc','def','ghi'])
a['abc']

Out[77]: 6

In [78]: pd.Series([6,2,3],['abc','def','ghi'])
Out[78]: abc 6
def 2
ghi 3
dtype: int64

In [79]: pd.Series(np.array([6,2,3]),['abc','def','ghi'])

Out[79]: abc 6
def 2
ghi 3
dtype: int32

In [80]: dict = {'abc':6,'def':2,'ghi':3}


dict

Out[80]: {'abc': 6, 'def': 2, 'ghi': 3}

In [81]: pd.Series(dict)

Out[81]: abc 6
def 2
ghi 3
dtype: int64

In [82]: a = [2,3,5]

In [83]: b = pd.Series([sum,min,max])

In [84]: b

Out[84]: 0 <built-in function sum>


1 <built-in function min>
2 <built-in function max>
dtype: object

In [85]: b[0](a)

Out[85]: 10

In [86]: b[1](a)

Out[86]: 2

In [87]: b[2](a)

Out[87]: 5

In [88]: a = {'a':5,'b':3,'c':10,'d':20}
b = {'a':2,'b':3,'c':10}

In [89]: pd.Series(a)+pd.Series(b)

Out[89]: a 7.0
b 6.0
c 20.0
d NaN
dtype: float64
Dataframe
In [90]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]])

Out[90]: 0 1 2

0 1 2 3

1 4 5 6

2 7 8 9

In [91]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],columns=['y1','y2','y3'])

Out[91]: y1 y2 y3

0 1 2 3

1 4 5 6

2 7 8 9

In [92]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],index=['x1','x2','x3'])

Out[92]: 0 1 2

x1 1 2 3

x2 4 5 6

x3 7 8 9

In [93]: pd.DataFrame(data=[[1,2,3],[4,5,6],[7,8,9]],index=['x1','x2','x3'],columns=['y1','y2','y3'])

Out[93]: y1 y2 y3

x1 1 2 3

x2 4 5 6

x3 7 8 9

In [94]: a = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]],['x1','x2','x3'],['y1','y2','y3'])
a

Out[94]: y1 y2 y3

x1 1 2 3

x2 4 5 6

x3 7 8 9

Indexing
In [95]: a['y1']

Out[95]: x1 1
x2 4
x3 7
Name: y1, dtype: int64

In [96]: a.y1

Out[96]: x1 1
x2 4
x3 7
Name: y1, dtype: int64

In [97]: a.loc['x1']

Out[97]: y1 1
y2 2
y3 3
Name: x1, dtype: int64

In [98]: a.iloc[0]

Out[98]: y1 1
y2 2
y3 3
Name: x1, dtype: int64

In [99]: a.loc['x1','y1']

Out[99]: 1

In [100… a.iloc[0,0]

Out[100… 1

In [101… a.loc[['x1','x3'],['y1','y3']]

Out[101… y1 y3

x1 1 3

x3 7 9

In [102… a.iloc[[0,1],[0,1]]

Out[102… y1 y2

x1 1 2

x2 4 5

In [103… a
Out[103… y1 y2 y3

x1 1 2 3

x2 4 5 6

x3 7 8 9

In [104… a['sum']=a['y1']+a['y2']+a['y3']
a

Out[104… y1 y2 y3 sum

x1 1 2 3 6

x2 4 5 6 15

x3 7 8 9 24

In [105… a.drop('y3',axis=1)

Out[105… y1 y2 sum

x1 1 2 6

x2 4 5 15

x3 7 8 24

In [106… a.drop('x2',axis=0)

Out[106… y1 y2 y3 sum

x1 1 2 3 6

x3 7 8 9 24

In [107… a>5

Out[107… y1 y2 y3 sum

x1 False False False True

x2 False False True True

x3 True True True True

In [108… a[a>5]

Out[108… y1 y2 y3 sum

x1 NaN NaN NaN 6

x2 NaN NaN 6.0 15

x3 7.0 8.0 9.0 24


In [109… a[a['y2']>5]

Out[109… y1 y2 y3 sum

x3 7 8 9 24

In [110… a[(a['y2']>2) & (a['y1']<5) ]

Out[110… y1 y2 y3 sum

x2 4 5 6 15

In [111… a

Out[111… y1 y2 y3 sum

x1 1 2 3 6

x2 4 5 6 15

x3 7 8 9 24

In [112… a.reset_index()

Out[112… index y1 y2 y3 sum

0 x1 1 2 3 6

1 x2 4 5 6 15

2 x3 7 8 9 24

In [113… a.set_index('sum')

Out[113… y1 y2 y3

sum

6 1 2 3

15 4 5 6

24 7 8 9

In [114… a

Out[114… y1 y2 y3 sum

x1 1 2 3 6

x2 4 5 6 15

x3 7 8 9 24

In [115… b = a.set_index('sum')
In [116… b

Out[116… y1 y2 y3

sum

6 1 2 3

15 4 5 6

24 7 8 9

In [117… a.set_index('sum',inplace=True)

In [118… a

Out[118… y1 y2 y3

sum

6 1 2 3

15 4 5 6

24 7 8 9

Multi Index
In [119… first = [1,1,1,1,2,2,2,2]
second = [1,1,2,2,1,1,2,2]
third = [1,2,1,2,1,2,1,2]
ind = list(zip(first,second,third))
ind = pd.MultiIndex.from_tuples(ind)

In [120… ind

Out[120… MultiIndex([(1, 1, 1),


(1, 1, 2),
(1, 2, 1),
(1, 2, 2),
(2, 1, 1),
(2, 1, 2),
(2, 2, 1),
(2, 2, 2)],
)

In [121… a = pd.DataFrame(np.random.rand(8,3),index=ind,columns=['x1','x2','x3'])
a
Out[121… x1 x2 x3

1 1 1 0.992617 0.295806 0.763376

2 0.890585 0.787643 0.718873

2 1 0.713340 0.347137 0.122599

2 0.814235 0.865838 0.960321

2 1 1 0.403557 0.123391 0.747101

2 0.371121 0.177547 0.305646

2 1 0.277509 0.134885 0.201760

2 0.563267 0.223719 0.629745

In [122… a.loc[1]

Out[122… x1 x2 x3

1 1 0.992617 0.295806 0.763376

2 0.890585 0.787643 0.718873

2 1 0.713340 0.347137 0.122599

2 0.814235 0.865838 0.960321

In [123… a.loc[1].loc[2]

Out[123… x1 x2 x3

1 0.713340 0.347137 0.122599

2 0.814235 0.865838 0.960321

In [124… a.loc[1].loc[2].loc[1]

Out[124… x1 0.713340
x2 0.347137
x3 0.122599
Name: 1, dtype: float64

In [125… a.index.names = ['first','second','third']

In [126… a
Out[126… x1 x2 x3

first second third

1 1 1 0.992617 0.295806 0.763376

2 0.890585 0.787643 0.718873

2 1 0.713340 0.347137 0.122599

2 0.814235 0.865838 0.960321

2 1 1 0.403557 0.123391 0.747101

2 0.371121 0.177547 0.305646

2 1 0.277509 0.134885 0.201760

2 0.563267 0.223719 0.629745

Cleaning of data
In [127… a = pd.DataFrame(np.random.randn(5,5),'x1 x2 x3 x4 x5'.split(),'y1 y2 y3 y4 y5'.split())
a

Out[127… y1 y2 y3 y4 y5

x1 -1.448586 0.422869 -0.358784 -0.484319 -1.365999

x2 -0.887149 -0.649741 0.471689 -0.590671 -1.660827

x3 -0.320703 1.588749 -1.769257 0.451186 0.757541

x4 -0.466107 1.290884 -0.651038 -1.214620 0.412459

x5 0.176581 -0.009012 -0.421195 1.162797 -0.587634

In [128… c = a[a>-1]
c

Out[128… y1 y2 y3 y4 y5

x1 NaN 0.422869 -0.358784 -0.484319 NaN

x2 -0.887149 -0.649741 0.471689 -0.590671 NaN

x3 -0.320703 1.588749 NaN 0.451186 0.757541

x4 -0.466107 1.290884 -0.651038 NaN 0.412459

x5 0.176581 -0.009012 -0.421195 1.162797 -0.587634

In [129… c.dropna()

Out[129… y1 y2 y3 y4 y5

x5 0.176581 -0.009012 -0.421195 1.162797 -0.587634


In [130… b = pd.DataFrame(np.random.randn(5,5),'x1 x2 x3 x4 x5'.split(),'y1 y2 y3 y4 y5'.split())
b

Out[130… y1 y2 y3 y4 y5

x1 0.117845 -0.191077 0.128738 -1.182221 -1.606983

x2 0.206689 -0.249197 -1.343354 0.464192 -2.247734

x3 1.630035 -0.060636 0.946045 -0.865498 -0.389274

x4 -0.593982 1.038098 0.387769 0.077844 0.073295

x5 0.919375 1.185456 1.437039 -0.577709 -0.427376

In [131… b.iloc[1,4]=0
b.iloc[3,3]=0
b

Out[131… y1 y2 y3 y4 y5

x1 0.117845 -0.191077 0.128738 -1.182221 -1.606983

x2 0.206689 -0.249197 -1.343354 0.464192 0.000000

x3 1.630035 -0.060636 0.946045 -0.865498 -0.389274

x4 -0.593982 1.038098 0.387769 0.000000 0.073295

x5 0.919375 1.185456 1.437039 -0.577709 -0.427376

In [132… c = a/b
c

Out[132… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x2 -4.292194 2.607333 -0.351128 -1.272471 -inf

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x4 0.784716 1.243509 -1.678931 -inf 5.627419

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [133… c.replace([np.inf,-np.inf],np.nan,inplace=True)
c
Out[133… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x2 -4.292194 2.607333 -0.351128 -1.272471 NaN

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x4 0.784716 1.243509 -1.678931 NaN 5.627419

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [134… c.dropna()

Out[134… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [135… c = a/b
c

Out[135… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x2 -4.292194 2.607333 -0.351128 -1.272471 -inf

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x4 0.784716 1.243509 -1.678931 -inf 5.627419

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [136… c.replace([np.inf,-np.inf],np.nan).dropna()

Out[136… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [137… c
Out[137… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x2 -4.292194 2.607333 -0.351128 -1.272471 -inf

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x4 0.784716 1.243509 -1.678931 -inf 5.627419

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [138… c = a/b
c.replace([np.inf,-np.inf],np.nan,inplace=True)

In [139… c

Out[139… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x2 -4.292194 2.607333 -0.351128 -1.272471 NaN

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x4 0.784716 1.243509 -1.678931 NaN 5.627419

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

In [140… c.dropna(axis=1)

Out[140… y1 y2 y3

x1 -12.292283 -2.213085 -2.786929

x2 -4.292194 2.607333 -0.351128

x3 -0.196746 -26.201459 -1.870162

x4 0.784716 1.243509 -1.678931

x5 0.192067 -0.007602 -0.293099

In [141… c.fillna(c.mean())

Out[141… y1 y2 y3 y4 y5

x1 -12.292283 -2.213085 -2.786929 0.409668 0.850040

x2 -4.292194 2.607333 -0.351128 -1.272471 1.476601

x3 -0.196746 -26.201459 -1.870162 -0.521303 -1.946037

x4 0.784716 1.243509 -1.678931 -0.849219 5.627419

x5 0.192067 -0.007602 -0.293099 -2.012773 1.374982

Data loading and Inspection


In [142… a = pd.read_csv("STUDENT_DATA.csv")

In [143… a

Out[143… student subject grade marks Unnamed: 4

0 0 STD1 SUB1 AA 10

1 1 STD1 SUB2 AB 9

2 2 STD1 SUB3 BB 8

3 3 STD2 SUB1 BB 8

4 4 STD2 SUB2 AA 10

5 5 STD2 SUB3 AA 10

6 6 STD3 SUB1 BB 8

7 7 STD3 SUB2 AB 9

8 8 STD3 SUB3 AA 10

In [144… a.to_csv('demo',index=False)

In [145… b = a.groupby('student')

In [146… b

Out[146… <pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000015207A8A190>

In [147… b.describe()

Out[147… Unnamed: 4

count mean std min 25% 50% 75% max

student

0 1.0 10.0 NaN 10.0 10.0 10.0 10.0 10.0

1 1.0 9.0 NaN 9.0 9.0 9.0 9.0 9.0

2 1.0 8.0 NaN 8.0 8.0 8.0 8.0 8.0

3 1.0 8.0 NaN 8.0 8.0 8.0 8.0 8.0

4 1.0 10.0 NaN 10.0 10.0 10.0 10.0 10.0

5 1.0 10.0 NaN 10.0 10.0 10.0 10.0 10.0

6 1.0 8.0 NaN 8.0 8.0 8.0 8.0 8.0

7 1.0 9.0 NaN 9.0 9.0 9.0 9.0 9.0

8 1.0 10.0 NaN 10.0 10.0 10.0 10.0 10.0

In [148… a.groupby('subject').describe()
Out[148… student

count mean std min 25% 50% 75% max count mean std mi

subject

STD1 3.0 1.0 1.0 0.0 0.5 1.0 1.5 2.0 3.0 9.000000 1.000000 8.

STD2 3.0 4.0 1.0 3.0 3.5 4.0 4.5 5.0 3.0 9.333333 1.154701 8.

STD3 3.0 7.0 1.0 6.0 6.5 7.0 7.5 8.0 3.0 9.000000 1.000000 8.

In [149… a.groupby('grade').describe()

Out[149… student

count mean std min 25% 50% 75% max count mean std min

grade

SUB1 3.0 3.0 3.0 0.0 1.5 3.0 4.5 6.0 3.0 8.666667 1.154701 8.0

SUB2 3.0 4.0 3.0 1.0 2.5 4.0 5.5 7.0 3.0 9.333333 0.577350 9.0

SUB3 3.0 5.0 3.0 2.0 3.5 5.0 6.5 8.0 3.0 9.333333 1.154701 8.0

In [150… a = pd.DataFrame(np.random.randint(1,4,[3,3]))
a

Out[150… 0 1 2

0 3 2 1

1 2 3 2

2 3 2 2

In [151… b = pd.DataFrame(np.random.randint(3,6,[3,3]))
b

Out[151… 0 1 2

0 4 4 4

1 3 5 4

2 3 5 4

In [152… c = pd.DataFrame(np.random.randint(5,8,[3,3]))
c
Out[152… 0 1 2

0 5 7 7

1 6 6 5

2 7 5 5

In [153… d = pd.concat([a,b,c])
d

Out[153… 0 1 2

0 3 2 1

1 2 3 2

2 3 2 2

0 4 4 4

1 3 5 4

2 3 5 4

0 5 7 7

1 6 6 5

2 7 5 5

In [154… e = pd.concat([a,b,c],axis=1)
e

Out[154… 0 1 2 0 1 2 0 1 2

0 3 2 1 4 4 4 5 7 7

1 2 3 2 3 5 4 6 6 5

2 3 2 2 3 5 4 7 5 5

In [155… a = pd.read_csv("STUDENT_DATA.csv")
a
Out[155… student subject grade marks Unnamed: 4

0 0 STD1 SUB1 AA 10

1 1 STD1 SUB2 AB 9

2 2 STD1 SUB3 BB 8

3 3 STD2 SUB1 BB 8

4 4 STD2 SUB2 AA 10

5 5 STD2 SUB3 AA 10

6 6 STD3 SUB1 BB 8

7 7 STD3 SUB2 AB 9

8 8 STD3 SUB3 AA 10

In [156… b = a.set_index('student')
b

Out[156… subject grade marks Unnamed: 4

student

0 STD1 SUB1 AA 10

1 STD1 SUB2 AB 9

2 STD1 SUB3 BB 8

3 STD2 SUB1 BB 8

4 STD2 SUB2 AA 10

5 STD2 SUB3 AA 10

6 STD3 SUB1 BB 8

7 STD3 SUB2 AB 9

8 STD3 SUB3 AA 10

In [157… a = pd.DataFrame({'STD1':['AA','AB','BB']},index=['SUB1','SUB2','SUB3'])
a

Out[157… STD1

SUB1 AA

SUB2 AB

SUB3 BB

In [158… b = pd.DataFrame({'STD2':['BB','AA','AA']},index=['SUB1','SUB2','SUB3'])
b
Out[158… STD2

SUB1 BB

SUB2 AA

SUB3 AA

In [159… a.join(b)

Out[159… STD1 STD2

SUB1 AA BB

SUB2 AB AA

SUB3 BB AA

In [160… import matplotlib.pyplot as plt


%matplotlib inline

In [161… x = np.linspace(0,15,100)
y = np.sin(x)

In [162… plt.plot(x,y,'b')
plt.xlabel('time')
plt.ylabel('amplitude')
plt.title('Sine Wave')
plt.show()
In [163… z = np.cos(x)

In [164… plt.subplot(2,1,1)
plt.plot(x,y,'--b')
plt.ylabel('amplitude')
plt.title('Sine Wave')
plt.subplot(2,1,2)
plt.plot(x,z,'-*r')
plt.xlabel('time')
plt.ylabel('amplitude')
plt.title('Cos Wave')
plt.show()

In [165… fig = plt.figure()

ax = fig.add_axes([0,0,1,1])

ax.plot(x, x**2, label="x**2")


ax.plot(x, x**3, label="x**3")
ax.legend()

Out[165… <matplotlib.legend.Legend at 0x15208ff9290>


In [166… data = np.random.normal(0, 1, 1000)

In [167… plt.hist(data)

Out[167… (array([ 3., 13., 50., 121., 211., 270., 182., 105., 36., 9.]),
array([-3.54012283, -2.88125418, -2.22238553, -1.56351688, -0.90464823,
-0.24577958, 0.41308907, 1.07195772, 1.73082638, 2.38969503,
3.04856368]),
<BarContainer object of 10 artists>)
In [168… x = np.linspace(0,5,100)
fig, axes = plt.subplots(1, 2, figsize=(10,4))

axes[0].plot(x, x**2, x, np.exp(x))


axes[0].set_title("Normal scale")

axes[1].plot(x, x**2, x, np.exp(x))


axes[1].set_yscale("log")
axes[1].set_title("Logarithmic scale (y)");

In [169… fig, ax1 = plt.subplots()

ax1.plot(x, x**2, lw=2, color="blue")


ax1.set_ylabel(r"area $(m^2)$", fontsize=18, color="blue")
for label in ax1.get_yticklabels():
    label.set_color("blue")

ax2 = ax1.twinx()
ax2.plot(x, np.exp(x), lw=2, color="red")
ax2.set_ylabel(r"volume $(m^3)$", fontsize=18, color="red")
for label in ax2.get_yticklabels():
    label.set_color("red")

In [170… data = {'x': [1, 2, 3, 4, 5], 'y': [2, 4, 6, 8, 10]}


df = pd.DataFrame(data)

# Plotting a line plot


df.plot(x='x', y='y')
plt.show()
In [171… !pip install seaborn

Requirement already satisfied: seaborn in c:\users\aksha\anaconda3\envs\jinhal\li


b\site-packages (0.13.2)
Requirement already satisfied: numpy!=1.24.0,>=1.20 in c:\users\aksha\anaconda3\e
nvs\jinhal\lib\site-packages (from seaborn) (1.26.0)
Requirement already satisfied: pandas>=1.2 in c:\users\aksha\anaconda3\envs\jinha
l\lib\site-packages (from seaborn) (2.1.4)
Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in c:\users\aksha\anaconda
3\envs\jinhal\lib\site-packages (from seaborn) (3.8.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\aksha\anaconda3\envs
\jinhal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\aksha\anaconda3\envs\jinh
al\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\aksha\anaconda3\envs
\jinhal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\aksha\anaconda3\envs
\jinhal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\aksha\anaconda3\envs\j
inhal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (23.1)
Requirement already satisfied: pillow>=6.2.0 in c:\users\aksha\anaconda3\envs\jin
hal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.0.1)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\aksha\anaconda3\envs
\jinhal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\aksha\anaconda3\e
nvs\jinhal\lib\site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\aksha\anaconda3\envs\jinh
al\lib\site-packages (from pandas>=1.2->seaborn) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\aksha\anaconda3\envs\ji
nhal\lib\site-packages (from pandas>=1.2->seaborn) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\aksha\anaconda3\envs\jinhal\l
ib\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn)
(1.16.0)

In [172… import seaborn as sns

In [173… tips = sns.load_dataset('tips')

In [174… tips.head()

Out[174… total_bill tip sex smoker day time size

0 16.99 1.01 Female No Sun Dinner 2

1 10.34 1.66 Male No Sun Dinner 3

2 21.01 3.50 Male No Sun Dinner 3

3 23.68 3.31 Male No Sun Dinner 2

4 24.59 3.61 Female No Sun Dinner 4

In [175… sns.distplot(tips['total_bill'])
C:\Users\aksha\AppData\Local\Temp\ipykernel_13848\4271412032.py:1: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(tips['total_bill'])
Out[175… <Axes: xlabel='total_bill', ylabel='Density'>

In [176… sns.scatterplot(data=tips, x="total_bill", y="tip")

Out[176… <Axes: xlabel='total_bill', ylabel='tip'>


In [177… sns.jointplot(x='total_bill',y='tip',data=tips,kind='scatter')

Out[177… <seaborn.axisgrid.JointGrid at 0x1520da03750>


In [178… sns.jointplot(x='total_bill',y='tip',data=tips,kind='hex')

Out[178… <seaborn.axisgrid.JointGrid at 0x1520daa6d90>


In [179… sns.pairplot(tips)

Out[179… <seaborn.axisgrid.PairGrid at 0x1520d9bc410>


In [180… sns.pairplot(tips,hue='sex',palette='coolwarm')

Out[180… <seaborn.axisgrid.PairGrid at 0x1520ede6290>


In [181… sns.barplot(x='day',y='tip',hue='time',data=tips)

Out[181… <Axes: xlabel='day', ylabel='tip'>


In [182… sns.barplot(x='sex',y='tip',hue='day',data=tips)

Out[182… <Axes: xlabel='sex', ylabel='tip'>

In [183… sns.boxplot(x="day", y="total_bill",hue='time', data=tips)

Out[183… <Axes: xlabel='day', ylabel='total_bill'>


In [184… sns.violinplot(x="day", y="total_bill", hue='smoker',data=tips)

Out[184… <Axes: xlabel='day', ylabel='total_bill'>

You might also like