PANDAS intro 1
PANDAS intro 1
In [3]: a=[1,5,10]
x=pd.Series(a)
print(x)
0 1
1 5
2 10
dtype: int64
In [4]: x[1]
5
Out[4]:
In [5]: a=["Maths","Science","Chem"]
x=pd.Series(a)
print(x)
0 Maths
1 Science
2 Chem
dtype: object
In [6]: a=["Maths","Science","Chem"]
x=pd.Series(a,index=["x","y","z"])
print(x)
x Maths
y Science
z Chem
dtype: object
In [7]: x["y"]
'Science'
Out[7]:
In [8]: Calories={"Day1":420,"Day2":380,"Day3":390}
x=pd.Series(Calories)
print(x)
Day1 420
Day2 380
Day3 390
dtype: int64
In [9]: data={"Calories":[420,380,390],"Duration":[50,40,45]}
df=pd.DataFrame(data)
df
0 420 50
1 380 40
2 390 45
In [10]: df.loc[2]
Calories 390
Out[10]:
Duration 45
Name: 2, dtype: int64
In [11]: df.loc[[0,1]]
0 420 50
1 380 40
In [12]: data={"Calories":[420,380,390],"Duration":[50,40,45]}
x=pd.DataFrame(data,index=["Day1","Day2","Day3"])
x
Day1 420 50
Day2 380 40
Day3 390 45
In [13]: data={"Name":["Vishnu","Saanvi","Chetan","Phani","Ammu"],"Age":[2,5,10,12,13],"Branch":[
df=pd.DataFrame(data)
df
0 Vishnu 2 AI
1 Saanvi 5 ML
2 Chetan 10 CSE
3 Phani 12 ECE
4 Ammu 13 BKT
In [14]: df=pd.read_csv("grades.csv")
df
Out[14]: Last name "First name" "SSN" "Test1" "Test2" "Test3" "Test4" "Final" "Grade"
8 Airpump "Andrew" "223-45-6789" 49.0 1.0 90.0 100.0 83.0 "A" NaN
In [17]: df.head()
In [18]: df.head(20)
9 60 98 124 269.0
14 60 98 123 275.0
15 60 98 120 215.2
17 45 90 112 NaN
19 45 97 125 243.0
In [19]: df.tail()
In [20]: df.tail(20)
In [21]: df.to_string()
' Duration Pulse Maxpulse Calories\n0 60 110 130 409.1\n1
Out[21]:
60 117 145 479.0\n2 60 103 135 340.0\n3
45 109 175 282.4\n4 45 117 148 406.0\n5
60 102 127 300.0\n6 60 110 136 374.0\n7 45
104 134 253.3\n8 30 109 133 195.1\n9 60 9
8 124 269.0\n10 60 103 147 329.3\n11 60 100
120 250.7\n12 60 106 128 345.3\n13 60 104
132 379.3\n14 60 98 123 275.0\n15 60 98 120
215.2\n16 60 100 120 300.0\n17 45 90 112
NaN\n18 60 103 123 323.0\n19 45 97 125 243.
0\n20 60 108 131 364.2\n21 45 100 119 282.0\n2
2 60 130 101 300.0\n23 45 105 132 246.0\n24
60 102 126 334.5\n25 60 100 120 250.0\n26
60 92 118 241.0\n27 60 103 132 NaN\n28 60
100 132 280.0\n29 60 102 129 380.3\n30 60
92 115 243.0\n31 45 90 112 180.1\n32 60 101
124 299.0\n33 60 93 113 223.0\n34 60 107
136 361.0\n35 60 114 140 415.0\n36 60 102 127
300.0\n37 60 100 120 300.0\n38 60 100 120
300.0\n39 45 104 129 266.0\n40 45 90 112 180.
1\n41 60 98 126 286.0\n42 60 100 122 329.4\n4
3 60 111 138 400.0\n44 60 111 131 397.0\n45
60 99 119 273.0\n46 60 109 153 387.6\n47
45 111 136 300.0\n48 45 108 129 298.0\n49 60
111 139 397.6\n50 60 107 136 380.2\n51 80
123 146 643.1\n52 60 106 130 263.0\n53 60 118
151 486.0\n54 30 136 175 238.0\n55 60 121
146 450.7\n56 60 118 121 413.0\n57 45 115 14
4 305.0\n58 20 153 172 226.4\n59 45 123 152
321.0\n60 210 108 160 1376.0\n61 160 110 137 103
4.4\n62 160 109 135 853.0\n63 45 118 141 341.0
\n64 20 110 130 131.4\n65 180 90 130 800.4\n66
150 105 135 873.4\n67 150 107 130 816.0\n68
20 106 136 110.4\n69 300 108 143 1500.2\n70
150 97 129 1115.0\n71 60 109 153 387.6\n72 90
100 127 700.0\n73 150 97 127 953.2\n74 45 1
14 146 304.0\n75 90 98 125 563.2\n76 45 105
134 251.0\n77 45 110 141 300.0\n78 120 100
130 500.4\n79 270 100 131 1729.0\n80 30 159 182
319.2\n81 45 149 169 344.0\n82 30 103 139
151.1\n83 120 100 130 500.0\n84 45 100 120 225.
3\n85 30 151 170 300.0\n86 45 102 136 234.0\n8
7 120 100 157 1000.1\n88 45 129 103 242.0\n89
20 83 107 50.3\n90 180 101 127 600.1\n91
45 107 137 NaN\n92 30 90 107 105.3\n93 15
80 100 50.5\n94 20 150 171 127.4\n95 20
151 168 229.4\n96 30 95 128 128.2\n97 25 152
168 244.2\n98 30 109 131 188.2\n99 90 93
124 604.1\n100 20 95 112 77.7\n101 90 90 11
0 500.0\n102 90 90 100 500.0\n103 90 90 100
500.4\n104 30 92 108 92.7\n105 30 93 128 12
4.0\n106 180 90 120 800.3\n107 30 90 120 86.2
\n108 90 90 120 500.3\n109 210 137 184 1860.4\n11
0 60 102 124 325.2\n111 45 107 124 275.0\n112
15 124 139 124.2\n113 45 100 120 225.3\n114
60 108 131 367.6\n115 60 108 151 351.7\n116 60
116 141 443.0\n117 60 97 122 277.4\n118 60 10
5 125 NaN\n119 60 103 124 332.7\n120 30 112
137 193.9\n121 45 100 120 100.7\n122 60 119
169 336.7\n123 60 107 127 344.9\n124 60 111 151
368.5\n125 60 98 122 271.0\n126 60 97 124
275.3\n127 60 109 127 382.0\n128 90 99 125 466.
4\n129 60 114 151 384.0\n130 60 104 134 342.5\n1
31 60 107 138 357.5\n132 60 103 133 335.0\n133
60 106 132 327.5\n134 60 103 136 339.0\n135
20 136 156 189.0\n136 45 117 143 317.7\n137 45
115 137 318.0\n138 45 113 138 308.0\n139 20
141 162 222.4\n140 60 108 135 390.0\n141 60 97
127 NaN\n142 45 100 120 250.4\n143 45 122
149 335.4\n144 60 136 170 470.2\n145 45 106 12
6 270.8\n146 60 107 136 400.0\n147 60 112 146
361.9\n148 30 103 127 185.0\n149 60 110 150 40
9.4\n150 60 106 134 343.0\n151 60 109 129 353.2
\n152 60 109 138 374.0\n153 30 150 167 275.8\n15
4 60 105 128 328.0\n155 60 111 151 368.5\n156
60 97 131 270.4\n157 60 100 120 270.4\n158
60 114 150 382.8\n159 30 80 120 240.9\n160 30
85 120 250.4\n161 45 90 130 260.4\n162 45 9
5 130 270.0\n163 45 100 140 280.9\n164 60 105
140 290.8\n165 60 110 145 300.0\n166 60 115
145 310.2\n167 75 120 150 320.4\n168 75 125 150
330.4'
In [22]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Duration 169 non-null int64
1 Pulse 169 non-null int64
2 Maxpulse 169 non-null int64
3 Calories 164 non-null float64
dtypes: float64(1), int64(3)
memory usage: 5.4 KB
In [23]: df.sample()
In [24]: df.sample(6)
In [25]: df.columns
In [26]: df.shape
(169, 4)
Out[26]:
In [27]: df.describe()
In [28]: df.describe().T
In [29]: df.isnull()
In [30]: df.isnull().sum()
Duration 0
Out[30]:
Pulse 0
Maxpulse 0
Calories 5
dtype: int64
In [31]: df.isnull().mean()*100
Duration 0.00000
Out[31]:
Pulse 0.00000
Maxpulse 0.00000
Calories 2.95858
dtype: float64
In [32]: df.duplicated()
0 False
Out[32]:
1 False
2 False
3 False
4 False
...
164 False
165 False
166 False
167 False
168 False
Length: 169, dtype: bool
In [33]: df.duplicated().sum()
7
Out[33]:
In [34]: df.drop_duplicates()
In [35]: df.duplicated().sum()
7
Out[35]:
In [36]: df.drop_duplicates(inplace=True)
In [37]: df.duplicated().sum()
0
Out[37]:
In [38]: df.isnull().sum()
Duration 0
Out[38]:
Pulse 0
Maxpulse 0
Calories 5
dtype: int64
In [39]: df.columns
In [40]: df1=df.rename(columns={"Pulse":"New_Pulse"})
In [41]: df1
In [42]: df.columns
Index(['Duration', 'Pulse', 'Maxpulse', 'Calories'], dtype='object')
Out[42]:
In [43]: df.fillna("#",inplace=True)
In [44]: df["Duration"].fillna("#",inplace=True)
In [45]: df.tail(10)
In [46]: df.ffill()
In [47]: x=df["Duration"].mean()
In [48]: x
64.19753086419753
Out[48]:
In [49]: df["Duration"].fillna(x,inplace=True)
In [50]: df.loc[1:4]
In [51]: df.loc[160:168]
In [52]: #valuecount
In [53]: df.columns
Index(['Duration', 'Pulse', 'Maxpulse', 'Calories'], dtype='object')
Out[53]:
In [54]: df.value_counts("Duration")
Duration
Out[54]:
60 74
45 33
30 16
20 9
90 8
150 4
120 3
180 3
15 2
75 2
160 2
210 2
25 1
80 1
270 1
300 1
dtype: int64
In [55]: df1=pd.read_csv("grades.csv")
df1
Out[55]: Last name "First name" "SSN" "Test1" "Test2" "Test3" "Test4" "Final" "Grade"
8 Airpump "Andrew" "223-45-6789" 49.0 1.0 90.0 100.0 83.0 "A" NaN
16
Out[57]:
In [58]: df1.shape
(16, 9)
Out[58]:
In [59]: #Final
column=df1[' "Final"']
max_value=column.max()
max_value
' "A"'
Out[59]:
In [60]: df1.columns
Index(['Last name', ' "First name"', ' "SSN"', ' "Test1"', ' "Test2"',
Out[60]:
' "Test3"', ' "Test4"', ' "Final"', ' "Grade"'],
dtype='object')
column=df1[' "Test4"']
In [61]:
max_value=column.max()
max_value
97.0
Out[61]:
35.768701402203575
Out[62]:
79.0
Out[63]:
In [64]: df2=pd.read_csv("possum.csv")
df2
Out[64]: case site Pop sex age hdlngth skullw totlngth taill footlgth earconch eye chest belly
0 1 1 Vic m 8.0 94.1 60.4 89.0 36.0 74.5 54.5 15.2 28.0 36.0
1 2 1 Vic f 6.0 92.5 57.6 91.5 36.5 72.5 51.2 16.0 28.5 33.0
2 3 1 Vic f 6.0 94.0 60.0 95.5 39.0 75.4 51.9 15.5 30.0 34.0
3 4 1 Vic f 6.0 93.2 57.1 92.0 38.0 76.1 52.2 15.2 28.0 34.0
4 5 1 Vic f 2.0 91.5 56.3 85.5 36.0 71.0 53.2 15.1 28.5 33.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
99 100 7 other m 1.0 89.5 56.0 81.5 36.5 66.0 46.8 14.8 23.0 27.0
100 101 7 other m 1.0 88.6 54.7 82.5 39.0 64.4 48.0 14.0 25.0 33.0
101 102 7 other f 6.0 92.4 55.0 89.0 38.0 63.5 45.4 13.0 25.0 30.0
102 103 7 other m 4.0 91.5 55.2 82.5 36.5 62.9 45.9 15.4 25.0 29.0
103 104 7 other f 3.0 93.6 59.9 89.0 40.0 67.6 46.0 14.8 28.5 33.5
Out[65]: Passengerid Age Fare Sex sibsp zero zero.1 zero.2 zero.3 zero.4 ... zero.12 zero.13 zero.14
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Out[66]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat body h
Allen,
Miss.
0 1.0 1.0 female 29.0000 0.0 0.0 24160 211.3375 B5 S 2 NaN
Elisabeth
Walton
Allison,
Master. C22
1 1.0 1.0 male 0.9167 1.0 2.0 113781 151.5500 S 11 NaN
Hudson C26 C
Trevor
Allison,
Miss. C22
2 1.0 0.0 female 2.0000 1.0 2.0 113781 151.5500 S NaN NaN
Helen C26 C
Loraine
Allison,
Mr.
C22
3 1.0 0.0 Hudson male 30.0000 1.0 2.0 113781 151.5500 S NaN 135.0
C26 C
Joshua
Creighton
Allison,
Mrs.
Hudson J C22
4 1.0 0.0 female 25.0000 1.0 2.0 113781 151.5500 S NaN NaN
C (Bessie C26 C
Waldo
Daniels)
In [67]: df5.head(4)
Out[67]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat body h
Allen,
Miss.
0 1.0 1.0 female 29.0000 0.0 0.0 24160 211.3375 B5 S 2 NaN
Elisabeth
Walton
Allison,
Master. C22
1 1.0 1.0 male 0.9167 1.0 2.0 113781 151.5500 S 11 NaN
Hudson C26 C
Trevor
Allison,
Miss. C22
2 1.0 0.0 female 2.0000 1.0 2.0 113781 151.5500 S NaN NaN
Helen C26 C
Loraine
Allison,
Mr.
C22
3 1.0 0.0 Hudson male 30.0000 1.0 2.0 113781 151.5500 S NaN 135.0
C26 C
Joshua
Creighton
In [68]: df5.tail(6)
Out[68]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat body
Zabour,
1304 3.0 0.0 female 14.5 1.0 0.0 2665 14.4542 NaN C NaN 328.0
Miss. Hileni
Zabour,
1305 3.0 0.0 Miss. female NaN 1.0 0.0 2665 14.4542 NaN C NaN NaN
Thamine
Zakarian, Mr.
1306 3.0 0.0 male 26.5 0.0 0.0 2656 7.2250 NaN C NaN 304.0
Mapriededer
Zakarian, Mr.
1307 3.0 0.0 male 27.0 0.0 0.0 2670 7.2250 NaN C NaN NaN
Ortin
Zimmerman,
1308 3.0 0.0 male 29.0 0.0 0.0 315082 7.8750 NaN S NaN NaN
Mr. Leo
1309 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [69]: df5.sample()
Out[69]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home.
M
Gustafsson,
Sw
839 3.0 0.0 Mr. Karl male 19.0 0.0 0.0 347069 7.775 NaN S NaN NaN
New
Gideon
In [70]: df5.sample(4)
Out[70]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat body h
Stengel,
Mr.
280 1.0 1.0 Charles male 54.0 1.0 0.0 11778 55.4417 C116 C 1 NaN N
Emil
Henry
Mockler,
Miss.
1023 3.0 1.0 Helen female NaN 0.0 0.0 330980 7.8792 NaN Q 16 NaN
Mary
"Ellie"
Moubarek,
Mrs.
George
1036 3.0 1.0 female NaN 0.0 2.0 2661 15.2458 NaN C C NaN
(Omine
"Amenia"
Alexander)
In [71]: df5.info
body home.dest
0 NaN St Louis, MO
1 NaN Montreal, PQ / Chesterville, ON
2 NaN Montreal, PQ / Chesterville, ON
3 135.0 Montreal, PQ / Chesterville, ON
4 NaN Montreal, PQ / Chesterville, ON
... ... ...
1305 NaN NaN
1306 304.0 NaN
1307 NaN NaN
1308 NaN NaN
1309 NaN NaN
In [72]: df5.columns
In [73]: df5.shape
(1310, 14)
Out[73]:
In [74]: df5.describe()
In [75]: df5.describe().T
In [76]: df5.isnull()
Out[76]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home.dest
0 False False False False False False False False False False False False True False
1 False False False False False False False False False False False False True False
2 False False False False False False False False False False False True True False
3 False False False False False False False False False False False True False False
4 False False False False False False False False False False False True True False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1305 False False False False True False False False False True False True True True
1306 False False False False False False False False False True False True False True
1307 False False False False False False False False False True False True True True
1308 False False False False False False False False False True False True True True
1309 True True True True True True True True True True True True True True
In [77]: df5.isnull().sum()
pclass 1
Out[77]:
survived 1
name 1
sex 1
age 264
sibsp 1
parch 1
ticket 1
fare 2
cabin 1015
embarked 3
boat 824
body 1189
home.dest 565
dtype: int64
In [78]: df5.isnull().mean()*100
pclass 0.076336
Out[78]:
survived 0.076336
name 0.076336
sex 0.076336
age 20.152672
sibsp 0.076336
parch 0.076336
ticket 0.076336
fare 0.152672
cabin 77.480916
embarked 0.229008
boat 62.900763
body 90.763359
home.dest 43.129771
dtype: float64
In [79]: df5.columns
In [80]: df5["fare"].max()
512.3292
Out[80]:
In [81]: df5["fare"].std()
51.758668239174135
Out[81]:
In [82]: df5.value_counts("name").sum()
1309
Out[82]:
In [83]: df5.duplicated().sum()
0
Out[83]:
In [86]: df5.pclass.value_counts()
3.0 709
Out[86]:
1.0 323
2.0 277
Name: pclass, dtype: int64
In [88]: survive=df5.groupby(["pclass"])["survived"]
survive.sum()
pclass
Out[88]:
1.0 200.0
2.0 119.0
3.0 181.0
Name: survived, dtype: float64
pclass
In [91]: #4. count of males and females who survived in each class
In [92]: count=df5.groupby(["pclass","sex"])["survived"]
count.sum()
pclass sex
Out[92]:
1.0 female 139.0
male 61.0
2.0 female 94.0
male 25.0
3.0 female 106.0
male 75.0
Name: survived, dtype: float64
In [93]: count=df5.groupby(["pclass","sex"],as_index=False)["survived"]
count.sum()
In [95]: df5["survived"].mean()
0.3819709702062643
Out[95]:
In [102… count=df5.groupby(["pclass"])["survived"].agg(["mean","sum"])
count
pclass
In [106… abc=pd.cut(df5.age,bins=3,labels=("young","middle","old"))
abc.head(5)
0 middle
Out[106]:
1 young
2 young
3 middle
4 young
Name: age, dtype: category
Categories (3, object): ['young' < 'middle' < 'old']
In [107… abc
0 middle
Out[107]:
1 young
2 young
3 middle
4 young
...
1305 NaN
1306 young
1307 middle
1308 middle
1309 NaN
Name: age, Length: 1310, dtype: category
Categories (3, object): ['young' < 'middle' < 'old']
In [108… df5
Out[108]: pclass survived name sex age sibsp parch ticket fare cabin embarked boat bo
Allen, Miss.
0 1.0 1.0 Elisabeth female 29.0000 0.0 0.0 24160 211.3375 B5 S 2 N
Walton
Allison,
Master. C22
1 1.0 1.0 male 0.9167 1.0 2.0 113781 151.5500 S 11 N
Hudson C26
Trevor
Allison, Miss.
C22
2 1.0 0.0 Helen female 2.0000 1.0 2.0 113781 151.5500 S NaN N
C26
Loraine
Allison, Mr.
Hudson C22
3 1.0 0.0 male 30.0000 1.0 2.0 113781 151.5500 S NaN 13
Joshua C26
Creighton
Allison, Mrs.
Hudson J C
C22
4 1.0 0.0 (Bessie female 25.0000 1.0 2.0 113781 151.5500 S NaN N
C26
Waldo
Daniels)
... ... ... ... ... ... ... ... ... ... ... ... ...
Zabour,
1305 3.0 0.0 Miss. female NaN 1.0 0.0 2665 14.4542 NaN C NaN N
Thamine
Zakarian, Mr.
1306 3.0 0.0 male 26.5000 0.0 0.0 2656 7.2250 NaN C NaN 30
Mapriededer
1307 3.0 0.0 Zakarian, Mr. male 27.0000 0.0 0.0 2670 7.2250 NaN C NaN N
Ortin
Zimmerman,
1308 3.0 0.0 male 29.0000 0.0 0.0 315082 7.8750 NaN S NaN N
Mr. Leo
1309 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN N
In [109… df5.columns
In [111… df6=pd.read_csv("titan.csv")
df6
Out[111]: PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare ... Embarked
Braund,
0 1 0.0 3 Mr. Owen male 22.0 1 0 A/5 21171 7.2500 ... S
Harris
Cumings,
Mrs. John
Bradley
1 2 1.0 1 female 38.0 1 0 PC 17599 71.2833 ... C
(Florence
Briggs
Th...
Heikkinen,
STON/O2.
2 3 1.0 3 Miss. female 26.0 0 0 7.9250 ... S
3101282
Laina
Futrelle,
Mrs.
Jacques
3 4 1.0 1 female 35.0 1 0 113803 53.1000 ... S
Heath
(Lily May
Peel)
Allen, Mr.
4 5 0.0 3 William male 35.0 0 0 373450 8.0500 ... S
Henry
... ... ... ... ... ... ... ... ... ... ... ... ...
Spector,
1304 1305 NaN 3 male NaN 0 0 A.5. 3236 8.0500 ... S
Mr. Woolf
Oliva y
Ocana,
1305 1306 NaN 1 female 39.0 0 0 PC 17758 108.9000 ... C
Dona.
Fermina
Saether,
SOTON/O.Q.
1306 1307 NaN 3 Mr. Simon male 38.5 0 0 7.2500 ... S
3101262
Sivertsen
Ware, Mr.
1307 1308 NaN 3 male NaN 0 0 359309 8.0500 ... S
Frederick
1308 1309 NaN 3 Peter, male NaN 1 1 2668 22.3583 ... C
Master.
Michael J
In [116… noofsurv=df6.groupby(["Boarded"],as_index=False)["Survived"]
noofsurv.sum()
0 Belfast 0.0
1 Cherbourg 93.0
2 Queenstown 29.0
3 Southampton 219.0
In [126… avgfare=df6.groupby(["Pclass"],as_index=False)["Fare"]
avgfare.mean()
0 1 87.508992
1 2 21.179196
2 3 13.302889
In [142… df6
0 young
Out[142]:
1 middle
2 young
3 middle
4 middle
...
1304 NaN
1305 middle
1306 middle
1307 NaN
1308 NaN
Name: Age, Length: 1309, dtype: category
Categories (3, object): ['young' < 'middle' < 'old']
In [140… df6
0 young
Out[140]:
1 middle
2 young
3 middle
4 middle
...
1304 NaN
1305 middle
1306 middle
1307 NaN
1308 NaN
Name: Age, Length: 1309, dtype: category
Categories (3, object): ['young' < 'middle' < 'old']
In [145… abc=pd.cut(df5.age,bins=3,labels=("young","middle","old"))
df5.head()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[145], line 1
----> 1 abc=pd.cut(df5.age,bins=3,labels=("young","middle","old"))
2 df5.head()
In [135… df6
Out[135]: PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare ... Embarked
Braund,
0 1 0.0 3 Mr. Owen male 22.0 1 0 A/5 21171 7.2500 ... S
Harris
Cumings,
Mrs. John
Bradley
1 2 1.0 1 female 38.0 1 0 PC 17599 71.2833 ... C
(Florence
Briggs
Th...
Heikkinen,
STON/O2.
2 3 1.0 3 Miss. female 26.0 0 0 7.9250 ... S
3101282
Laina
Futrelle,
Mrs.
Jacques
3 4 1.0 1 female 35.0 1 0 113803 53.1000 ... S
Heath
(Lily May
Peel)
Allen, Mr.
4 5 0.0 3 William male 35.0 0 0 373450 8.0500 ... S
Henry
... ... ... ... ... ... ... ... ... ... ... ... ...
Spector,
1304 1305 NaN 3 male NaN 0 0 A.5. 3236 8.0500 ... S
Mr. Woolf
Saether,
SOTON/O.Q.
1306 1307 NaN 3 Mr. Simon male 38.5 0 0 7.2500 ... S
3101262
Sivertsen
Ware, Mr.
1307 1308 NaN 3 male NaN 0 0 359309 8.0500 ... S
Frederick
Peter,
1308 1309 NaN 3 Master. male NaN 1 1 2668 22.3583 ... C
Michael J
In [136… cat=df6.groupby(["Pclass"],as_index=False)["Age"]
cat.sum()
0 1 11121.42
1 2 7701.25
2 3 12433.00
In [ ]: