Python Pandas Demo PDF
Python Pandas Demo PDF
PYTHON PANDAS
Python pandas
duplicated() Method creates a Boolean Series and uses it to extract rows that have duplicate values
drop_duplicates() Method is an alternative option to identifying duplicate rows and removing them through
filtering
set_index() Method sets the DataFrame index (row labels) using one or more existing columns
reset_index() Method resets the index of a DataFrame. It replaces the current index with the default
integer index, ranging from 0 to (number of rows - 1)
where() Method is used to check a Data Frame for one or more condition and return the result
accordingly. By default, the rows not satisfying the condition are filled with NaN value
Attribute Description
columns The column labels of the data frame
index The index (row labels) of the data frame
axes Returns a list representing both the axes of the data frame
dtypes Returns the data type of each column in the data frame
size Returns an int representing the number of elements in this object
shape Returns a tuple representing the dimensionality of the data frame
values Returns a NumPy representation of the data frame
empty Indicates whether the data frame is empty
ndim Returns an int representing the number of axes
T Transpose index and columns
SELECTING OR ACCESSING A COLUMN:
<data frame object>[<column name>] {USE SQUARE BRACKETS}
Or
<Data frame object>.<column name> {USE DOT NOTATION}
Selecting a subset from a data frame Using Rows / Column Names
1. To Access A Row <DF OBJECT>.loc[<row label>,:]
print("-"*30)
print(df.isnull())
print("-"*30)
# Drop every column that contains a NaN. The axis has to be given by keyword:
# dropna() no longer accepts positional arguments since pandas 2.0.
print(df.dropna(axis=1))
print("-"*30)
# iterrows() yields (row label, row as a Series) pairs.
# NOTE(review): the loop body's indentation was lost in this copy; printing a
# blank line after each row is assumed -- confirm against the original script.
for i, j in df.iterrows():
    print(i, j)
    print()
Output:
Info 1 Info 2 Info 3 Info 4 ------------------------------ 3 Info 1 60.0
0 20.0 50.0 40.0 NaN 0 Info 1 20.0 Info 2 NaN
1 30.0 20.0 70.0 NaN Info 2 50.0 Info 3 20.0
2 NaN 70.0 10.0 NaN Info 3 40.0 Info 4 NaN
3 60.0 NaN 20.0 NaN Info 4 NaN Name: 3, dtype: float64
4 34.0 14.0 24.0 NaN Name: 0, dtype: float64
5 33.0 23.0 NaN NaN 4 Info 1 34.0
------------------------------ 1 Info 1 30.0 Info 2 14.0
Info 1 Info 2 Info 3 Info 4 Info 2 20.0 Info 3 24.0
0 False False False True Info 3 70.0 Info 4 NaN
1 False False False True Info 4 NaN Name: 4, dtype: float64
2 True False False True Name: 1, dtype: float64
3 False True False True 5 Info 1 33.0
4 False False False True 2 Info 1 NaN Info 2 23.0
5 False False True True Info 2 70.0 Info 3 NaN
------------------------------ Info 3 10.0 Info 4 NaN
Empty DataFrame Info 4 NaN Name: 5, dtype: float64
Columns: [] Name: 2, dtype: float64
Index: [0, 1, 2, 3, 4, 5]
Example 2:
import pandas as pd
import numpy as np
# Build a student table from equal-length lists, one list per column.
DATA = dict(
    Stu_Name=['Mohan', 'Rahul', 'Jeevin', 'Pawan'],
    Total_Marks=[250, 210, 319, 218],
)
df = pd.DataFrame(DATA)

# Show the whole frame, a separator, then the same columns selected by name.
separator = "-" * 30
selected_columns = ['Stu_Name', 'Total_Marks']
print(df, separator, df[selected_columns], sep="\n")
Output:
Stu_Name Total_Marks Stu_Name Total_Marks
0 Mohan 250 0 Mohan 250
1 Rahul 210 1 Rahul 210
2 Jeevin 319 2 Jeevin 319
3 Pawan 218 3 Pawan 218
------------------------------
Example 3:
import pandas as pd
import numpy as np
# Column data for the demo frame; np.nan marks a missing value.
# The literal used to list 'Info 3' twice. A dict keeps only the last value
# for a repeated key, so the first list was silently discarded; only the
# effective entry is kept. The variable is named `data` rather than `dict`
# so the builtin is not shadowed.
data = {'Info 1': [20, 30, np.nan, 60, 34, 33],
        'Info 2': [50, 20, 70, np.nan, 14, 23],
        'Info 3': [40, 70, 10, 20, 24, np.nan],
        'Info 4': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}
df = pd.DataFrame(data)
Page 4 of 23 EDUCATION FOR EVERYONE
DoyPyEdu PLAY WITH PYTHON
print(df)
print("-"*30)
# list() over a DataFrame yields its column labels.
columns = list(df)
print(columns)
print("-"*30)
# For every column, print the value stored at row label 2, then a separator.
# (Loop body re-indented; the sample output shows a separator after each value.)
for i in columns:
    print(df[i][2])
    print("-"*30)
Output:
Info 1 Info 2 Info 3 Info 4 ------------------------------
0 20.0 50.0 40.0 NaN nan
1 30.0 20.0 70.0 NaN ------------------------------
2 NaN 70.0 10.0 NaN 70.0
3 60.0 NaN 20.0 NaN ------------------------------
4 34.0 14.0 24.0 NaN 10.0
5 33.0 23.0 NaN NaN ------------------------------
------------------------------ nan
['Info 1', 'Info 2', 'Info 3', 'Info 4'] ------------------------------
Example 4:
import pandas as pd
import numpy as np

# Script restored from a two-column page layout: the left column (setup) runs
# first, then the right column (the loops).
# The repeated 'Info 3' key has been reduced to the entry that actually takes
# effect, and the variable no longer shadows the builtin `dict`.
data = {'Info 1': [20, 30, np.nan, 60, 34, 33],
        'Info 2': [50, 20, 70, np.nan, 14, 23],
        'Info 3': [40, 70, 10, 20, 24, np.nan]}
df = pd.DataFrame(data)
print(df)
print("-"*30)
columns = list(df)
print(columns)
print("-"*30)
# Print each column as a whole Series, separated by ", ".
for i in columns:
    print(df[i], end=", ")
print("-"*30)
# Print only the value at row label 2 of each column.
for i in columns:
    print(df[i][2], end=", ")
print()
print("-"*30)
# Print every value, one output line per column.
for i in columns:
    for j in range(0, len(df[i])):
        print(df[i][j], end=", ")
    print()
print("-"*30)
Output:
Info 1 Info 2 Info 3 4 34.0 4 24.0
0 20.0 50.0 40.0 5 33.0 5 NaN
1 30.0 20.0 70.0 Name: Info 1, dtype: float64, 0 Name: Info 3, dtype: float64,
2 NaN 70.0 10.0 50.0 ------------------------------
3 60.0 NaN 20.0 1 20.0 nan,
4 34.0 14.0 24.0 2 70.0 70.0,
5 33.0 23.0 NaN 3 NaN 10.0,
------------------------------ 4 14.0 ------------------------------
['Info 1', 'Info 2', 'Info 3'] 5 23.0 20.0, 30.0, nan, 60.0, 34.0, 33.0,
------------------------------ Name: Info 2, dtype: float64, 0 50.0, 20.0, 70.0, nan, 14.0, 23.0,
0 20.0 40.0 40.0, 70.0, 10.0, 20.0, 24.0, nan,
1 30.0 1 70.0 ------------------------------
2 NaN 2 10.0
3 60.0 3 20.0
Case 2
Example 1:
import pandas as pd
import numpy as np

# Script restored from a two-column page layout (setup first, then printing).
# The variable is named `data` so the builtin `dict` is not shadowed.
data = {'Student 1': [20, 30, 60, 34, 33],
        'Student 2': [50, 20, 70, 14, 23],
        'Student 3': [70, 40, 50, 40, 13]}
df = pd.DataFrame(data)
print(df)
print("-"*30)
# This frame has no missing values, so fillna(0) prints the same table again.
print(df.fillna(0))
print("-"*30)
Page 5 of 23 EDUCATION FOR EVERYONE
DoyPyEdu PLAY WITH PYTHON
Output:
Student 1 Student 2 Student 3 Student 1 Student 2 Student 3
0 20 50 70 0 20 50 70
1 30 20 40 1 30 20 40
2 60 70 50 2 60 70 50
3 34 14 40 3 34 14 40
4 33 23 13 4 33 23 13
------------------------------ ------------------------------
Example 2:
import pandas as pd
import numpy as np
# The literal used to list 'Info 3' twice; only the last value of a repeated
# dict key survives, so the discarded first list is removed. The variable is
# named `data` so the builtin `dict` is not shadowed.
data = {'Info 1': [20, 30, np.nan, 60, 34, 33],
        'Info 2': [50, 20, 70, np.nan, 14, 23],
        'Info 3': [40, 70, 10, 20, 24, np.nan],
        'Info 4': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}
df = pd.DataFrame(data)
print(df)
print("-"*30)
# fillna(v) returns a new frame with every NaN replaced by v; df is not modified.
print(df.fillna(0))
print("-"*30)
print(df.fillna(1))
print("-"*30)
Output:
Info 1 Info 2 Info 3 Info 4 Info 1 Info 2 Info 3 Info 4 Info 1 Info 2 Info 3 Info 4
0 20.0 50.0 40.0 NaN 0 20.0 50.0 40.0 0.0 0 20.0 50.0 40.0 1.0
1 30.0 20.0 70.0 NaN 1 30.0 20.0 70.0 0.0 1 30.0 20.0 70.0 1.0
2 NaN 70.0 10.0 NaN 2 0.0 70.0 10.0 0.0 2 1.0 70.0 10.0 1.0
3 60.0 NaN 20.0 NaN 3 60.0 0.0 20.0 0.0 3 60.0 1.0 20.0 1.0
4 34.0 14.0 24.0 NaN 4 34.0 14.0 24.0 0.0 4 34.0 14.0 24.0 1.0
5 33.0 23.0 NaN NaN 5 33.0 23.0 0.0 0.0 5 33.0 23.0 1.0 1.0
------------------------------ ------------------------------ ------------------------------
Example 3:
import pandas as pd
import numpy as np
# The literal used to list 'Info 3' twice; only the last value of a repeated
# dict key survives, so the discarded first list is removed. The variable is
# named `data` so the builtin `dict` is not shadowed.
data = {'Info 1': [20, 30, np.nan, 60, 34, 33],
        'Info 2': [50, 20, 70, np.nan, 14, 23],
        'Info 3': [40, 70, 10, 20, 24, np.nan],
        'Info 4': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}
df = pd.DataFrame(data)
print(df)
print("-"*30)
# isnull() returns a same-shaped Boolean frame: True where the value is NaN.
print(df.isnull())
print("-"*30)
Output:
Info 1 Info 2 Info 3 Info 4 Info 1 Info 2 Info 3 Info 4
0 20.0 50.0 40.0 NaN 0 False False False True
1 30.0 20.0 70.0 NaN 1 False False False True
2 NaN 70.0 10.0 NaN 2 True False False True
3 60.0 NaN 20.0 NaN 3 False True False True
4 34.0 14.0 24.0 NaN 4 False False False True
5 33.0 23.0 NaN NaN 5 False False True True
------------------------------ ------------------------------
Page 6 of 23 EDUCATION FOR EVERYONE
DoyPyEdu PLAY WITH PYTHON
------------------------------
Example 3:
import pandas as pd
import numpy as np

# Script restored from a two-column page layout (left column first, then right).
DATA = [[1, 2, 3], [5, 4, 6], [11, 120, 310]]
Rows = [1, 2, 3]
Columns = ['C1', 'C2', 'C3']
df = pd.DataFrame(DATA, index=Rows, columns=Columns)
print("All Data :")
print(df)
print("-"*30)
print("Display at :")
# .at addresses a single cell by (row label, column label).
r = df.at[1, 'C2']
print(r)
print("-"*30)
print("Display Before update :")
print(df)
# NOTE(review): the heading names 'C1', but the assignment below writes 'C2'.
print("P/Insert at df.at[2, 'C1']=41:")
df.at[2, 'C2'] = 41
df.at[1, 'C1'] = 39
# Row label 0 does not exist, so this assignment appends a new row whose
# other cells are NaN (see the sample output).
df.at[0, 'C3'] = 51
print("Display After update :")
print(df)
print("-"*30)
Output:
All Data : ------------------------------ C1 C2 C3
C1 C2 C3 Display Before update : 1 39.0 2.0 3.0
1 1 2 3 C1 C2 C3 2 5.0 41.0 6.0
2 5 4 6 1 1 2 3 3 11.0 120.0 310.0
3 11 120 310 2 5 4 6 0 NaN NaN 51.0
------------------------------ 3 11 120 310 ------------------------------
Display at : P/Insert at df.at[2, 'C1']=41:
2 Display After update :
Example 4:
import pandas as pd
import numpy as np

# Script restored from a two-column page layout (left column first, then right).
DATA = [[1, 2, 3], [5, 4, 6], [11, 120, 310]]
Rows = [1, 2, 3]
Columns = ['C1', 'C2', 'C3']
df = pd.DataFrame(DATA, index=Rows, columns=Columns)
print("All Data :")
print(df)
print("-"*30)
print("Del Column C2:")
# del removes the column from df in place.
del df['C2']
print(df)
print("-"*30)
Output: ------------------------------
All Data : Del Column C2:
C1 C2 C3 C1 C3
1 1 2 3 1 1 3
2 5 4 6 2 5 6
3 11 120 310 3 11 310
------------------------------
Working With min()/max()/mode()/mean()/median()
Methods in DataFrame
Example 1:
import pandas as pd
import numpy as np

# Script restored from a two-column page layout (left column first, then right).
DATA = [[11, 2, 3], [5, 0, 6], [101, 20, 310]]
Rows = [1, 2, 3]
Columns = ['C1', 'C2', 'C3']
df = pd.DataFrame(DATA, index=Rows, columns=Columns)
print("All Data :")
print(df)
print("-"*30)
# With no axis argument, min()/max() reduce each column to one value.
print("df.min():")
print(df.min())
print("-"*30)
print("df.max():")
print(df.max())
print("-"*30)
Output:
All Data : ------------------------------ dtype: int64
C1 C2 C3 df.min(): ------------------------------
1 11 2 3 C1 5 df.max():
2 5 0 6 C2 0 C1 101
3 101 20 310 C3 3 C2 20
****************************** Round 3 2
df.min(axis = 1,skipna=True) : Round 4 6
Round 1 10 dtype: int64
Round 2 2
Example 3:
import pandas as pd

# Script restored from a two-column page layout (left column first, then right).
data = [[10, 41, 51, 17, 12], [15, 12, 4, 10, 2],
        [2, 6, 17, 13, 15], [24, 13, 17, 17, 6]]
rows = ["Round 1", "Round 2", "Round 3", "Round 4"]
columns = ["Game 1", "Game 2", "Game 3", "Game 4", "Game 5"]
print(data)
print(rows)
print(columns)
df = pd.DataFrame(data, index=rows, columns=columns)
print("*"*30)
print("All Data:")
print(df)
print("*"*30)
# Default axis: one result per column.
print("df.mean() : ")
print(df.mean())
print("df.mode() : ")
print(df.mode())
print("df.median() : ")
print(df.median())
print("*"*30)
# axis = 0 is the default written out explicitly.
print("df.mean(axis = 0) : ")
print(df.mean(axis = 0))
print("df.mode(axis = 0) : ")
print(df.mode(axis = 0))
print("df.median(axis = 0) : ")
print(df.median(axis = 0))
print("*"*30)
# axis = 1: one result per row.
print("df.mean(axis = 1) : ")
print(df.mean(axis = 1))
print("df.mode(axis = 1) : ")
print(df.mode(axis = 1))
print("df.median(axis = 1) : ")
print(df.median(axis = 1))
print("*"*30)
# skipna=True ignores NaN values while aggregating.
print("df.mean(axis = 0,skipna=True) : ")
print(df.mean(axis = 0, skipna=True))
print("df.median(axis = 0,skipna=True) : ")
print(df.median(axis = 0, skipna=True))
print("*"*30)
print("df.mean(axis = 1,skipna=True) : ")
print(df.mean(axis = 1, skipna=True))
print("df.median(axis = 1,skipna=True) : ")
print(df.median(axis = 1, skipna=True))
Output:
[[10, 41, 51, 17, 12], [15, 12, 4, 10, 2], Game 2 12.5
[2, 6, 17, 13, 15], [24, 13, 17, 17, 6]] Game 3 17.0
['Round 1', 'Round 2', 'Round 3', 'Round 4'] Game 4 15.0
['Game 1', 'Game 2', 'Game 3', 'Game 4', 'Game 5'] Game 5 9.0
****************************** dtype: float64
All Data: ******************************
Game 1 Game 2 Game 3 Game 4 Game 5 df.mean(axis = 0) :
Round 1 10 41 51 17 12 Game 1 12.75
Round 2 15 12 4 10 2 Game 2 18.00
Round 3 2 6 17 13 15 Game 3 22.25
Round 4 24 13 17 17 6 Game 4 14.25
****************************** Game 5 8.75
df.mean() : dtype: float64
Game 1 12.75 df.mode(axis = 0) :
Game 2 18.00 Game 1 Game 2 Game 3 Game 4 Game 5
Game 3 22.25 0 2 6 17.0 17.0 2
Game 4 14.25 1 10 12 NaN NaN 6
Game 5 8.75 2 15 13 NaN NaN 12
dtype: float64 3 24 41 NaN NaN 15
df.mode() : df.median(axis = 0) :
Game 1 Game 2 Game 3 Game 4 Game 5 Game 1 12.5
0 2 6 17.0 17.0 2 Game 2 12.5
1 10 12 NaN NaN 6 Game 3 17.0
2 15 13 NaN NaN 12 Game 4 15.0
3 24 41 NaN NaN 15 Game 5 9.0
df.median() : dtype: float64
Game 1 12.5 ******************************
Game 1 51 df.sum(axis = 1) :
Game 2 72 Round 1 131
Game 3 89 Round 2 43
Game 4 57 Round 3 53
Game 5 35 Round 4 77
dtype: int64 dtype: int64
df.count(axis = 0) : df.count(axis = 1) :
Game 1 4 Round 1 5
Game 2 4 Round 2 5
Game 3 4 Round 3 5
Game 4 4 Round 4 5
Game 5 4 dtype: int64
dtype: int64 ******************************
******************************
Working With var()/quantile() Methods in DataFrame
Example 1:
import pandas as pd

# Script restored from a two-column page layout whose columns were merged and
# then alternated line by line; the order follows the printed sample output.
data = [[10, 41, 51, 17, 12], [15, 12, 4, 10, 2],
        [2, 6, 17, 13, 15], [24, 13, 17, 17, 6]]
rows = ["Round 1", "Round 2", "Round 3", "Round 4"]
columns = ["Game 1", "Game 2", "Game 3", "Game 4", "Game 5"]
print(data)
print(rows)
print(columns)
df = pd.DataFrame(data, index=rows, columns=columns)
print("*"*30)
print("All Data:")
print(df)
print("*"*30)
# quantile(q, axis=0) gives the q-th quantile of each column,
# axis=1 of each row.
print("df.quantile(.2, axis = 0): ")
print(df.quantile(.2, axis = 0))
print("*"*30)
print("df.quantile(.2, axis = 1): ")
print(df.quantile(.2, axis = 1))
print("*"*30)
print("df.quantile(.4, axis = 0): ")
print(df.quantile(.4, axis = 0))
print("*"*30)
Output:
[[10, 41, 51, 17, 12], [15, 12, 4, 10, 2], Name: 0.2, dtype: float64
[2, 6, 17, 13, 15], [24, 13, 17, 17, 6]] ******************************
['Round 1', 'Round 2', 'Round 3', 'Round 4'] df.quantile(.2, axis = 1):
['Game 1', 'Game 2', 'Game 3', 'Game 4', 'Game 5'] Round 1 11.6
****************************** Round 2 3.6
All Data: Round 3 5.2
Game 1 Game 2 Game 3 Game 4 Game 5 Round 4 11.6
Round 1 10 41 51 17 12 Name: 0.2, dtype: float64
Round 2 15 12 4 10 2 ******************************
Round 3 2 6 17 13 15 df.quantile(.4, axis = 0):
Round 4 24 13 17 17 6 Game 1 11.0
****************************** Game 2 12.2
df.quantile(.2, axis = 0): Game 3 17.0
Game 1 6.8 Game 4 13.8
Game 2 9.6 Game 5 7.2
Game 3 11.8 Name: 0.4, dtype: float64
Game 4 11.8 ******************************
Game 5 4.4
Example 2:
import pandas as pd

# Script restored from a two-column page layout (left column first, then right).
# NOTE(review): the page ends here; the original script may continue.
data = [[10, 41, 51, 17, 12], [15, 12, 4, 10, 2],
        [2, 6, 17, 13, 15], [24, 13, 17, 17, 6]]
rows = ["Round 1", "Round 2", "Round 3", "Round 4"]
columns = ["Game 1", "Game 2", "Game 3", "Game 4", "Game 5"]
print(data)
print(rows)
print(columns)
df = pd.DataFrame(data, index=rows, columns=columns)
print("*"*30)
print("All Data:")
print(df)
print("*"*30)
# 0.2 quantile of each column.
print("df.quantile(.2, axis = 0): ")
print(df.quantile(.2, axis = 0))
print("*"*30)
Page 12 of 23 EDUCATION FOR EVERYONE
DoyPyEdu PLAY WITH PYTHON
foo
one 1 2 3
two 4 5 6
>>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])
baz zoo
bar A B C A B C
foo
one 1 2 3 x y z
two 4 5 6 q w t
A ValueError is raised if there are any duplicates.
>>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'],"bar": ['A', 'A', 'B', 'C'],"baz": [1, 2, 3, 4]})
>>> df
foo bar baz
0 one A 1
1 one A 2
2 two B 3
3 two C 4
More Examples of pivot_table() method:
>>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
... "bar", "bar", "bar", "bar"],
... "B": ["one", "one", "one", "two", "two",
... "one", "one", "two", "two"],
... "C": ["small", "large", "large", "small",
... "small", "large", "small", "small",
... "large"],
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]})
>>> df
A B C D E
0 foo one small 1 2
1 foo one large 2 4
2 foo one large 2 5
3 foo two small 3 5
4 foo two small 3 6
5 bar one large 4 6
6 bar one small 5 8
7 bar two small 6 9
8 bar two large 7 9
This first example aggregates values by taking the sum.
bar one 4 5
two 7 6
foo one 4 1
two 0 6
The next example aggregates by taking the mean across multiple columns.
Dictionary:
import pandas as p
print("Data Dictionary 1: \n")
Page 16 of 23 EDUCATION FOR EVERYONE
DoyPyEdu PLAY WITH
PYTHON
Output:
Data Dictionary 1: R4 Sachin 13
Name Age ----------------------------
0 Sumit 17 DataFrame from List of Dicts, Dictionary 3:
1 Ravi 17 a b c
2 Kali 18 0 1 2 NaN
3 Tarun 13 1 5 10 20.0
---------------------------- ----------------------------
Data Dictionary 2: DataFrame from List of Dicts, Dictionary 4:
Name Age a b c
R1 Raju 15 first 1 2 NaN
R2 Jatin 14 second 5 10 20.0
R3 Rahul 17 ----------------------------
print("="*30)
import pandas as pd
# Three Series with partly different index labels; the sample output shows
# NaN where a Series has no entry for a label.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
    'three': pd.Series([10, 20, 30], index=list('abc')),
}
df = pd.DataFrame(d)
rule = "=" * 30
print("Our dataframe is:", df, rule, sep="\n")

# Remove a column in place with the del statement.
print("Deleting the first column using DEL function:")
del df['one']
print(df, rule, sep="\n")

# Remove a column in place with pop(); the returned Series is not used.
print("Deleting another column using POP function:")
df.pop('two')
print(df, rule, sep="\n")
print("Row Selection, Addition, and Deletion")

# Each demo pairs its heading with the row selection applied to a freshly
# built frame: by label, by integer position, and by positional slice.
_row_demos = (
    ("DataFrame from List ofSelection by Label 10: \n",
     lambda frame: frame.loc['b']),
    ("DataFrame from List of Selection by integer location 11: \n",
     lambda frame: frame.iloc[2]),
    ("DataFrame from List of Slice Rows 12: \n",
     lambda frame: frame[2:4]),
)
for _heading, _select in _row_demos:
    print(_heading)
    d = {
        'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
        'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
    }
    df = pd.DataFrame(d)
    print(_select(df))
    print("=" * 30)
print("DataFrame from List of Addition of Rows 13: \n")
df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])
# DataFrame.append() was removed in pandas 2.0. pd.concat() is its
# replacement and likewise keeps each frame's own index labels (0, 1, 0, 1).
df = pd.concat([df, df2])
print (df)
print("="*30)
print("DataFrame from List of Deletion of Rows 14: \n")
df = pd.DataFrame([[1, 2], [3, 4]], columns = ['a','b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])
df = pd.concat([df, df2])
# Drop rows with label 0 -- both source frames contribute one, so two rows go.
df = df.drop(0)
print (df)
print("----------------------------")
Output:
Data Dictionary 1: ==============================
Name Age DataFrame from List of Dicts, Dictionary 3:
0 Sumit 17 a b c
1 Ravi 17 0 1 2 NaN
2 Kali 18 1 5 10 20.0
3 Tarun 13 ----------------------------
============================== DataFrame from List of Dicts, Dictionary 4:
Data Dictionary 2: a b c
Name Age first 1 2 NaN
R1 Raju 15 second 5 10 20.0
R2 Jatin 14 ==============================
R3 Rahul 17 DataFrame from List of Dicts, Dictionary 5:
R4 Sachin 13
Page 19 of 23 EDUCATION FOR
EVERYONE
DoyPyEdu PLAY WITH
PYTHON
a b ==============================
first 1 2 Deleting the first column using DEL function:
second 5 10 two three
a b1 a 1 10.0
first 1 NaN b 2 20.0
second 5 NaN c 3 30.0
---------------------------- d 4 NaN
DataFrame from List of Dicts, Dictionary 6: ==============================
one two Deleting another column using POP function:
a 1.0 1 three
b 2.0 2 a 10.0
c 3.0 3 b 20.0
d NaN 4 c 30.0
============================== d NaN
DataFrame from List of Dicts, Dictionary 7: ==============================
a 1.0 Row Selection, Addition, and Deletion
b 2.0 DataFrame from List ofSelection by Label 10:
c 3.0 one 2.0
d NaN two 2.0
Name: one, dtype: float64 Name: b, dtype: float64
---------------------------- ==============================
DataFrame from List of Dicts, Dictionary 8: DataFrame from List of Selection by integer
============================== location 11:
Adding a new column by passing as Series:
one two three one 3.0
a 1.0 1 10.0 two 3.0
b 2.0 2 20.0 Name: c, dtype: float64
c 3.0 3 30.0 ==============================
d NaN 4 NaN DataFrame from List of Slice Rows 12:
---------------------------- one two
Adding a new column using the existing c 3.0 3
columns in DataFrame: d NaN 4
one two three four ==============================
a 1.0 1 10.0 11.0 DataFrame from List of Addition of Rows 13:
b 2.0 2 20.0 22.0 a b
c 3.0 3 30.0 33.0 0 1 2
d NaN 4 NaN NaN 1 3 4
---------------------------- 0 5 6
DataFrame from List of Dicts, Dictionary 9: 1 7 8
============================== ==============================
Our dataframe is: DataFrame from List of Deletion of Rows 14:
one two three a b
a 1.0 1 10.0 1 3 4
b 2.0 2 20.0 1 7 8
c 3.0 3 30.0 ----------------------------
d NaN 4 NaN
NOTES
# Two Series of different length: 'one' has no entry for label 'd'.
d = {
    'one': p.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': p.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
}
data = p.DataFrame(d)
print(data)

# Add a column from a new Series.
data['three'] = p.Series([10, 20, 30], index=['a', 'b', 'c'])
print(data)

# Add a column computed from two existing columns.
data['four'] = data['one'].add(data['three'])
print(data)
#****pandaIMPORTANTfunctions10.py********
import pandas as p
d = {
    'one': p.Series([1, 2, 5], index=['a', 'b', 'c']),
    'two': p.Series([1, 9, 3], index=['a', 'b', 'c']),
}
data = p.DataFrame(d)
print(data)

# Per-column aggregates, each printed under its own heading.
_reports = (
    ("SUM : \n", data.sum),
    ("MIN :\n", data.min),
    ("MAX :\n", data.max),
    ("MIN INDEX :\n", data.idxmin),
    ("MAX INDEX :\n", data.idxmax),
)
for _heading, _aggregate in _reports:
    print(_heading, _aggregate())

# Summary statistics for every column.
print(data.describe())
#*************pandalist11.py****************
import pandas as p
l=[['Alex',10],['bob',12],['Clark',13]]
# Passing dtype=float to the constructor asks pandas to cast every column,
# including the names, which raises ValueError in pandas 2.x. Build the frame
# first and convert only the numeric column.
data1=p.DataFrame(l,columns=['NAME','AGE']).astype({'AGE': float})
print(data1)
#*************pandalist12.py****************
import pandas as p
# A flat list becomes a frame with a single column.
l = list(range(1, 4))
data1 = p.DataFrame(data=l)
print(data1)
#*************pandalist13.py****************
import pandas as p
# A list of lists: each inner list becomes one row.
first_row = [1, 2, 3]
second_row = [4, 5, 6]
l = [first_row, second_row]
data1 = p.DataFrame(data=l)
print(data1)
#*************pandalist14.py****************
import pandas as p
# Rows of [name, age]; the column labels are supplied separately.
l = [
    ['Alex', 10],
    ['bob', 12],
    ['Clark', 13],
]
column_labels = ['NAME', 'AGE']
data1 = p.DataFrame(data=l, columns=column_labels)
print(data1)
#*************pandaDict15.py****************
import pandas as p
# Source Series: 'one' has three labels, 'two' has four.
series_one = p.Series([1, 2, 3], index=['a', 'b', 'c'])
series_two = p.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
d = {'one': series_one, 'two': series_two}
data = p.DataFrame(d)
print(data)

# New column from a Series.
data['three'] = p.Series([10, 20, 30], index=['a', 'b', 'c'])
print(data)

# New column derived from existing columns.
data['four'] = data['one'].add(data['three'])
print(data)

# Remove column 'three' in place; the returned Series is not used.
data.pop('three')
Page 22 of 23 EDUCATION FOR EVERYONE
DoyPyEdu PLAY WITH PYTHON
print(data)
#*************pandaDict16.py**************
import pandas as p
d = {
    'one': p.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': p.Series([1, 9, 3, 4], index=['a', 'b', 'c', 'd']),
}
data = p.DataFrame(d)
# Select one row by its label.
row_b = data.loc['b']
print(row_b)
#*************pandaDict17.py****************
import pandas as p
d = {
    'one': p.Series([1, 2, 5], index=['a', 'b', 'c']),
    'two': p.Series([1, 9, 3, 4], index=['a', 'b', 'c', 'd']),
}
data = p.DataFrame(d)
# Select one row by its integer position (third row).
third_row = data.iloc[2]
print(third_row)
#*************pandaDict18.py****************
import pandas as p
# The two Series share labels a, b, c; 'one' also has 'e' and 'two' has 'd'.
series_one = p.Series([1, 2, 5, 6], index=['a', 'b', 'c', 'e'])
series_two = p.Series([1, 9, 3, 4], index=['a', 'b', 'c', 'd'])
d = {'one': series_one, 'two': series_two}
data = p.DataFrame(d)
# Positional slice: rows at positions 2 and 3.
print(data.iloc[2:4])
#***********pandaDict19.py************
import pandas as p
d1=p.DataFrame([[1,2],[3,4]],columns=['a','b'])
d2=p.DataFrame([[5,6],[7,8]],columns=['a','b'])
# DataFrame.append() was removed in pandas 2.0; concat() is the replacement
# and keeps each frame's own index labels.
d1=p.concat([d1,d2])
print(d1)
#*************pandaDict20.py****************
import pandas as p
d1=p.DataFrame([[1,2],[3,4]],columns=['a','b'])
d2=p.DataFrame([[5,6],[7,8]],columns=['a','b'])
# DataFrame.append() was removed in pandas 2.0; concat() is the replacement
# and keeps each frame's own index labels.
d1=p.concat([d1,d2])
print(d1)
# Label 0 occurs once per source frame, so drop(0) removes two rows.
d1=d1.drop(0)
print(d1)
NOTES