Practical File Questions With Answers
Practical File Questions With Answers
1. Write a code to create a Series object using the Python numpy ndarray [4,6,8,10].
#Code 1:
# Build a pandas Series from a NumPy ndarray and display it.
# No index is supplied, so pandas assigns the default positional labels.
import numpy as np
import pandas as pd

ar = np.array([4, 6, 8, 10])
sar = pd.Series(data=ar)
print(sar)
#Output:
0 4
1 6
2 8
3 10
dtype: int32
#Code 2:
# Build a Series from a dictionary: the keys become the index labels
# and the values become the data.
import pandas as pd

Student = {
    'Roll': 1,
    'Name': 'Amit',
    'Çlass': 12,
    'Section': "B",
}
s = pd.Series(data=Student)
print(s)
#Output:
Roll 1
Name Amit
Çlass 12
Section B
dtype: object
3. Write a code to create a Series object using the Python sequence (80,65,31,41,78) and print all
the elements that are above 75.
#Code 3
# Create a Series from a tuple and show only the values greater than 75.
import pandas as pd

s = pd.Series((80, 65, 31, 41, 78))
print(s)
print()
print('Elements above 75')
# Boolean mask: True where the element exceeds 75.
is_above_75 = s > 75
print(s[is_above_75])
#Output:
0 80
1 65
2 31
3 41
4 78
dtype: int64
Elements above 75
0 80
4 78
dtype: int64
OR
Given a Series, print all the elements that are above the 75th percentile.
#Code3
# Show the elements of a Series that lie above its 75th percentile.
import pandas as pd

s = pd.Series((80, 65, 31, 41, 78))
print(s)
print()
# quantile(0.75) returns the 75th-percentile value of the Series.
sq = s.quantile(0.75)
print(sq)
print()
print('Elements above 75th percentile')
print(s.loc[s > sq])
#Output:
0 80
1 65
2 31
3 41
4 78
dtype: int64
78.0
Elements above 75th percentile
0 80
dtype: int64
4. Create a Data Frame quarterly sales where each row contains the item category, item name, and
expenditure. Group the rows by the category, and print the total expenditure per category.
Items:
#Code 4
# Quarterly sales: build a DataFrame of items, group the rows by category
# and print the total expenditure per category.
import pandas as pd

Items = {
    'Itemcategory': ['Drinks', 'Drinks', 'Sweets', 'Nuts', 'Nuts', 'Sweets'],
    'itemName': ['Pepsi', 'Coke', 'Chocolate', 'Cashew Nut', 'Almond', 'Cake'],
    'Expenditure': [99000, 60000, 20000, 30000, 12000, 30000],
}
QSales = pd.DataFrame(Items)
print(QSales)
print()

Qsalesgroup = QSales.groupby('Itemcategory')

# Total per category as a Series (single-column selection).
print(Qsalesgroup['Expenditure'].sum())
print()

# Total per category as a DataFrame (list selection).
# Only the value column is selected: the grouping key already becomes the
# index of the result, so listing it again is redundant.
# NOTE(review): re-selecting the key column appears to be version-dependent
# in pandas (it may be aggregated as a data column) - confirm if needed.
CategoryTotals = Qsalesgroup[['Expenditure']].sum()
print(CategoryTotals)
#Output:
Itemcategory itemName Expenditure
0 Drinks Pepsi 99000
1 Drinks Coke 60000
2 Sweets Chocolate 20000
3 Nuts Cashew Nut 30000
4 Nuts Almond 12000
5 Sweets Cake 30000
Itemcategory
Drinks 159000
Nuts 42000
Sweets 50000
Name: Expenditure, dtype: int64
Expenditure
Itemcategory
Drinks 159000
Nuts 42000
Sweets 50000
5. Create a data frame for examination result and display row labels, column labels, data types of
each column and the dimensions.
#Code 5
# Examination result DataFrame: show the data, the column data types
# and the number of dimensions.
import pandas as pd

Roll = [1, 2, 3, 4, 5]
AvgMarks = [60, 80, 90.5, 75.5, 44]
Stu = {'RollNo': Roll, 'Avg': AvgMarks}
StuMarks = pd.DataFrame(data=Stu)

print('Students Marks')
print(StuMarks)

# dtypes of all columns at once.
print('Data Types----')
print(StuMarks.dtypes)
print()

# dtype of each column, one at a time.
print('Data Type of each column--')
print(StuMarks['RollNo'].dtype)
print(StuMarks['Avg'].dtype)
print()

# ndim is the number of axes of the DataFrame.
print('Dimension')
print(StuMarks.ndim)
#Output
Students Marks
RollNo Avg
0 1 60.0
1 2 80.0
2 3 90.5
3 4 75.5
4 5 44.0
Data Types----
RollNo int64
Avg float64
dtype: object
Data Type of each column--
int64
float64
Dimension
2
6. Write code to create data frame and filter out rows based on different criteria such as
duplicate rows.
#Code 6
# Filter a DataFrame on duplicate rows: show them, drop the repeats while
# keeping the first occurrence, and drop every row that has a duplicate.
import pandas as pd

Roll = [1, 2, 3, 2, 5, 3]
AvgMarks = [60, 80, 90.5, 80, 60, 90.5]
Stu = {'RollNo': Roll, 'Avg': AvgMarks}

# Two independent frames so each removal strategy starts from the full data.
StuMarks1 = pd.DataFrame(Stu)
StuMarks2 = pd.DataFrame(Stu)
print(StuMarks1)
print()

print("Showing Duplicates")
# keep=False marks every member of a duplicated group, not just the repeats.
is_repeated = StuMarks1.duplicated(keep=False)
sm = StuMarks1[is_repeated]
print(sm)

print("Removing Duplicates keeping Distinct")
StuMarks1 = StuMarks1.drop_duplicates(keep='first')
print(StuMarks1)
print()

print("Removing Duplicates")
StuMarks2 = StuMarks2.drop_duplicates(keep=False)
print(StuMarks2)
#Output
RollNo Avg
0 1 60.0
1 2 80.0
2 3 90.5
3 2 80.0
4 5 60.0
5 3 90.5
Showing Duplicates
RollNo Avg
1 2 80.0
2 3 90.5
3 2 80.0
5 3 90.5
Removing Duplicates keeping Distinct
RollNo Avg
0 1 60.0
1 2 80.0
2 3 90.5
4 5 60.0
Removing Duplicates
RollNo Avg
0 1 60.0
4 5 60.0
7. Find the sum of each column, or find the column with the lowest mean.
#Code 7
# Periodic-test marks: print the sum of each column, the mean of each
# column, and the column whose mean is lowest.
import pandas as pd

# Named `marks` rather than `dict` so the builtin dict type is not shadowed.
marks = {'PT1': [15, 16, 14, 12, 16], 'PT2': [17, 18, 15, 10, 12]}
Markdf = pd.DataFrame(marks)
print(Markdf)
print()
print('Total Marks of Periodic Test1')
print(Markdf['PT1'].sum())
print()
print('Total Marks of Periodic Test2')
print(Markdf['PT2'].sum())
print()
print('Total Marks of Periodic Test 1 & 2')
print(Markdf[['PT1', 'PT2']].sum())
print()
print('Total Marks of Each Column of Data Frame')
print(Markdf.sum())
print()
print('Mean value of Each Column of Data Frame')
print(Markdf.mean())
print()
# idxmin() on the per-column means returns the label of the smallest mean.
print('Column with the Lowest Mean')
LowestMeanCol = Markdf.mean().idxmin()
print(LowestMeanCol)
#Output
PT1 PT2
0 15 17
1 16 18
2 14 15
3 12 10
4 16 12
#Output
NorthTemp SouthTemp
0 15 22
1 -4 17
2 5 25
3 -2 20
4 -1 12
#Code 9
# Replace missing values (NaN) in a student DataFrame with 999.
import numpy as np
import pandas as pd

Student = {
    'AdmNo': ['A111', 'A42', 'B33', 'B112'],
    'Name': ['Amit', 'Anil', 'Bharti', 'Sumit'],
    'Stream': ['Sc', 'Comm', np.nan, 'Sc'],
    'English': [22, np.nan, 20, 23],
}
df = pd.DataFrame(Student)
print(df)

print('\n****Replacing NaN with 999****')
# Per-column replacement values; the frame is modified in place.
fill_values = {'Stream': 999, 'English': 999}
df.fillna(value=fill_values, inplace=True)
print(df)
#Output
AdmNo Name Stream English
0 A111 Amit Sc 22.0
1 A42 Anil Comm NaN
2 B33 Bharti NaN 20.0
3 B112 Sumit Sc 23.0
Output:
Name Phy Maths IP
0 Amit 35 NaN 28
1 Bhanu 75 90.0 68
2 Chirag 60 70.0 43
#Code 10-b
# Read only the Name and IP columns from the student CSV file.
import pandas as pd

wanted_columns = ['Name', 'IP']
df = pd.read_csv('c:/student.csv', usecols=wanted_columns)
print(df)
***************************