IP-12 2023-24 Practical File
#2. Write a Pandas program to perform arithmetic operations on two Pandas Series.
import pandas as pd
ds1 = pd.Series([3, 6, 9, 12, 15])
ds2 = pd.Series([2, 4, 6, 8, 10])
ds = ds1 + ds2
print("Add two Series:")
print(ds)
print("Subtract two Series:")
ds = ds1 - ds2
print(ds)
print("Multiply two Series:")
ds = ds1 * ds2
print(ds)
print("Divide Series1 by Series2:")
ds = ds1 / ds2
print(ds)
#4. Write a Pandas program to select the rows where the percentage is greater than 70.
import pandas as pd
import numpy as np
exam_data = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes']}
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
df = pd.DataFrame(exam_data , index=labels)
print("Number of student whoes percentage more than 70:")
print(df[df['perc'] > 70])
#5. Write a Pandas program to select the rows where the percentage is between 70 and 90 (inclusive).
import pandas as pd
import numpy as np
exam_data = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes']}
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
df = pd.DataFrame(exam_data , index=labels)
print("Number of student whoes percentage more than 70:")
print(df[df['perc'].between(70,90)])
#6. Write a Pandas program to change the percentage in a given row, as entered by the user.
import pandas as pd
import numpy as np
exam_dic = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes']}
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
df = pd.DataFrame(exam_dic , index=labels)
print("\nOriginal data frame:")
print(df)
ch = input("Enter the index of row : ")
per = float(input("Enter percentage to be changed: "))
print('\nChange the percentage in row '+ch+ ' to',per)
df.loc[ch, 'perc'] = per
print(df)
#7. Write a Pandas program to join the two given dataframes along rows and assign all data.
import pandas as pd
import numpy as np
exam_dic1 = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes']}
exam_data1 = pd.DataFrame(exam_dic1)
exam_dic2 = {'name': ['Parveen', 'Ahil', 'Ashaz', 'Shifin', 'Hanash'],
'perc': [89.5, 92, 90.5, 91.5, 90],
'qualify': ['yes', 'yes', 'yes', 'yes', 'yes']}
exam_data2 = pd.DataFrame(exam_dic2)
print("Original DataFrames:")
print(exam_data1)
print("-------------------------------------")
print(exam_data2)
print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([exam_data1, exam_data2])
print(result_data)
#8. Write a Pandas program to join the two given dataframes along columns and assign all data.
import pandas as pd
import numpy as np
exam_dic1 = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes']}
exam_data1 = pd.DataFrame(exam_dic1)
exam_dic2 = {'name': ['Parveen', 'Ahil', 'Ashaz', 'Shifin', 'Hanash'],
'perc': [89.5, 92, 90.5, 91.5, 90],
'qualify': ['yes', 'yes', 'yes', 'yes', 'yes']}
exam_data2 = pd.DataFrame(exam_dic2)
print("Original DataFrames:")
print(exam_data1)
print("-------------------------------------")
print(exam_data2)
print("\nJoin the said two dataframes along rows:")
result_data = pd.concat([exam_data1, exam_data2],axis=1)
print(result_data)
#9. Write a Pandas program to append a list of dictionaries or a Series to an existing DataFrame and display the combined data.
import pandas as pd
import numpy as np
exam_dic1 = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes']}
exam_data1 = pd.DataFrame(exam_dic1)
s = pd.Series(['Sukhvir', 54,'yes'], index=['name', 'perc','qualify'])
dicts = [{'name': 'Krish', 'perc': 45,'qualify':'yes'},
{'name': 'Kumar', 'perc': 67,'qualify':'yes'}]
print("Original DataFrames:")
print(exam_data1)
print("\nDictionary:")
print(s)
# Add the Series (DataFrame.append() was removed in pandas 2.0, so pd.concat() is used)
combined_data = pd.concat([exam_data1, s.to_frame().T], ignore_index=True, sort=False)
# Add the list of dictionaries
combined_info = pd.concat([combined_data, pd.DataFrame(dicts)], ignore_index=True, sort=False)
print("\nCombined Data:")
# Print Combined Data/info
print(combined_info)
#10. Program to select or filter rows from a DataFrame based on values in columns in pandas (use of relational and logical operators).
import pandas as pd
import numpy as np
exam_dic1 = {'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit',
'Matthew', 'Kartik', 'Kavita', 'Pooja'],
'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
exam_data1 = pd.DataFrame(exam_dic1)
print("Original DataFrames:")
print(exam_data1)
print("\nUse == operator\n")
print(exam_data1.loc[exam_data1['name'] == 'Rohan'])
print("\nUse < operator\n")
print(exam_data1.loc[exam_data1['perc'] < 40])
print("\n Use != operator\n")
print(exam_data1.loc[exam_data1['qualify'] != 'no'])
print("\n Multiple Conditions\n")
print(exam_data1.loc[(exam_data1['qualify'] != 'yes') & (exam_data1['perc'] <40)])
#11. Filter out rows based on different criteria such as duplicate rows
import pandas as pd
data={'Name':['Aman','Rohit','Deepika','Aman','Deepika','Sohit','Geeta'],
'Sales':[8500,4500,9200,8500,9200,9600,8400]}
sales=pd.DataFrame(data)
# Find duplicate rows
duplicated = sales[sales.duplicated(keep=False)]
print("duplicate Row:\n",duplicated)
#12. Importing and exporting data between pandas and a CSV file. To create and open a
# data frame using the ‘Student_result.csv’ file using Pandas. To display row labels,
# column labels, data types of each column and the dimensions. To display the shape
# (number of rows and columns) of the CSV file.
import pandas as pd
import csv
df = pd.read_csv("student_result.csv") #Reading the Data
print(df.columns) # Display Name of Columns
print(df.shape) # Display no of rows and column
print(df.info())# Display Column Names and their types
#13. Read the ‘Student_result.csv’ to create a data frame and do the following
# operations:
# To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.
# To display the first 5 and last 5 records from ‘student_result.csv’ file.
import pandas as pd
import csv
#To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.
df = pd.read_csv("student_result.csv",usecols = ['ADM_NO','GENDER', 'PERCENTAGE'])
print("To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.")
print(df)
#To display first 5 and last 5 records from ‘student_result.csv’ file.
df1 = pd.read_csv("student_result.csv")
print(df1.head())
print(df1.tail())
#14. Read the ‘Student_result.csv’ to create a data frame and do the following
# operations:
# To display Student_result file with new column names.
# To modify the Percentage of student below 40 with NaN value in dataframe.
import pandas as pd
import numpy as np
import csv
df = pd.read_csv("student_result.csv")
print(df)
#To display Student_result file with new column names.
df1 = pd.read_csv("student_result.csv",skiprows = 1,
names = ['Adno','Sex','Name','Eng','Hin',
'Maths','Sc.','SSt','San','IT','Perc'])
print("To display Student_result file with new column names")
print(df1)
# To modify the Percentage of student below 40 with NaN value.
df2 = pd.read_csv("student_result.csv")
print(df2)
print("To modify the Percentage of student below 40 with NaN value.")
df2.loc[df2['PERCENTAGE'] < 40, 'PERCENTAGE'] = np.nan
print(df2)
#15. Read the ‘Student_result.csv’ to create a data frame and do the following
# operations:
# To create a duplicate file for ‘student_result.csv’ containing Adm_No, Name and
# Percentage.
# Write the statement in Pandas to find the highest percentage and also print the
# student’s name and percentage.
import pandas as pd
import numpy as np
import csv
# To create a duplicate file for ‘student_result.csv’ containing Adm_No, Name and
# Percentage.
df = pd.read_csv("student_result.csv")
df.to_csv('copyStudent_result.csv', columns=['ADM_NO', "STUDENT'S_NAME", "PERCENTAGE"])
# Display Copied Dataframe
df2=pd.read_csv("copyStudent_result.csv")
print(df2)
# find the highest percentage and also print the student’s name and percentage.
df1 = pd.read_csv("student_result.csv")
df1 = df1[["STUDENT'S_NAME", 'PERCENTAGE']][df1['PERCENTAGE'] == df1['PERCENTAGE'].max()]
print(df1)
#16. Importing and exporting data between pandas and MySQL database
import pymysql
import pandas as pd
import mysql.connector
from sqlalchemy import types, create_engine
# Create dataframe
dic={
'EMPNO':[7369,7499,7566,7654,7698,7782,7788,7839,7844,7900,7902,7934],
'ENAME':['JAMES','ADAMS','CLARK','KING','WARD','JONES','ADAMS','SCOTT','FORD',
'BLAKE','MARTIN','TURNER'],
'JOB':['CLERK','CLERK','ANALYST','MANAGER','MANAGER','PRESIDENT','ANALYST',
'CLERK','MANAGER','ANALYST','SALESMAN','CLERK'],
'MGR':[7876,7876,7782,7900,7900 ,7900,7782,7876,7900,7782,7900,7876],
'HIREDATE':['2005/02/18','2005/01/04','2001/05/18','2003/04/19','2001/07/02',
'2006/09/21','2007/03/13','2005/03/06', '2007/01/12','2009/07/19','2009/01/05',
'2004/11/30'],
'SAL':[11400,19200,29400,60000,15000,95700,13200,36000,36000,34200,15000,18000],
'COMM':[4000,5000,5000,4000,2500,4000,2500,3000 ,3000,2500,2000 ,6000],
'DEPTT':[20,30,20,30,30,10,20,10,30,30,20,10]
}
data = pd.DataFrame(dic)
print('Our DataFrame is:\n',data)
tableName="employeedata"
# create sqlalchemy engine
sqlEngine = create_engine("mysql+pymysql://root:@localhost/Company")
dbConnection = sqlEngine.connect()
try:
    # Exporting the dataframe to SQL
    frame = data.to_sql(tableName, dbConnection, if_exists='fail')
except ValueError as vx:
    print(vx)
except Exception as ex:
    print(ex)
else:
    print("Table %s created successfully.\n" % tableName)
finally:
    dbConnection.close()
# Read a MySQL Database Table and write it into a Pandas DataFrame:
sqlEngine = create_engine('mysql+pymysql://root:@127.0.0.1')
dbConnection= sqlEngine.connect()
dframe = pd.read_sql("select * from Company.employeedata", dbConnection);
print("After importing data from MySql:\n")
print(dframe)
dbConnection.close()
#17. Find the sum of each column, or find the column with the lowest mean
import pandas as pd
Pass_Perc ={'Phy': {'2017':95.4,'2018':96.4,'2019':99.2,'2020':97.4},
'Che': {'2017':96.5,'2018':97.4,'2019':100,'2020':99.2},
'Maths': {'2017':90.2,'2018':92.6,'2019':97.4,'2020':98.0},
'Eng': {'2017':99.2,'2018':100,'2019':100,'2020':100},
'IP': {'2017':95.6,'2018':100,'2019':100,'2020':100}}
df=pd.DataFrame(Pass_Perc)
print(df)
print()
print('Column-wise sum in dataframe is:')
print(df.sum(axis=0))
# Print mean value of each column
print()
print('Column-wise mean values are:')
print(df.mean(axis=0).round(1))
# Returns column with minimum mean value
print()
print('Column with minimum mean value is:')
print(df.mean(axis=0).idxmin())
#18. Locate the 3 largest values in a data frame.
import pandas as pd
data={'Name':['Aman','Rohit','Deepika','Kamal','Deva','Ramesh','Adnan'],
'Sales':[8500,4500,9300,8600,9200,9600,8400]}
sales=pd.DataFrame(data)
# Find the 3 largest values in the Sales column
print(sales.nlargest(3,['Sales']))
#19. Subtract the mean of a row from each element of the row in a Data Frame
import pandas as pd
Pass_Perc ={'Phy': {'2017':95.4,'2018':96.4,'2019':99.2,'2020':97.4},
'Che': {'2017':96.5,'2018':97.4,'2019':100,'2020':99.2},
'Maths': {'2017':90.2,'2018':92.6,'2019':97.4,'2020':98.0},
'Eng': {'2017':99.2,'2018':100,'2019':100,'2020':100},
'IP': {'2017':95.6,'2018':100,'2019':100,'2020':100}}
df=pd.DataFrame(Pass_Perc)
print(df)
print()
print('Mean of each row is:')
print(df.mean(axis=1))
print()
print('Dataframe after subtracting the mean of each row from each element of that row:')
print(df.sub(df.mean(axis=1), axis=0))
#22. Given a Series, print all the elements that are above the 75th percentile.
import pandas as pd
import numpy as np
s=pd.Series(np.array([2,4,5,10,18,20,25]))
print(s)
res=s.quantile(q=0.75)
print()
print('75th Percentile of the series is::')
print(res)
print()
print('The elements that are above the 75th percentile:')
print(s[s>res])
#23. Create a Data Frame quarterly sales where each row contains the item category,
#item name, and expenditure. Group the rows by the category and print the total
#expenditure per category.
import pandas as pd
# initialize list of lists
data = [['CAR', 'Maruti', 1000000], ['AC', 'Hitachi', 55000], ['AIRCOOLER', 'Bajaj', 12000],
        ['WASHING MACHINE', 'LG', 15000], ['CAR', 'Ford', 7000000], ['AC', 'SAMSUNG', 45000],
        ['AIRCOOLER', 'Symphony', 20000], ['WASHING MACHINE', 'Whirlpool', 25000]]
Col=['itemcat','itemname','expenditure']
# Create the pandas DataFrame
qrtsales = pd.DataFrame(data,columns=Col)
# print dataframe.
print (qrtsales)
qs=qrtsales.groupby('itemcat')
print('Total expenditure per category:')
print(qs['expenditure'].sum())
#24. Create a data frame based on ecommerce data and generate descriptive
# statistics (mean, median, mode, quartile, and variance)
import pandas as pd
sales = {'InvoiceNo': [1001,1002,1003,1004,1005,1006,1007],
'ProductName': ['LCD','AC','Deodorant','Jeans','Books','Shoes','Jacket'],
'Quantity': [2,1,2,1,2,1,1],
'Price':[65000,55000,500,3000,958,3000,2200]}
df=pd.DataFrame(sales)
print(df)
print("Mean price of Item:", df['Price']. mean ().round (2))
print("Median price of Item:", df['Price']. median ().round (2))
print("Mode of price:\n", df[['Price']]. mode ())
print("Quartile of price:\n",df[['Price']].quantile([.1,.25,.5,.75],axis=0))
print("Variance of Price:\n",df[['Price']].var())
#25. Given the school result data, analyse the performance of the students on
# different parameters, e.g. subject wise or class wise.
# The x-axis shows the subject and the y-axis
# shows the marks in each subject
# import pandas and matplotlib
import pandas as pd
import matplotlib.pyplot as plt
# Simple Line Chart with setting of Label of X and Y axis,
# title for chart line and color of line
subject = ['Physics','Chemistry','Mathematics', 'Biology','Computer']
marks =[80,75,70,78,82]
# To draw line in red colour
plt.plot(subject,marks,'r',marker ='*')
# To Write Title of the Line Chart
plt.title('Marks Scored')
# To Put Label At X Axis
plt.xlabel('SUBJECT')
# To Put Label At Y Axis
plt.ylabel('MARKS')
plt.show()
#26. Write a program to plot a bar chart in python to display the result of a
# school for five consecutive years.
import matplotlib.pyplot as pl
year=['2015','2016','2017','2018','2019'] # list of years
p=[98.50,70.25,55.20,90.5,61.50] #list of pass percentage
j=['b','g','r','m','c'] # color code of bar charts
pl.bar(year, p, width=0.2, color=j) # bar( ) function to create the bar chart
pl.xlabel("year") # label for x-axis
pl.ylabel("Pass%") # label for y-axis
pl.show( ) # function to display bar chart
#27. For the Data frames created above, analyze, and plot appropriate charts with
# title and legend.
#• Number of Students against Scores in all the 7 subjects
#• Show the Highest score of each subject
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
df = pd.read_csv("student_result.csv")
#Number of Students against Scores in all the 7 subjects
plt.hist([df['ENG'],df['HINDI'],df['MATHS'],df['SCIENCE'],df['SSC'],df['SANSK'],df['CA']],
         color=['red', 'yellow', 'blue','green','orange','black','pink'])
plt.title('Number of Students against Scores')
plt.xlabel('Score')
plt.ylabel('Number of Students')
plt.legend(['English', 'Hindi', 'Maths','Science','S.Sc.','Sanskrit','CA'])
plt.show()
# Show the Highest score of each subject.
y = ['ENG','HINDI','MATHS','SCIENCE','SSC','SANSK','CA']
width = [df['ENG'].max(),df['HINDI'].max(),df['MATHS'].max(),df['SCIENCE'].max(),
         df['SSC'].max(),df['SANSK'].max(),df['CA'].max()]
plt.figure(figsize = (12,2))
plt.barh(y = y, width = width)
plt.title('Highest Scores')
plt.xlabel('Highest Score')
plt.ylabel('Subjects')
for i, v in enumerate(width):
    plt.text(v, i, " " + str(round(v, 2)), color='blue', va='center',
             fontweight='bold')
plt.show()
#28. For the Data frames created above, analyze, and plot appropriate charts with
# title and legend.
# • Show the Average score of each subject
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
df = pd.read_csv("student_result.csv")
# Show the Average score of each subject
y = ['ENG','HINDI','MATHS','SCIENCE','SSC','SANSK','CA']
width = [df['ENG'].mean(),df['HINDI'].mean(),df['MATHS'].mean(),df['SCIENCE'].mean(),
         df['SSC'].mean(),df['SANSK'].mean(),df['CA'].mean()]
plt.figure(figsize = (12,2))
plt.barh(y = y, width = width)
plt.title('Average Scores')
plt.xlabel('Average Score')
plt.ylabel('Subjects')
for i, v in enumerate(width):
    plt.text(v, i, " " + str(round(v, 2)), color='blue', va='center',
             fontweight='bold')
plt.show()
#29. For the Data frames created above, analyze, and plot appropriate charts
# with title and legend.
# • Number of Females and Males
# • Average Percentage of Females and Males
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
df = pd.read_csv("student_result.csv")
# Analyzing Scores based on Gender
df_gender = df.groupby('GENDER')
#Number of Females and Males
y = df_gender['GENDER'].count().keys()
width = df_gender['GENDER'].count()
plt.figure(figsize = (12,2))
plt.barh(y = y, width = width)
plt.title('No. of Females and Males')
plt.xlabel('Count')
plt.ylabel('Gender')
for i, v in enumerate(width):
    plt.text(v, i, " " + str(v), color='blue', va='center', fontweight='bold')
plt.show()
#Average Percentage of Females and Males
y = df_gender['PERCENTAGE'].mean().keys()
width = df_gender['PERCENTAGE'].mean()
plt.figure(figsize = (12,2))
plt.barh(y = y, width = width)
plt.title('Average Percentage of Females and Males')
plt.xlabel('Average Percentage')
plt.ylabel('Gender')
for i, v in enumerate(width):
    plt.text(v, i, " " + str(round(v, 2)), color='blue', va='center',
             fontweight='bold')
plt.show()
Source: https://www.learnpython4cbse.com/practical-file-programs-xii-ip
#30. Create a Pandas Series from a dictionary of values and an ndarray.
To create a Pandas Series from a dictionary of values, use the pd.Series() function and pass the
dictionary as an argument. Here's an example:
import pandas as pd
# Define a dictionary
data = {'a': 1, 'b': 2, 'c': 3}
# Create a Pandas Series from the dictionary
s = pd.Series(data)
# Print the Series
print(s)
output-
a 1
b 2
c 3
dtype: int64
To create a Pandas Series from a NumPy ndarray, use the pd.Series() function and pass the
ndarray as an argument. Here's an example:
import numpy as np
import pandas as pd
# Define a NumPy ndarray
arr = np.array([1, 2, 3])
# Create a Pandas Series from the ndarray
s = pd.Series(arr)
# Print the Series
print(s)
output-
0 1
1 2
2 3
dtype: int64
#31. Given a Series, print all the elements that are above the 75th percentile.
To print all the elements that are above the 75th percentile of a Pandas Series, use the quantile()
method to calculate the 75th percentile and then use boolean indexing to select the elements above
this value. Here's an example:
import pandas as pd
import numpy as np
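Continuing from the imports above, a minimal sketch of the steps described in the explanation below (percentile_75 is an assumed name for the intermediate value):
# Create a Pandas Series with 100 random values
s = pd.Series(np.random.randn(100))
# Calculate the 75th percentile of the Series
percentile_75 = s.quantile(0.75)   # assumed variable name
# Select the elements above the 75th percentile using boolean indexing
above_75th = s[s > percentile_75]
# Print the selected elements
print(above_75th)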
output-
4 1.262685
8 1.295014
11 0.998903
12 1.475878
13 0.824401
...
88 1.245179
91 0.864652
93 0.843725
94 1.347569
99 1.138407
Length: 25, dtype: float64
In this example, we first create a Pandas Series with 100 random values using NumPy's
random.randn() function. We then use the quantile() method to calculate the 75th percentile of
the Series. Next, we use boolean indexing to select the elements of the Series that are greater than
the 75th percentile and assign the result to the variable above_75th. Finally, we print the selected
elements.
#32. Create a Data Frame quarterly sales where each row contains the item category, item name, and
expenditure. Group the rows by the category and print the total expenditure per category.
To create a DataFrame of quarterly sales with item categories, item names, and expenditures, and
then group the rows by category and print the total expenditure per category, you can use the
groupby() method. Here's an example:
import pandas as pd
# Create a dictionary of quarterly sales data
data = {'category': ['A', 'A', 'B', 'B', 'C', 'C'],
'item': ['item1', 'item2', 'item3', 'item4', 'item5', 'item6'],
'Q1': [100, 200, 150, 250, 300, 350],
'Q2': [150, 250, 200, 300, 350, 400],
'Q3': [200, 300, 250, 350, 400, 450],
'Q4': [250, 350, 300, 400, 450, 500]}
df = pd.DataFrame(data) # Create a DataFrame from the dictionary
# Melt the DataFrame to reshape it into long format
df = pd.melt(df, id_vars=['category', 'item'], var_name='quarter', value_name='expenditure')
# Group the DataFrame by category and sum the expenditure
total_expenditure = df.groupby('category')['expenditure'].sum()
print(total_expenditure) # Print the total expenditure per category
output-
category
A 1800
B 2200
C 3200
Name: expenditure, dtype: int64
In this example, we first create a dictionary of quarterly sales data with item categories, item names,
and expenditures. We then create a DataFrame from the dictionary using the pd.DataFrame()
function. We reshape the DataFrame into long format using the pd.melt() function to make it
easier to aggregate by quarter. Next, we group the DataFrame by category using the groupby()
method and sum the expenditure for each category using the sum() method. Finally, we print the
total expenditure per category.
#33. Create a data frame for examination results and display row labels, column labels, data types of each
column and the dimensions.
To create a DataFrame for examination results and display row labels, column labels, data types of
each column, and the dimensions, you can use the pd.DataFrame() function to create the
DataFrame and then use the DataFrame attributes to display the information you need. Here's an
example:
import pandas as pd
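Continuing from the import above, a minimal sketch matching the explanation and output below; the student names and marks are assumed here, reusing the sample data that appears in the later examples:
# Create a dictionary of examination results (sample data assumed)
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emma'],
        'Maths': [75, 80, 90, 85, 95],
        'Science': [80, 85, 95, 90, 100],
        'English': [70, 75, 85, 80, 90]}
# Create a DataFrame from the dictionary
df = pd.DataFrame(data)
# Display row labels, column labels, data types of each column and the dimensions
print("Row Labels:", df.index.tolist())
print("Column Labels:", df.columns.tolist())
print("Data Types:", df.dtypes)
print("Dimensions:", df.shape)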
output-
Row Labels: [0, 1, 2, 3, 4]
Column Labels: ['Name', 'Maths', 'Science', 'English']
Data Types: Name object
Maths int64
Science int64
English int64
dtype: object
Dimensions: (5, 4)
In this example, we first create a dictionary of examination results with student names and their
scores in Maths, Science, and English. We then create a DataFrame from the dictionary using the
pd.DataFrame() function. We display the row labels using the index.tolist() attribute. We
display the column labels using the columns.tolist() attribute and the data types of each column
using the dtypes attribute. Finally, we display the dimensions of the DataFrame using the shape
attribute.
#34. Filter out rows based on different criteria such as duplicate rows.
To filter out rows based on different criteria such as duplicate rows, you can use the
drop_duplicates() method for removing duplicate rows or the query() method for filtering rows
based on a condition. Here's an example:
import pandas as pd
# Create a dictionary of examination results
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emma', 'David'],
'Maths': [75, 80, 90, 85, 95, 85],
'Science': [80, 85, 95, 90, 100, 90],
'English': [70, 75, 85, 80, 90, 80]}
# Create a DataFrame from the dictionary
df = pd.DataFrame(data)
# Remove duplicate rows based on all columns
df_no_duplicates = df.drop_duplicates()
# Filter rows based on a condition
df_filtered = df.query('Maths >= 90')
# Print the original DataFrame, the DataFrame without duplicates, and the filtered DataFrame
print("Original DataFrame:")
print(df)
print("\nDataFrame without duplicates:")
print(df_no_duplicates)
print("\nFiltered DataFrame:")
print(df_filtered)
output-
Original DataFrame:
Name Maths Science English
0 Alice 75 80 70
1 Bob 80 85 75
2 Charlie 90 95 85
3 David 85 90 80
4 Emma 95 100 90
5 David 85 90 80
DataFrame without duplicates:
Name Maths Science English
0 Alice 75 80 70
1 Bob 80 85 75
2 Charlie 90 95 85
3 David 85 90 80
4 Emma 95 100 90
Filtered DataFrame:
Name Maths Science English
2 Charlie 90 95 85
4 Emma 95 100 90
In this example, we first create a dictionary of examination results with student names and their scores in Maths,
Science, and English. We then create a DataFrame from the dictionary using the pd.DataFrame() function. We
remove duplicate rows based on all columns using the drop_duplicates() method and assign the result to a
new DataFrame df_no_duplicates. We filter rows based on a condition (Maths score greater than or equal to
90) using the query() method and assign the result to a new DataFrame df_filtered. Finally, we print the
original DataFrame, the DataFrame without duplicates, and the filtered DataFrame to compare the differences.
#35. Importing and exporting data between pandas and CSV file.
Pandas provides built-in functions to import and export data between CSV files and DataFrames.
Here's how you can import and export data:
To import data from a CSV file, you can use the read_csv() function from pandas. Here's an
example:
import pandas as pd
# Import data from CSV file
df = pd.read_csv('data.csv')
# Display the DataFrame
print(df)
In this example, we first import the pandas library. We then use the read_csv() function to import
data from a CSV file named data.csv and assign it to a DataFrame called df. Finally, we print the
DataFrame using the print() function.
To export data from a DataFrame to a CSV file, you can use the to_csv() method. Here's an
example:
import pandas as pd
# Create a DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Emma'],
'Maths': [75, 80, 90, 85, 95],
'Science': [80, 85, 95, 90, 100],
'English': [70, 75, 85, 80, 90]}
df = pd.DataFrame(data)
# Export data to CSV file
df.to_csv('results.csv', index=False)
# Display a message confirming the export
print('Data exported to results.csv.')
MySQL
1. Create a student table with the student id, name, and marks as attributes
where the student id is the primary key.
CREATE TABLE student (
student_id INT PRIMARY KEY,
name VARCHAR(50),
marks FLOAT
);
In this example, we create a table named "student" with three attributes:
"student_id", "name", and "marks". The "student_id" attribute is defined as
an integer and set as the primary key using the PRIMARY KEY keyword. The
"name" attribute is defined as a varchar with a maximum length of 50
characters. The "marks" attribute is defined as a float.
2. Insert the details of a new student in the above table.
INSERT INTO student (student_id, name, marks)
VALUES (1, 'John Doe', 85.5);
In this example, we insert a new record into the "student" table with the
student ID of 1, name of "John Doe", and marks of 85.5.
You can insert multiple records at once by separating each set of values with
commas and enclosing each set in parentheses, like this:
INSERT INTO student (student_id, name, marks)
VALUES (1, 'John Doe', 85.5), (2, 'Jane Smith', 92.3), (3, 'Bob Johnson',
78.9);
This will insert three records into the "student" table at once.
3. Delete the details of a student in the above table.
DELETE FROM student WHERE student_id = 1;
In this example, we use the DELETE statement to remove the record of the
student with a student ID of 1 from the "student" table.
You can modify the WHERE clause to specify which student record you want to
delete. For example, if you want to delete the record of the student with the
name "John Doe", you can use the following code:
DELETE FROM student WHERE name = 'John Doe';
Note that once a record is deleted from the table, it cannot be retrieved.
Therefore, it is important to make sure you have selected the correct record
to delete before executing the delete statement.
4. Use the select command to get the details of the students with marks more
than 80.
SELECT * FROM student WHERE marks > 80;
In this example, we use the SELECT statement to retrieve all the columns (*)
from the "student" table where the "marks" value is greater than 80.
You can modify the WHERE clause to filter the data based on different
conditions. For example, if you want to retrieve the details of students with
marks between 70 and 90, you can use the following code:
SELECT * FROM student WHERE marks BETWEEN 70 AND 90;
This will retrieve all the columns from the "student" table where the "marks"
value is between 70 and 90, inclusive.
5. Find the min, max, sum, and average of the marks in a student marks table.
SELECT MIN(marks) AS min_marks, MAX(marks) AS max_marks, SUM(marks) AS
sum_marks, AVG(marks) AS avg_marks FROM student;
In this example, we use the MIN, MAX, SUM, and AVG aggregate functions to
find the minimum, maximum, sum, and average of the "marks" column in the
"student" table. We also use the AS keyword to rename the columns in the
output.
Note that you can modify the SQL query to perform aggregate functions on a
subset of data using the WHERE clause. For example, if you want to find the
minimum, maximum, sum, and average of the marks for students with a "name"
starting with the letter "J", you can use the following code:
SELECT MIN(marks) AS min_marks, MAX(marks) AS max_marks, SUM(marks) AS
sum_marks, AVG(marks) AS avg_marks FROM student WHERE name LIKE 'J%';
This will find the minimum, maximum, sum, and average of the "marks" column
for all the students with a "name" starting with the letter "J".
6. Find the total number of customers from each country in the table (customer
ID, customer Name, country) using group by.
SELECT country, COUNT(*) as total_customers
FROM customers
GROUP BY country;
In this example, we use the GROUP BY clause to group the data by the
"country" column in the "customers" table. We then use the COUNT(*) aggregate
function to count the number of customers in each group. Finally, we use the
AS keyword to rename the output column to "total_customers".
This SQL query will give you the total number of customers from each country
in the "customers" table.
7. Write a SQL query to order the (student ID, marks) table in descending order
of the marks.
SELECT student_id, marks
FROM student
ORDER BY marks DESC;
In this example, we use the ORDER BY clause to sort the data in the "student"
table by the "marks" column in descending order. The DESC keyword is used to
specify the descending order. This SQL query will retrieve the "student_id"
and "marks" columns from the "student" table and sort the data in descending
order of the "marks" column. The result will show the student ID and their
marks, with the students who scored the highest marks appearing first in the
result set.
https://vargaurav.wordpress.com/
Gaurav Varma
PGT Comp-Sc
7985749707