0% found this document useful (0 votes)
4 views

PW4 python solution

Uploaded by

Aya
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views

PW4 python solution

Uploaded by

Aya
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Ministry of Higher Education and Scientific Research

University of Oum El Bouaghi


Faculty of Exact Sciences and Natural and Life Sciences
Department of Mathematics and Computer Science

Advanced Python Language


1st Master - AI & Data Science

Practical Work 04 Solutions (Part 1)


Q1.
import pandas as pd
from sklearn.datasets import load_iris
# Question 1
# Fetch the Iris dataset object.
iris = load_iris()
# Wrap the measurement matrix in a DataFrame, one named column per feature.
data = pd.DataFrame(iris.data, columns=iris.feature_names)
# Translate each row's target index into the corresponding species name.
species_labels = iris.target_names[iris.target]
data['species'] = species_labels
# Preview the first rows, then report the number of rows.
print(data.head())
print(len(data))

Output:

Q2.
# Question 2
# Descriptive statistics computed by DataFrame.describe()
summary_stats = data.describe()
print(summary_stats)

Output:
Q3.
# Question 3
# Distinct values found in the 'species' column
species_names = data['species'].unique()
print(species_names)

Output:

['setosa' 'versicolor' 'virginica']

Q4.
#Question 4
# Count the missing values in every column
print(data.isnull().sum())

# Fill missing values (if any) in the numeric columns with that column's mean.
# 'number' selects every numeric dtype rather than only float64/int64, so
# columns stored as e.g. float32 or int32 are filled as well.
numeric_cols = data.select_dtypes(include='number').columns
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())

Output:

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64

Q5.
# Question 5
# Derive a 'sepal_area' column as sepal length multiplied by sepal width.
sepal_length = data['sepal length (cm)']
sepal_width = data['sepal width (cm)']
data['sepal_area'] = sepal_length * sepal_width
print(data.head())

Output:

Q6.
# Question 6
# Keep only the rows whose species is setosa or virginica.
wanted_species = ['setosa', 'virginica']
is_wanted = data['species'].isin(wanted_species)
filtered_data = data[is_wanted]
# Report how many rows survived the filter.
print(len(filtered_data))
Output:
100
Q7.
# Question 7
# Average petal length within each species group.
petal_length_by_species = data.groupby('species')['petal length (cm)']
mean_petal_length = petal_length_by_species.mean()
print(mean_petal_length)
Output:
species
setosa 1.462
versicolor 4.260
virginica 5.552
Name: petal length (cm), dtype: float64

Q8.
#Question 8
# Scatter plot for sepal length vs sepal width
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
# Plot one scatter series per species so that each gets its own legend entry.
for species in data['species'].unique():
    subset = data[data['species'] == species]
    plt.scatter(subset['sepal length (cm)'], subset['sepal width (cm)'],
                label=species)

plt.title('Sepal Length vs Sepal Width by Species')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.legend()
plt.show()

Output:
Q9.
# Question 9
# Box plot comparing petal length between species.
data.boxplot(by='species', column='petal length (cm)', figsize=(8, 6))
plt.suptitle('')  # Blank out the default figure-level title
plt.title('Petal Length by Species')
plt.ylabel('Petal Length (cm)')
plt.xlabel('Species')
plt.show()

Output:

Q10.

# Question 10
# Histogram of the 'sepal_area' column using 10 bins.
sepal_area_values = data['sepal_area']
plt.hist(sepal_area_values, edgecolor='black', color='skyblue', bins=10)
plt.xlabel('Sepal Area (cm²)')
plt.ylabel('Frequency')
plt.title('Distribution of Sepal Area')
plt.show()

Output:
Q11.
# Question 11
# Pairwise scatter plots with histograms on the diagonal.
from pandas.plotting import scatter_matrix

# Only the first four columns of `data` are plotted.
feature_columns = data.iloc[:, :4]
scatter_matrix(
    feature_columns,
    figsize=(10, 10),
    alpha=0.7,
    diagonal='hist',
    color='b',
)
plt.suptitle('Pairwise Relationships')
plt.show()

Output:

You might also like