PW4 Python solution
Output:
Q2.
# Question 2
# Descriptive statistics for the numerical columns
summary_stats = data.describe()
print(summary_stats)
Output:
Q3.
# Question 3
# Distinct values found in the 'species' column
species_names = data['species'].unique()
print(species_names)
Output:
Q4.
# Question 4
# Count the missing (null) entries in each column
missing_per_column = data.isnull().sum()
print(missing_per_column)
Output:
Q5.
# Question 5
# Derive the sepal area (length x width) and store it as a new column
sepal_length = data['sepal length (cm)']
sepal_width = data['sepal width (cm)']
data['sepal_area'] = sepal_length * sepal_width
# Show the first rows to confirm the new column is present
print(data.head())
Output:
Q6.
# Question 6
# Keep only the rows whose species is setosa or virginica, then count them
is_selected_species = data['species'].isin(['setosa', 'virginica'])
filtered_data = data[is_selected_species]
print(len(filtered_data))
Output:
100
Q7.
# Question 7
# Average petal length within each species group
petal_length_by_species = data.groupby('species')['petal length (cm)']
mean_petal_length = petal_length_by_species.mean()
print(mean_petal_length)
Output:
species
setosa        1.462
versicolor    4.260
virginica     5.552
Name: petal length (cm), dtype: float64
Q8.
#Question 8
# Scatter plot for sepal length vs sepal width, one series per species
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
# Plot each species separately so it gets its own colour and legend entry.
for species in data['species'].unique():
    subset = data[data['species'] == species]
    plt.scatter(subset['sepal length (cm)'], subset['sepal width (cm)'],
                label=species)
plt.title('Sepal Length vs Sepal Width')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
# Without a legend the label= passed to scatter() is never displayed.
plt.legend(title='Species')
plt.show()
Output:
Q9.
# Question 9
# Box plot comparing petal length between species
boxplot_options = {
    'column': 'petal length (cm)',
    'by': 'species',
    'figsize': (8, 6),
}
data.boxplot(**boxplot_options)
plt.suptitle('')  # blank out the figure-level title that pandas adds when grouping
plt.title('Petal Length by Species')
plt.ylabel('Petal Length (cm)')
plt.xlabel('Species')
plt.show()
Output:
Q10.
# Question 10
# Histogram showing how the sepal_area column is distributed (10 bins)
sepal_area_values = data['sepal_area']
plt.hist(sepal_area_values, bins=10, color='skyblue', edgecolor='black')
plt.xlabel('Sepal Area (cm²)')
plt.ylabel('Frequency')
plt.title('Distribution of Sepal Area')
plt.show()
Output:
Q11.
#Question 11
# Pair plot for numerical features
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix

# NOTE(review): the original call to scatter_matrix was missing from this
# snippet; selecting every numeric column is an assumption. If 'sepal_area'
# was added earlier it is included too -- restrict the columns if only the
# four measurements are wanted.
numeric_features = data.select_dtypes(include='number')
scatter_matrix(numeric_features, figsize=(10, 10), diagonal='hist')
plt.show()
Output: