Indexml Merged
Indexml Merged
Program:-
import matplotlib.pyplot as plt
import numpy as np
unemployment_rates = [2.4, 2.8, 2.9, 3.0, 3.4, 3.6, 3.6, 3.9, 4.1, 4.4, 4.6, 4.6, 4.7, 4.7, 4.8, 5.4,5.5, 5.6, 5.9, 5.9,
6.0, 6.0, 6.3, 6.3, 6.4, 6.8, 6.8, 6.9, 7.0, 7.1, 7.1, 7.1,7.2, 7.2, 7.5, 7.5, 7.5, 7.6, 7.6, 7.6, 7.7, 7.8, 8.0, 8.1, 8.3, 8.4,
8.8, 9.1,9.5, 9.6, 9.7, 10.3, 10.4, 10.6, 11.0, 11.2, 11.3, 11.4, 12.0]
# Create a Histogram
plt.ylabel('Frequency')
plt.show()
plt.ylabel('Frequency')
plt.show()
# Create an Ogive
cdf = np.cumsum(counts)
plt.plot(bins[1:], cdf)
plt.ylabel('Cumulative Frequency')
plt.show()
OUTPUT :-
Practical 2
Write a Python Program to generate some random data. Plot Box plots using this
random data.
Program:-
import numpy as np
np.random.seed(42)
data1 = np.random.randn(100)
data2 = np.random.randn(150) * 2 + 10
plt.xlabel('Data Set')
plt.ylabel('Value')
plt.show()
OUTPUT :-
Practical 3
Write a Python program to generate data from the Binomial distribution and show the pdf
plot for various values of n and p.
Program:-
import numpy as np
return pmf
for i in range(len(n_values)):
for j in range(len(p_values)):
n = n_values[i]
p = p_values[j]
x = np.arange(0, n + 1)
result = binomial_dist(n, p, x)
axs[i, j].set_ylabel('Probability')
plt.show()
OUTPUT:-
Practical 4
Write a Python program to generate the Poisson distribution and show the pdf plot for various values of lambda.
Program:-
import numpy as np
import math
# lam : lambda
u = []
z = []
for i in range(len(x)):
u.append(math.factorial(i))
z.append(lam**i)
prob_density = wz * np.exp(-lam)
return prob_density
lam = 1
x = np.arange(0, 40, 1)
axs[0, 0].set_title('lam=1')
axs[0, 0].set_xlabel('x')
axs[0, 0].set_ylabel('Probability')
lam = 2
axs[0, 1].set_xlabel('x')
axs[0, 1].set_ylabel('Probability')
lam = 5
axs[0, 2].set_title('lam=5')
axs[0, 2].set_xlabel('x')
axs[0, 2].set_ylabel('Probability')
lam = 10
axs[1, 0].set_title('lam=10')
axs[1, 0].set_xlabel('x')
axs[1, 0].set_ylabel('Probability')
lam = 15
axs[1, 1].set_title('lam=15')
axs[1, 1].set_xlabel('x')
axs[1, 1].set_ylabel('Probability')
lam = 20
axs[1, 2].set_xlabel('x')
axs[1, 2].set_ylabel('Probability')
plt.show()
OUTPUT:-
Practical 5
Write a Python program to generate data from the Normal distribution and show the pdf
plot for various values of µ and σ.
Program :-
import numpy as np
return prob_density
# Create subplots
# Generate and plot the normal distribution PDF for each combination of mean and standard deviation
for j, sd in enumerate(sds):
axs[i, j].plot(x, y)
axs[i, j].grid(True)
for ax in axs.flat:
# Hide x labels and tick labels for top plots and y ticks for right plots.
for ax in axs.flat:
ax.label_outer()
plt.tight_layout()
plt.show()
OUTPUT:-
Practical 6
Write Python Program to generate exponential distribution and show the pdf plot for
various values of Lambda.
Program:-
import numpy as np
return prob_density
# Generate and plot the exponential distribution for various values of Lambda
axs[i].scatter(x, result)
axs[i].set_title(f'λ={lam}')
axs[i].set_xlabel('x')
axs[i].set_ylabel('Probability Density')
fig.tight_layout()
plt.show()
plot_exp_dist(lambdas, x_range)
OUTPUT:-
Practical 7
Write a Python Program to import and export data using Pandas. Demonstrate various
data pre-processing techniques.
Program:-
import pandas as pd
import numpy as np
# Import: load the salary data set from a CSV file in the working directory.
data = pd.read_csv('Salary_Data1.csv')
print("Initial Data:")
print(data.head())

# Export: write the frame back to disk without the index column.
data.to_csv('exported_data.csv', index=False)

# Handling missing values: NaNs in 'Age' and 'EstimatedSalary' are replaced
# with the mean of the respective column.
# NOTE(review): numeric_columns is not used by any statement in this listing;
# it is kept in case a later step relies on it.
numeric_columns = data.select_dtypes(include=[np.number]).columns
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['EstimatedSalary'] = data['EstimatedSalary'].fillna(data['EstimatedSalary'].mean())

# 2. Feature scaling
# Min-Max Scaling
# NOTE(review): no scaling statement follows these headings in this listing.

# 4. Removing duplicates
# Drops fully duplicated rows in place, keeping the first occurrence.
data.drop_duplicates(inplace=True)

# 6. One-hot encoding
# Assuming there is a categorical column named 'Gender'
# NOTE(review): the encoding statement itself is not present in this listing.

print("\nPre-processed Data:")
print(data.head())
OUTPUT:-
Practical 8
Write a Python Program to implement Simple and Multiple Linear Regression.
Program:-
import numpy as np
# Sample data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])
b = np.mean(y) - m*np.mean(x)
# Predict values
y_pred = m*x + b
plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
OUTPUT:-
Multiple Linear Regression
Program:-
import numpy as np
# Sample data
model = LinearRegression()
model.fit(X, y)
# Make predictions
y_pred = model.predict(X)
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
plt.scatter(X[:, 1], y)
plt.xlabel('x1')
plt.ylabel('y')
plt.show()
OUTPUT
Practical 09
Write a Python Program to implement Logistic Regression on a given dataset.
Program:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import ListedColormap
for i, j in enumerate(np.unique(y_set)):
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
c=np.array([ListedColormap(('red', 'green'))(i)]), label=j)
plt.title(title)
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
# Load dataset
dataset = pd.read_csv('Program_09.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = split_dataset(X, y, test_size=0.25, random_state=0)
# Feature Scaling
X_train, mean_train, std_train = scale_features(X_train)
X_test = (X_test - mean_train) / std_train
# Logistic Regression
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
random.seed(0)
train_df, test_df = train_test_split(df, test_size=20)
# Visualization
sns.FacetGrid(df, hue="label", height=5, aspect=1.5).map(plt.scatter, "sepal_width", "sepal_length").add_legend()
plt.show()
# Descriptive statistics
statistics = ['min', 'max', 'mean', 'median', 'std']
df.groupby('label').agg({
'sepal_length': statistics,
'sepal_width': statistics,
'petal_length': statistics,
'petal_width': statistics
}).round(2).T
def get_potential_splits(data):
    """Return the candidate split thresholds for every feature column.

    Args:
        data: 2-D NumPy array. Every column except the last one is scanned;
            the last column is skipped.

    Returns:
        dict mapping each scanned column index to a list holding the
        midpoint between every pair of neighbouring distinct values in that
        column, in ascending order. A column with fewer than two distinct
        values maps to an empty list.
    """
    potential_splits = {}
    _, n_columns = data.shape
    for column_index in range(n_columns - 1):
        # np.unique returns the distinct values sorted ascending, so pairing
        # each value with its successor yields the neighbouring pairs.
        unique_values = np.unique(data[:, column_index])
        potential_splits[column_index] = [
            (lower + upper) / 2
            for lower, upper in zip(unique_values[:-1], unique_values[1:])
        ]
    return potential_splits
def calculate_entropy(data):
    """Return the Shannon entropy, in bits, of the last column of ``data``.

    Args:
        data: 2-D NumPy array whose last column holds the values whose
            distribution is measured.

    Returns:
        float: ``-sum(p * log2(p))`` over the relative frequencies ``p`` of
        the distinct values in the last column; 0.0 when only one distinct
        value occurs.
    """
    label_column = data[:, -1]
    _, counts = np.unique(label_column, return_counts=True)
    probabilities = counts / counts.sum()
    entropy = -np.sum(probabilities * np.log2(probabilities))
    # Adding 0.0 turns a possible -0.0 (single-class case) into +0.0.
    return float(entropy) + 0.0
else:
counter += 1
potential_splits = get_potential_splits(data)
split_column, split_value = determine_best_split(data, potential_splits)
data_below, data_above = split_data(data, split_column, split_value)
feature_name = COLUMN_HEADERS[split_column]
question = "{} <= {}".format(feature_name, split_value)
sub_tree = {question: []}
if yes_answer == no_answer:
return yes_answer
else:
sub_tree[question].append(yes_answer)
sub_tree[question].append(no_answer)
return sub_tree
for _ in range(max_iters):
# Assign each data point to the nearest centroid
labels = np.argmin(np.linalg.norm(X[:, np.newaxis] - centroids, axis=-1), axis=-1)
# Update centroids
new_centroids = np.array([X[labels == i].mean(axis=0) for i in range(k)])
# Check convergence
if np.all(centroids == new_centroids):
break
centroids = new_centroids
Output:
Practical 12
Write a program for ANOVA test.
Program:
import numpy as np
def one_way_anova(*args):
    """Compute the one-way ANOVA F statistic for the given groups.

    Args:
        *args: two or more sequences of numeric observations, one per group.

    Returns:
        The F statistic ``MSB / MSW``, where MSB is the between-group mean
        square (k - 1 degrees of freedom) and MSW is the within-group mean
        square (N - k degrees of freedom).

    Raises:
        ValueError: if fewer than two groups are given, or if the total
            number of observations does not exceed the number of groups
            (the within-group degrees of freedom would be zero).
    """
    # Number of groups
    k = len(args)
    # Total number of observations
    N = sum(len(group) for group in args)
    if k < 2 or N <= k:
        raise ValueError(
            "one_way_anova needs at least two groups and more observations than groups"
        )
    # Group means
    group_means = [np.mean(group) for group in args]
    # Overall mean
    grand_mean = np.mean([item for group in args for item in group])
    # Between-group sum of squares: squared distance of each group mean from
    # the overall mean, weighted by the group size.
    SSB = sum(
        len(group) * (group_mean - grand_mean) ** 2
        for group, group_mean in zip(args, group_means)
    )
    # Within-group sum of squares: squared distance of each observation from
    # its own group mean.
    SSW = sum(
        sum((item - group_mean) ** 2 for item in group)
        for group, group_mean in zip(args, group_means)
    )
    # Mean squares
    MSB = SSB / (k - 1)
    MSW = SSW / (N - k)
    # F statistic
    F = MSB / MSW
    return F
# Example usage:
# Data for three groups
group1 = [6, 8, 4, 5, 3, 4]
group2 = [8, 12, 9, 11, 6, 8]
group3 = [13, 9, 11, 8, 7, 12]
return {
'SS_total': SS_total,
'SS_factor_A': SS_factor_A,
'SS_factor_B': SS_factor_B,
'SS_error': SS_error,
'df_total': df_total,
'df_factor_A': df_factor_A,
'df_factor_B': df_factor_B,
'df_error': df_error,
'MS_factor_A': MS_factor_A,
'MS_factor_B': MS_factor_B,
'MS_error': MS_error,
'F_factor_A': F_factor_A,
'F_factor_B': F_factor_B
}
# Example data for two-way ANOVA
data = [
[[3, 2, 1], [4, 5, 6]],
[[5, 6, 7], [8, 9, 10]],
[[7, 8, 9], [10, 11, 12]]
]
print("\nTwo-way ANOVA")
# Perform two-way ANOVA
results = two_way_anova(data)
for key, value in results.items():
print(f"{key}: {value}")
Output:
Practical 13
Write a program for z-testing and t-testing.
Program:
#Program 13
# Import the necessary libraries
import numpy as np
import scipy.stats as stats
print("A school claimed that the students who study there are more intelligent than the average school. "
"On calculating the IQ scores of 50 students, the average turns out to be 110. "
"The mean of the population IQ is 100 and the standard deviation is 15. "
"State whether the claim of the principal is right or not at a 5% significance level.")
# Given information
sample_mean = 110
population_mean = 100
population_std = 15
sample_size = 50
alpha = 0.05

# Z-Score: distance of the sample mean from the population mean, measured in
# standard errors (population_std / sqrt(sample_size)).
z_score = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))
print('Z-Score:', z_score)

# Critical Z-Score for a right-tailed test at significance level alpha
z_critical = stats.norm.ppf(1 - alpha)
print('Critical Z-Score:', z_critical)

# Hypothesis
if z_score > z_critical:
    print("Reject Null Hypothesis")
else:
    print("Fail to Reject Null Hypothesis")

# P-value: upper-tail probability of the standard normal at z_score. The
# survival function is used because 1 - cdf loses precision in the far tail.
p_value = stats.norm.sf(z_score)
print('P-value:', p_value)

# Hypothesis
if p_value < alpha:
    print("Reject Null Hypothesis")
else:
    print("Fail to Reject Null Hypothesis")
# Problem statement for the two-sample z-test. The text is one message split
# into adjacent string literals; each literal is closed on its own line.
print("\nTwo-sampled z-test:\n")
print("There are two groups of students preparing for a competition: Group A and Group B. Group A has studied offline classes, "
      "while Group B has studied online classes. After the examination, the score of each student comes. "
      "Now we want to determine whether the online or offline classes are better.\n"
      "Group A: Sample size = 50, Sample mean = 75, Sample standard deviation = 10\n"
      "Group B: Sample size = 60, Sample mean = 80, Sample standard deviation = 12\n"
      "Assuming a 5% significance level, perform a two-sample z-test to determine if there is a significant difference between the "
      "online and offline classes.")
# Sample
sample_A = np.array([1, 2, 4, 4, 5, 5, 6, 7, 8, 8])
sample_B = np.array([1, 2, 2, 3, 3, 4, 5, 6, 7, 7])
# Decision
# Report the outcome twice: once by comparing |t| with the critical value,
# once by comparing the p-value with alpha.
print('With T-value:')
t_message = (
    'There is significant difference between two groups'
    if np.abs(t_statistic) > critical_t
    else 'No significant difference found between two groups'
)
print(t_message)

print('With P-value:')
p_message = (
    'No evidence to reject the null hypothesis of a significant difference between the two groups'
    if p_value > alpha
    else 'Evidence found to reject the null hypothesis of a significant difference between the two groups'
)
print(p_message)
# Decision
# Report the outcome twice: once by comparing |t| with the critical value,
# once by comparing the p-value with alpha.
print('With T-value:')
t_message = (
    'There is significant difference between math1 and math2'
    if np.abs(t_statistic) > critical_t
    else 'No significant difference found between math1 and math2'
)
print(t_message)

print('With P-value:')
p_message = (
    'No evidence to reject the null hypothesis of a significant difference between math1 and math2'
    if p_value > alpha
    else 'Evidence found to reject the null hypothesis of a significant difference between math1 and math2'
)
print(p_message)
# Decision
# The two possible conclusions, shared by the t-value and p-value checks.
effect_message = (
    "There is a significant difference in weight before and after the camp. "
    "The fitness camp had an effect."
)
no_effect_message = (
    "There is no significant difference in weight before and after the camp. "
    "The fitness camp did not have a significant effect."
)

print('With T-value:')
# NOTE(review): this compares the signed t statistic, whereas the other
# t-test decisions in this file compare np.abs(t_statistic); confirm whether
# critical_t here is one-tailed.
if t_statistic > critical_t:
    print(effect_message)
else:
    print(no_effect_message)

print('With P-value:')
# A p-value above alpha means the null hypothesis (no difference) is not
# rejected, so this branch reports "no significant difference".
if p_value > alpha:
    print(no_effect_message)
else:
    print(effect_message)
Output: