Aayushi ML File
PRACTICAL FILE
B. Tech.
Index

Sr. No.   Title                                                                     Remarks
1         Calculating the basic statistical parameters for a normally distributed dataset.
2         Plotting a Normal Distribution as a Scatter Plot.
3         Plotting a Normal Distribution as a Histogram.
4         Plotting a Histogram of a Uniform Distribution.
5         Predicting the House Prices of a neighborhood using Linear Regression.
6         Reading a CSV file and plotting subplots for the various variables.
7         Predicting the average Canadian Per Capita Income using Linear Regression.
8         Using a multivariate Linear Regression model for predicting House Prices.
9         Predicting Salaries for possible candidates for a position using Linear Regression.
10        Training and Testing a simple Logistic Regression Model.
11        Saving the contents of a model in a pickle file.
12        Performing K-Means Clustering on a Dataset.
13        Plotting an ROC curve for a prediction model.
14        Calculating the F1 Score for a given dataset and results.
15        Classifying Objects in the Iris Dataset.
1. Calculating the basic statistical parameters for a normally distributed dataset.

import statistics
import numpy as np
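# NOTE (assumption): the body of this listing is missing; a minimal sketch of
# the statistics it likely computed, using the names (x_mean, x_mode,
# x_median) reused in later experiments:
x = np.random.normal(loc=50, scale=10, size=1000)  # assumed sample: mean 50, sd 10

x_mean = np.mean(x)                       # mean
x_median = np.median(x)                   # median
x_mode = statistics.mode(np.round(x))     # mode of the rounded values
x_std = np.std(x)                         # standard deviation
x_var = np.var(x)                         # variance

print("Mean:", x_mean)
print("Median:", x_median)
print("Mode:", x_mode)
print("Standard Deviation:", x_std)
print("Variance:", x_var)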
Output:
2. Plotting a Normal Distribution as a Scatter Plot.

import numpy as np
import statistics
import matplotlib.pyplot as plt
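# NOTE (assumption): x, x_mean, x_mode and x_median are used below but never
# defined in this listing; a plausible setup mirroring Experiment 1:
x = np.random.normal(loc=50, scale=10, size=1000)
x_mean = np.mean(x)
x_median = np.median(x)
x_mode = statistics.mode(np.round(x))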
plt.grid()
plt.scatter(x, x)
plt.text(0, 0.1, x_mean, fontsize=14, transform=plt.gcf().transFigure)
plt.text(0, 0.05, x_mode, fontsize=14, transform=plt.gcf().transFigure)
plt.text(0, 0, x_median, fontsize=14, transform=plt.gcf().transFigure)
plt.subplots_adjust(bottom=0.25)
plt.show()
Output:
3. Plotting a Normal Distribution as a Histogram.

import numpy as np
import statistics
import matplotlib.pyplot as plt
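# NOTE (assumption): x and its statistics are used below but never defined in
# this listing; a plausible setup mirroring Experiment 1:
x = np.random.normal(loc=50, scale=10, size=1000)
x_mean = np.mean(x)
x_median = np.median(x)
x_mode = statistics.mode(np.round(x))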
plt.grid()
plt.hist(x)
plt.text(0, 0.1, x_mean, fontsize=14, transform=plt.gcf().transFigure)
plt.text(0, 0.05, x_mode, fontsize=14, transform=plt.gcf().transFigure)
plt.text(0, 0, x_median, fontsize=14, transform=plt.gcf().transFigure)
plt.subplots_adjust(bottom=0.25)
plt.show()
Output:
4. Plotting a Histogram of a Uniform Distribution.

import numpy as np
import statistics
import matplotlib.pyplot as plt
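# NOTE (assumption): x and its statistics are used below but never defined in
# this listing; a plausible setup with a uniform sample:
x = np.random.uniform(low=0, high=100, size=1000)
x_mean = np.mean(x)
x_median = np.median(x)
x_mode = statistics.mode(np.round(x))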
plt.grid()
plt.hist(x)
plt.text(0, 0.1, x_mean, fontsize=14, transform=plt.gcf().transFigure)
plt.text(0, 0.05, x_mode, fontsize=14, transform=plt.gcf().transFigure)
plt.text(0, 0, x_median, fontsize=14, transform=plt.gcf().transFigure)
plt.subplots_adjust(bottom=0.25)
plt.show()
Output:
5. Predicting the House Prices of a neighborhood using Linear Regression.

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
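# NOTE (assumption): the listing never loads its data; a plausible setup with
# a hypothetical CSV holding 'area' and 'price' columns:
import pandas as pd
data = pd.read_csv("homeprices.csv")  # hypothetical file name
x = data[["area"]]   # feature must be 2-D for scikit-learn
y = data.price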
# print(data)
reg = linear_model.LinearRegression()
reg.fit(x, y)
plt.grid()
plt.plot(x, y)
plt.show()
Output:
6. Reading a CSV file and plotting subplots for the various variables.
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from sklearn import linear_model
df = pd.read_csv("HR_comma_sep.csv")
print(df.head())
plt.subplot(3, 3, 1)
plt.scatter(df.left, df.satisfaction_level)
plt.subplot(3, 3, 2)
plt.scatter(df.left, df.last_evaluation)
plt.subplot(3, 3, 3)
plt.scatter(df.left, df.number_project)
plt.subplot(3, 3, 4)
plt.scatter(df.left, df.average_monthly_hours)
plt.subplot(3, 3, 5)
plt.scatter(df.left, df.time_spend_company)
plt.subplot(3, 3, 6)
plt.scatter(df.left, df.Work_accident)
plt.subplot(3, 3, 7)
plt.scatter(df.left, df.promotion_last_5years)
plt.show()
Output:
7. Predicting the average Canadian Per Capita Income using Linear Regression.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.impute import SimpleImputer
df = pd.read_csv("canada_per_capita_income.csv")
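# NOTE (assumption): the rest of this listing is missing; a minimal sketch of
# the likely fit and plot, assuming 'year' and 'per capita income (US$)'
# columns in the CSV:
reg = linear_model.LinearRegression()
reg.fit(df[["year"]], df["per capita income (US$)"])
plt.grid()
plt.scatter(df["year"], df["per capita income (US$)"])
plt.plot(df["year"], reg.predict(df[["year"]]))
plt.show()
# Predict the per capita income for a future year
print("Prediction for 2020:", reg.predict([[2020]]))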
Output:
8. Using a multivariate Linear Regression model for predicting House Prices.

import pandas as pd
import numpy as np
from sklearn import linear_model
df = pd.read_csv("homeprices.csv")
print(df)
df.bedrooms = df.bedrooms.fillna(df.bedrooms.median())
reg = linear_model.LinearRegression()
reg.fit(df.drop("price", axis="columns"), df.price)
cf = reg.coef_
icept = reg.intercept_
print("Coefficients:\n", cf)
print("Intercept:\n", icept)
Output:
9. Predicting Salaries for possible candidates for a position using Linear Regression.

import pandas as pd
import numpy as np
from sklearn import linear_model

df = pd.read_csv("hiring.csv")
'''list_of_columns = list(df.columns)
print("Columns:\n", list_of_columns)'''
df.test_score = df.test_score.fillna(df.test_score.median())
df.experience = df.experience.fillna(df.experience.median())
print(df)
reg = linear_model.LinearRegression()
reg.fit(df.drop("salary", axis="columns"), df.salary)
cf = reg.coef_
icept = reg.intercept_
print("Coefficients:\n", cf)
print("Intercept:\n", icept)
plt.scatter(age, ins_bgt)
plt.show()
X_train, X_test, y_train, y_test = train_test_split(age, ins_bgt, train_size=0.8)
model = LogisticRegression()
model.fit(X_train, y_train)
print("X_train:\n", X_train)
print("y_train:\n", y_train)
print("X_test:\n", X_test)
print("y_test:\n", y_test)
y_predicted = model.predict(X_test)
print("Prediction Probability:\n", model.predict_proba(X_test))
print("Score:\n", model.score(X_test, y_test))
print("y_predicted:\n", y_predicted)
print("X_test:\n", X_test)
print("Coefficients:\n", model.coef_)
print("Intercept:\n",
model.intercept_)
Output:
11. Saving the contents of a model in a pickle file.

import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import pickle

DATADIR = r"C:/Documents/LabWork"
CATEGORIES = ["Me", "Anyone Other than me"]
training_data = []
def create_training_data():
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category)
        class_num = CATEGORIES.index(category)
        for img in os.listdir(path):
            img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            training_data.append([img_array, class_num])
create_training_data()
print(len(training_data))
random.shuffle(training_data)
for sample in training_data:
print(sample[1])
X = []
y = []
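# NOTE (assumption): the pickling step the title promises is missing from the
# listing; a minimal sketch of how X and y would be filled and saved:
for features, label in training_data:
    X.append(features)
    y.append(label)

# Serialize the arrays with pickle (hypothetical file names)
with open("X.pickle", "wb") as f:
    pickle.dump(X, f)
with open("y.pickle", "wb") as f:
    pickle.dump(y, f)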
Output:

12. Performing K-Means Clustering on a Dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# The Data
X = -2 * np.random.rand(100, 2)
X1 = 1 + 2 * np.random.rand(50, 2)
X[50:100, :] = X1
# Using K-Means
# Taking in "K" as 2
Kmean = KMeans(n_clusters=2)
Kmean.fit(X)
# Testing
sample_test = np.array([-3.0, -3.0])
second_test = sample_test.reshape(1, -1)
test_prediction = Kmean.predict(second_test)
print(test_prediction)
Output:
13. Plotting an ROC curve for a prediction model.
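The imports for this listing are missing; the calls below imply at least:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics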
data = pd.read_csv("default.csv")
X = data[['student', 'balance', 'income']]
y = data['default']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
model = LogisticRegression()
model.fit(X_train, y_train)
print("X_train:\n", X_train)
print("y_train:\n", y_train)
print("X_test:\n", X_test)
print("y_test:\n", y_test)
y_predicted = model.predict(X_test)
print("Prediction Probability:\n", model.predict_proba(X_test))
print("Score:\n", model.score(X_test, y_test))
print("y_predicted:\n", y_predicted)
print("X_test:\n", X_test)
print("Coefficients:\n", model.coef_)
print("Intercept:\n",
model.intercept_)
y_pred_proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
auc = metrics.roc_auc_score(y_test, y_pred_proba)
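# NOTE (assumption): the plotting step is missing from the listing; a minimal
# sketch of the ROC plot itself:
plt.plot(fpr, tpr, label="AUC = " + str(auc))
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc=4)
plt.show()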
Output:
14. Calculating the F1 Score for a given dataset and results.
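The listing for this experiment is missing from the file; a minimal sketch of an F1 Score computation with scikit-learn, using hypothetical label arrays:

from sklearn.metrics import f1_score

# Hypothetical ground-truth and predicted labels
y_true = [0, 1, 1, 0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1, 1, 1, 1]

# F1 = 2 * (precision * recall) / (precision + recall)
print("F1 Score:", f1_score(y_true, y_pred))

Output: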
15. Classifying Objects in the Iris Dataset.

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from pandas.plotting import parallel_coordinates
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
data = pd.read_csv("iris_data.csv")
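# NOTE (assumption): train and test are used below but never created; a
# plausible split of the loaded data, stratified by species:
train, test = train_test_split(data, test_size=0.4, stratify=data["species"], random_state=42)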
n_bins = 10
fig, axs = plt.subplots(2, 2)
axs[0, 0].hist(train['sepal_length'], bins=n_bins)
axs[0, 0].set_title('Sepal Length')
axs[0, 1].hist(train['sepal_width'], bins=n_bins)
axs[0, 1].set_title('Sepal Width')
axs[1, 0].hist(train['petal_length'], bins=n_bins)
axs[1, 0].set_title('Petal Length')
axs[1, 1].hist(train['petal_width'], bins=n_bins)
axs[1, 1].set_title('Petal Width')
fig.tight_layout(pad=1.0)
plt.show()
corrmat = train.corr(numeric_only=True)  # skip the non-numeric species column
sns.heatmap(corrmat, annot=True, square=True)
plt.show()
X_train = train[['sepal_length', 'sepal_width', 'petal_length',
'petal_width']]
y_train = train.species
X_test = test[['sepal_length', 'sepal_width', 'petal_length',
'petal_width']]
y_test = test.species
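# NOTE (assumption): mod_dt, fn and cn are used below but never defined; a
# plausible setup that fits the decision tree the plot expects:
mod_dt = DecisionTreeClassifier(max_depth=3, random_state=1)
mod_dt.fit(X_train, y_train)
print("Decision Tree accuracy:", metrics.accuracy_score(y_test, mod_dt.predict(X_test)))
fn = ["sepal_length", "sepal_width", "petal_length", "petal_width"]  # feature names
cn = ["setosa", "versicolor", "virginica"]  # class names (assumed labels)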
plt.figure(figsize=(10, 8))
plot_tree(mod_dt, feature_names=fn, class_names=cn, filled=True)
plt.show()
Output: