0% found this document useful (0 votes)
23 views

MLRecord

.

Uploaded by

ranadheer5221
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
23 views

MLRecord

.

Uploaded by

ranadheer5221
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 24

Aim: Introduction to Python Libraries - NumPy, Pandas, Matplotlib, Scikit-learn

Code:
MatplotLibIntro.py
import matplotlib.pyplot as plt

# Plot a single sequence; only y-values are given, so the x-axis is the
# position of each value in the list.
plt.plot([2, 4, 6, 4])
plt.ylabel("Numbers")
plt.xlabel("Indices")
plt.title("MyPlot")
plt.show()

# The same (number, square) pairs are drawn twice below, once per format.
numbers = [1, 2, 3, 4]
squares = [1, 4, 9, 16]

# First with the default line format.
plt.plot(numbers, squares)
plt.ylabel("Squares")
plt.xlabel("Numbers")
plt.grid()
plt.show()

# Then with the 'ro' format string (red circle markers).
plt.plot(numbers, squares, 'ro')
plt.ylabel("Squares")
plt.xlabel("Numbers")
plt.grid()
plt.show()

# Closed outline (first and last points coincide), filled in blue.
outline_x = [2.5, 1.5, 1.5, 2, 2.5, 3, 3.5, 3.5, 2.5]
outline_y = [0, 4, 8, 8, 6, 8, 8, 4, 0]
plt.plot(outline_x, outline_y)
plt.fill_between(outline_x, outline_y, color='b')
# Without this call the last figure is built but never displayed when the
# file is run as a script.
plt.show()

MatplotLibIntro2.py
import matplotlib.pyplot as plt
import numpy as np

# Sample points 0.0, 0.2, ... up to (but excluding) 5.0.
t = np.arange(0., 5., 0.2)

# Format strings: rs = red square, b-- = blue dash, g^ = green triangle
plt.plot(t, t ** 2, 'b--', label='^2')
plt.plot(t, t ** 2.2, 'rs', label='^2.2')
plt.plot(t, t ** 2.5, 'g^', label='^2.5')
plt.grid()
plt.legend()
plt.show()

# A single thick line.
x = [1, 2, 3, 4]
y = [1, 4, 9, 16]
plt.plot(x, y, linewidth=5.0)
plt.show()

# Two data sets drawn with one plot() call; the result is indexed below, one
# entry per data set.
x1 = [1, 2, 3, 4]
y1 = [1, 4, 9, 16]
x2 = [1, 2, 3, 4]
y2 = [2, 4, 6, 8]
lines = plt.plot(x1, y1, x2, y2)
# Set properties with keyword arguments ...
plt.setp(lines[0], color='r', linewidth=2.0)
# ... or with MATLAB-style string/value pairs.
plt.setp(lines[1], 'color', 'g', 'linewidth', 2.0)
plt.grid()
# Without this call the restyled figure is never displayed when the file is
# run as a script.
plt.show()

MatplotLibIntro3.py
import matplotlib.pyplot as plt
import numpy as np

def f(t):
    """Return the damped cosine ``exp(-t) * cos(2 * pi * t)`` evaluated at *t*.

    *t* may be a scalar or a NumPy array; with an array the expression is
    applied element-wise and an array of the same shape is returned.
    """
    return np.exp(-t) * np.cos(2 * np.pi * t)

# Two sampling grids over [0, 5): a coarse one (step 0.1) and a fine one
# (step 0.02).
t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)

plt.figure(1)

# Top panel of a 2-row, 1-column layout: f sampled on the coarse grid.
plt.subplot(211)
plt.grid()
plt.plot(t1, f(t1), 'b-')

# Bottom panel: the plain cosine on the fine grid, as a red dashed line.
plt.subplot(212)
plt.grid()
plt.plot(t2, np.cos(2 * np.pi * t2), 'r--')

# Without this call the figure is built but never displayed when the file is
# run as a script.
plt.show()

MatplotLibIntro4.py
import matplotlib.pyplot as plt
import numpy as np

# Figure 1: two stacked panels (2 rows x 1 column), one short series in each.
plt.figure(1)
for panel, series in ((1, [1, 2, 3]), (2, [4, 5, 6])):
    plt.subplot(2, 1, panel)
    plt.plot(series)

# Figure 2: a single plot.
plt.figure(2)
plt.plot([4, 5, 6])

# Switch back to figure 1 and select its top panel again to give it a title.
plt.figure(1)
plt.subplot(2, 1, 1)
plt.title('Easy as 1, 2, 3')
plt.show()

PandasIntro.py
import pandas as pd
# Load the NYC weather table and print it.
df1 = pd.read_csv("C:/Users/Admin/Desktop/20071A0504/ML/nyc_weather.csv")
print(df1)

# Largest value in the Temperature column.
max_temperature = df1['Temperature'].max()
print(max_temperature)

# EST entries of the rows whose Events column equals 'Rain'.
rained = df1['Events'] == 'Rain'
print(df1.loc[rained, 'EST'])

# Mean of the WindSpeedMPH column.
mean_wind_speed = df1['WindSpeedMPH'].mean()
print(mean_wind_speed)

# Load a second table and print it.
df = pd.read_csv("C:/Users/Admin/Desktop/20071A0504/ML/weather_data.csv")
print(df)

# Write it back out three ways: CSV with the index column, CSV without it,
# and an Excel workbook with a named sheet.
df.to_csv('new.csv')
df.to_csv('new_noIndex.csv', index=False)
df.to_excel('new.xlsx', sheet_name='weather_data')

PandasIntro2.py
import pandas as pd

df = pd.read_csv("C:/Users/Admin/Desktop/20071A0504/ML/weather_data_cities.csv")

# Visual divider printed between the sections of the report.
separator = '-------------------------------------------------------------------------------'

# Split the table into one sub-frame per distinct value of the 'city' column.
g = df.groupby('city')
print(g)

# Iterating a GroupBy yields (group key, sub-frame) pairs.
for city, city_df in g:
    print(city)
    print(city_df)
print(separator)

# Rows of a single group.
print(g.get_group('paris'))
print(separator)

# Per-city aggregates.
print(g.max())
print(separator)
# numeric_only=True: since pandas 2.0 mean() raises TypeError if the frame has
# non-numeric columns (presumably the case for this weather table - verify
# against the CSV); restricting to numeric columns keeps the script running.
print(g.mean(numeric_only=True))
print(separator)
print(g.describe())
print(separator)

PandasIntro3.py
# Concatenating different dataframes
import pandas as pd

# Two small weather tables that share the same three columns.
india_weather = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "bangalore"],
        temperature=[32, 45, 30],
        humidity=[80, 60, 78],
    )
)

us_weather = pd.DataFrame(
    dict(
        city=["new york", "chicago", "orlando"],
        temperature=[21, 15, 35],
        humidity=[68, 65, 75],
    )
)

frames = [india_weather, us_weather]

# Stack the rows of both tables and renumber the index 0..n-1.
df = pd.concat(frames, ignore_index=True)
print(df)

# Place the tables side by side instead (rows aligned on the index).
df = pd.concat(frames, axis=1)
print(df)

PandasIntro4.py
import pandas as pd

# Temperature readings for four cities.
temperature_df = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "bangalore", "hyderabad"],
        temperature=[32, 45, 30, 40],
    )
)

# Humidity readings for the same four cities plus one extra.
humidity_df = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "bangalore", "hyderabad", "vijayawada"],
        humidity=[68, 65, 75, 72, 80],
    )
)

# Join on 'city' with the default join type.
df = temperature_df.merge(humidity_df, on='city')
print(df)

# Outer join: keep cities that appear in either table.
df = temperature_df.merge(humidity_df, on='city', how='outer')
print(df)

# Inner join, spelled out explicitly: keep only cities present in both tables.
df = temperature_df.merge(humidity_df, on='city', how='inner')
print(df)
Output
Aim: To perform Exploratory Data Analysis and Haberman Exploration

Code:
ExploratoryDataAnalysis.py
#Classify new flower into one of the available 3 classes

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

iris = pd.read_csv("iris.csv")
print(iris.shape)
print(iris.columns)
# Rows per class; the original author notes the data set is balanced.
print(iris["variety"].value_counts())

# 2D scatter plot of sepal length against sepal width.
iris.plot(kind='scatter', x='sepal.length', y='sepal.width')
plt.show()
# Here we cannot identify which dot indicates which class, hence we use seaborn.

# `hue` names the column used to colour the points.
# `height` replaces the older `size` keyword, which current seaborn releases
# no longer accept.
sns.set_style("whitegrid")
sns.FacetGrid(iris, hue="variety", height=4).map(plt.scatter, "sepal.length", "sepal.width").add_legend()
plt.show()

# Pair plot: every feature against every other feature, coloured by class.
plt.close()
sns.set_style("whitegrid")
sns.pairplot(iris, hue="variety", height=3)
plt.show()

# Histogram, PDF, CDF
iris_setosa = iris.loc[iris["variety"] == "Setosa"]
iris_virginica = iris.loc[iris["variety"] == "Virginica"]
iris_versicolor = iris.loc[iris["variety"] == "Versicolor"]

# 1-D scatter: petal length of each class drawn along the line y = 0.
for subset in (iris_setosa, iris_versicolor, iris_virginica):
    petal_length = subset["petal.length"]
    plt.plot(petal_length, np.zeros_like(petal_length), 'o')
plt.show()

# Distribution of petal length per class.
# NOTE(review): sns.distplot is deprecated in seaborn; consider sns.histplot
# (with kde=True) once the installed seaborn version is confirmed.
sns.FacetGrid(iris, hue="variety", height=5).map(sns.distplot, "petal.length").add_legend()
plt.show()

HabermanExploration.py
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Column names are supplied explicitly via `names`, so the first line of the
# file is read as data.
haberman = pd.read_csv(
    "haberman.csv",
    sep=',',
    names=['Age', 'Year', 'positive_axillary', 'survival_status'],
)

print(haberman.shape)
print(haberman.columns)
# Number of rows per survival_status value.
print(haberman["survival_status"].value_counts())

# Scatter of Age against survival_status.
haberman.plot(kind='scatter', x='Age', y='survival_status')
plt.show()

# Age vs Year, coloured by survival_status.
# `height` replaces the older `size` keyword, which current seaborn releases
# no longer accept.
sns.set_style("whitegrid")
sns.FacetGrid(haberman, hue="survival_status", height=4).map(plt.scatter, "Age", "Year").add_legend()
plt.show()

Output:
Aim: To perform Feature Engineering and Feature Selection Methods.

Code:
PDFandCDF.py
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

iris = pd.read_csv("iris.csv")
iris_setosa = iris[iris["variety"] == "Setosa"]
setosa_petal_length = iris_setosa['petal.length']

# 10-bin histogram, rescaled so the bin values sum to 1.
counts, bin_edges = np.histogram(setosa_petal_length, bins=10, density=True)
pdf = counts / (sum(counts))
print(pdf)
print(bin_edges)

# Running total of the rescaled bin values.
cdf = pdf.cumsum()

# Both curves are plotted against the right-hand edge of each bin.
right_edges = bin_edges[1:]
plt.plot(right_edges, pdf)
plt.plot(right_edges, cdf)

# Same data with 20 bins; only the rescaled histogram is drawn this time.
counts, bin_edges = np.histogram(setosa_petal_length, bins=20, density=True)
pdf = counts / (sum(counts))
plt.plot(bin_edges[1:], pdf)

plt.show()

PDFandCDF2.py
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

iris = pd.read_csv("iris.csv")

# The same PDF/CDF computation is applied to each class in turn (previously
# three copy-pasted stanzas).
for variety in ("Setosa", "Virginica", "Versicolor"):
    petal_length = iris.loc[iris["variety"] == variety, 'petal.length']

    # 10-bin histogram, rescaled so the bin values sum to 1.
    counts, bin_edges = np.histogram(petal_length, bins=10, density=True)
    pdf = counts / (sum(counts))
    print(pdf)
    print(bin_edges)

    # Running total of the rescaled bin values.
    cdf = np.cumsum(pdf)

    # Both curves are plotted against the right-hand edge of each bin; labels
    # make the six curves distinguishable.
    plt.plot(bin_edges[1:], pdf, label=f'{variety} PDF')
    plt.plot(bin_edges[1:], cdf, label=f'{variety} CDF')

plt.legend()
# Without this call the figure is built but never displayed when the file is
# run as a script.
plt.show()

Output:
Aim: To implement regularised Linear regression

Code:
LinearRegression.py
import numpy
import matplotlib.pyplot as plot
import pandas
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

dataset = pandas.read_csv('salaryData.csv')
# Features: every column except the last. Target: the column at index 1.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

# Hold out one third of the rows for testing; fixed seed for repeatability.
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=1/3, random_state=0)

linearRegressor = LinearRegression()
linearRegressor.fit(xTrain, yTrain)

yPrediction = linearRegressor.predict(xTest)

# One figure per split: the split's points in red, with the line fitted on the
# training data drawn in blue on both.
panels = (
    (xTrain, yTrain, 'Salary vs Experience(Training set)'),
    (xTest, yTest, 'Salary vs Experience(Test set)'),
)
for points_x, points_y, heading in panels:
    plot.scatter(points_x, points_y, color='red')
    plot.plot(xTrain, linearRegressor.predict(xTrain), color='blue')
    plot.title(heading)
    plot.xlabel('Years of Experience')
    plot.ylabel('Salary')
    plot.show()

LinearRegression2.py
import pandas as pd
import numpy as np
import math
import operator
import matplotlib.pyplot as plt
data = pd.read_csv('headbrain.csv')

X = data['Head Size(cm^3)'].values
Y = data['Brain Weight(grams)'].values

mean_x = np.mean(X)
mean_y = np.mean(Y)

# Closed-form least-squares fit of y = m * x + c:
#   m = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2)
#   c = mean_y - m * mean_x
x_dev = X - mean_x
y_dev = Y - mean_y
m = np.sum(x_dev * y_dev) / np.sum(x_dev ** 2)
c = mean_y - (m * mean_x)

print (f'm = {m} \nc = {c}')

# Range over which the regression line is drawn: the span of the head sizes,
# padded by 100 on each side. (The lower bound was previously taken from Y,
# the brain weights, which put the line's start on the wrong axis scale.)
max_x = np.max(X) + 100
min_x = np.min(X) - 100

x = np.linspace(min_x, max_x, 100)
y = c + m * x

# Coefficient of determination: R^2 = 1 - SS_residual / SS_total.
y_pred = c + m * X
ss_t = np.sum((Y - mean_y) ** 2)
ss_r = np.sum((Y - y_pred) ** 2)
r2 = 1 - (ss_r/ss_t)

print(r2)

plt.plot(x, y, color='#58b970', label='Regression Line')
plt.scatter(X, Y, c='#ef5423', label='data points')

# The source column is 'Head Size(cm^3)', so the axis unit is cm^3, not cm.
plt.xlabel('Head Size in cm^3')
plt.ylabel('Brain Weight in grams')
plt.legend()
plt.show()
Output:
Aim: To build models using Decision trees.

Code:
DecisionTree.py
# Load libraries
import matplotlib.pyplot as plt
import pandas
from sklearn import metrics
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# The module is imported as `pandas` (no `pd` alias), so call it by that name.
pima = pandas.read_csv("C:/Users/admin/Downloads/diabetes.csv")

# Features: every column except 'Outcome'. Target: the 'Outcome' column.
X = pima.drop('Outcome', axis=1)
y = pima['Outcome']

# Names shown on the tree's split nodes.
# NOTE(review): assumed to match the column order of X - confirm against the CSV.
features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI',
            'DiabetesPedigreeFunction', 'Age']

# Hold out 30% of the rows for evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Fit on the training split only. Fitting on the full data (as before) lets
# the tree see the test rows, so the reported accuracy would be meaningless.
dtree = DecisionTreeClassifier()
dtree = dtree.fit(X_train, y_train)

y_pred = dtree.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Draw the fitted tree; show() is required for it to appear when the file is
# run as a script.
tree.plot_tree(dtree, feature_names=features)
plt.show()

Output:
Aim: To implement K-NN algorithm to classify a dataset

Code:
KNN.py
#### KNN Algorithm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs


from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

# Synthetic data: 500 two-feature points drawn around 4 centres.
X, y = make_blobs(n_samples=500, n_features=2, centers=4, cluster_std=1.5, random_state=4)

# The 'seaborn' style sheet was renamed 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was later removed; pick whichever this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# Scatter of the generated points, coloured by their label.
plt.figure(figsize=(10, 10))
plt.scatter(X[:, 0], X[:, 1], c=y, marker='*', s=100, edgecolors='black')
plt.show()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Two classifiers that differ only in the number of neighbours consulted.
knn5 = KNeighborsClassifier(n_neighbors=5)
knn1 = KNeighborsClassifier(n_neighbors=1)

knn5.fit(X_train, y_train)
knn1.fit(X_train, y_train)

y_pred_5 = knn5.predict(X_test)
y_pred_1 = knn1.predict(X_test)

# Accuracy on the held-out split, as a percentage.
print("Accuracy with k=5", accuracy_score(y_test, y_pred_5)*100)
print("Accuracy with k=1", accuracy_score(y_test, y_pred_1)*100)

# Side-by-side view of the test points coloured by each model's prediction.
plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_5, marker='*', s=100, edgecolors='black')
plt.title("Predicted values with k=5", fontsize=20)

plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_1, marker='*', s=100, edgecolors='black')
plt.title("Predicted values with k=1", fontsize=20)
plt.show()

Output:

You might also like