MLRecord
MLRecord
Code:
MatplotLibIntro.py
import matplotlib.pyplot as plt
# Draw a single series (x defaults to the list indices) and label the axes.
fig, ax = plt.subplots()
ax.plot([2, 4, 6, 4])
ax.set_ylabel("Numbers")
ax.set_xlabel("Indices")
ax.set_title("MyPlot")
plt.show()
MatplotLibIntro2.py
import matplotlib.pyplot as plt
import numpy as np
# Plot two series in one call and style each resulting line separately.
x1 = [1, 2, 3, 4]
y1 = [1, 4, 9, 16]
x2 = [1, 2, 3, 4]
y2 = [2, 4, 6, 8]

# plt.plot returns a list of line objects; index 0 and 1 are styled below.
lines = plt.plot(x1, y1, x2, y2)

# Style the first line using keyword arguments.
plt.setp(lines[0], color='r', linewidth=2.0)
# Style the second line using MATLAB-style string/value pairs.
plt.setp(lines[1], 'color', 'g', 'linewidth', 2.0)

plt.grid()
# Without show() a script run outside an interactive session never displays
# the figure.
plt.show()
MatplotLibIntro3.py
import matplotlib.pyplot as plt
import numpy as np
def f(t):
    """Return the damped cosine ``exp(-t) * cos(2*pi*t)``.

    ``t`` is passed straight to NumPy ufuncs, so it may be a scalar or an
    array; the result has the matching shape.
    """
    return np.exp(-t) * np.cos(2 * np.pi * t)
# Sample points for the two curves.
# NOTE(review): t1/t2 were used but never defined in the original listing.
# These ranges follow the Matplotlib pyplot tutorial - confirm they match the
# intended plot.
t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)

plt.figure(1)

# Top panel (2 rows x 1 column, slot 1): damped cosine as a solid blue line.
plt.subplot(211)
plt.grid()
plt.plot(t1, f(t1), 'b-')

# Bottom panel (slot 2): plain cosine as a dashed red line.
plt.subplot(212)
plt.grid()
plt.plot(t2, np.cos(2 * np.pi * t2), 'r--')

# Display the figure; without this a plain script run shows nothing.
plt.show()
MatplotLibIntro4.py
import matplotlib.pyplot as plt
import numpy as np
# Demonstrates pyplot's implicit "current figure" / "current axes" state:
# every call below acts on whichever figure and subplot was selected last.
plt.figure(1)  # create/select figure 1
plt.subplot(211)  # top slot of a 2-row, 1-column grid in figure 1
plt.plot([1, 2, 3])
plt.subplot(212)  # bottom slot of the same grid
plt.plot([4, 5, 6])
plt.figure(2)  # create/select a second figure
plt.plot([4, 5, 6])  # no subplot was requested here, so plot() draws on default axes
plt.figure(1)  # switch back to figure 1
plt.subplot(211)  # re-select the top slot of figure 1
plt.title('Easy as 1, 2, 3')  # the title lands on the currently selected (top) subplot
plt.show()
PandasIntro.py
import pandas as pd
# Load two weather CSV files and print each frame right after it is read.
nyc_weather_csv = "C:/Users/Admin/Desktop/20071A0504/ML/nyc_weather.csv"
df1 = pd.read_csv(nyc_weather_csv)
print(df1)

weather_data_csv = "C:/Users/Admin/Desktop/20071A0504/ML/weather_data.csv"
df = pd.read_csv(weather_data_csv)
print(df)
PandasIntro2.py
import pandas as pd
# Group the weather readings by city and print several views of the groups.
SEPARATOR = '-------------------------------------------------------------------------------'

df = pd.read_csv("C:/Users/Admin/Desktop/20071A0504/ML/weather_data_cities.csv")
g = df.groupby('city')
print(g)

# Iterating a GroupBy yields (group key, sub-DataFrame) pairs.
for city, city_df in g:
    print(city)
    print(city_df)
print(SEPARATOR)

# Rows belonging to a single group.
print(g.get_group('paris'))
print(SEPARATOR)

print(g.max())
print(SEPARATOR)

# numeric_only=True: pandas 2.x raises on non-numeric columns instead of
# silently dropping them, so restrict the mean to numeric columns explicitly.
print(g.mean(numeric_only=True))
print(SEPARATOR)

print(g.describe())
print(SEPARATOR)
PandasIntro3.py
# Concatenating different dataframes
import pandas as pd
# Two frames with identical columns, one per country.
india_weather = pd.DataFrame({
    "city": ["mumbai", "delhi", "bangalore"],
    "temperature": [32, 45, 30],
    "humidity": [80, 60, 78],
})
us_weather = pd.DataFrame({
    "city": ["new york", "chicago", "orlando"],
    "temperature": [21, 15, 35],
    "humidity": [68, 65, 75],
})

# Stack the frames row-wise. ignore_index=True renumbers the rows 0..n-1
# instead of repeating each frame's own 0..2 index.
df = pd.concat([india_weather, us_weather], ignore_index=True)
print(df)
PandasIntro4.py
import pandas as pd
# Temperature readings for four cities.
temperature_cities = ["mumbai", "delhi", "bangalore", "hyderabad"]
temperature_values = [32, 45, 30, 40]
temperature_df = pd.DataFrame(
    {"city": temperature_cities, "temperature": temperature_values}
)

# Humidity readings; this frame has one city more than the temperature frame.
humidity_cities = ["mumbai", "delhi", "bangalore", "hyderabad", "vijayawada"]
humidity_values = [68, 65, 75, 72, 80]
humidity_df = pd.DataFrame(
    {"city": humidity_cities, "humidity": humidity_values}
)
Code:
ExploratoryDataAnalysis.py
#Classify new flower into one of the available 3 classes
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Load the iris data and print its basic shape information.
iris = pd.read_csv("iris.csv")
print(iris.shape)
print(iris.columns)
# Per-class row counts; equal counts mean the data set is balanced.
print(iris["variety"].value_counts())

# 2D scatter plot; hue names the column used to colour the points.
# `height` replaces the `size` keyword, which seaborn renamed in 0.9 and which
# newer releases reject.
sns.set_style("whitegrid")
grid = sns.FacetGrid(iris, hue="variety", height=4)
grid.map(plt.scatter, "sepal.length", "sepal.width").add_legend()
plt.show()

# Pair plot, coloured by variety.
plt.close()
sns.set_style("whitegrid")
sns.pairplot(iris, hue="variety", height=3)
plt.show()
HabermanExploration.py
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): the original listing used `haberman` without ever loading it.
# The file name below is an assumption - confirm it. The CSV must provide the
# "Age", "Year" and "survival_status" columns referenced further down.
haberman = pd.read_csv("haberman.csv")

print(haberman.shape)
print(haberman.columns)
# Number of rows per survival_status value.
print(haberman["survival_status"].value_counts())

# Scatter of Age against Year, coloured by survival_status.
# `height` replaces the `size` keyword, which seaborn renamed in 0.9 and which
# newer releases reject.
sns.set_style("whitegrid")
grid = sns.FacetGrid(haberman, hue="survival_status", height=4)
grid.map(plt.scatter, "Age", "Year").add_legend()
plt.show()
Output:
Aim: To perform Feature Engineering and Feature Selection Methods.
Code:
PDFandCDF.py
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# PDF and CDF of petal length for the rows whose variety is "Setosa".
iris = pd.read_csv("iris.csv")
is_setosa = iris["variety"] == "Setosa"
iris_setosa = iris.loc[is_setosa]

# Histogram over 10 bins, rescaled so that the bin values sum to 1.
histogram = np.histogram(iris_setosa['petal.length'], bins=10, density=True)
counts, bin_edges = histogram
pdf = counts / sum(counts)
print(pdf)
print(bin_edges)

# The running total of the PDF is the CDF; both are drawn against the right
# edge of each bin.
cdf = pdf.cumsum()
right_edges = bin_edges[1:]
plt.plot(right_edges, pdf)
plt.plot(right_edges, cdf)
plt.show()
PDFandCDF2.py
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def _plot_pdf_cdf(frame, variety):
    """Print and plot the petal-length PDF and CDF for one iris variety.

    frame: DataFrame with "variety" and "petal.length" columns.
    variety: value of the "variety" column selecting the rows to use.
    """
    subset = frame.loc[frame["variety"] == variety]
    counts, bin_edges = np.histogram(subset['petal.length'], bins=10, density=True)
    # Rescale so the bin values sum to 1.
    pdf = counts / sum(counts)
    print(pdf)
    print(bin_edges)
    cdf = np.cumsum(pdf)
    # Both curves are drawn against the right edge of each bin.
    plt.plot(bin_edges[1:], pdf)
    plt.plot(bin_edges[1:], cdf)


iris = pd.read_csv("iris.csv")
# Overlay the curves of both varieties on the same axes.
for variety in ("Setosa", "Virginica"):
    _plot_pdf_cdf(iris, variety)
# Display the figure; without this a plain script run shows nothing.
plt.show()
Output:
Aim: To implement regularised Linear regression
Code:
LinearRegression.py
import numpy
import matplotlib.pyplot as plot
import pandas
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
dataset = pandas.read_csv('salaryData.csv')
# Features: every column except the last. Target: the column at position 1.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

# NOTE(review): the split and fit steps were missing from the original
# listing, although xTrain/xTest/yTrain/yTest and linearRegressor are used
# below. test_size and random_state are assumptions - confirm them.
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=1 / 3, random_state=0)
linearRegressor = LinearRegression()
linearRegressor.fit(xTrain, yTrain)

yPrediction = linearRegressor.predict(xTest)

# Training points with the fitted line.
plot.scatter(xTrain, yTrain, color='red')
plot.plot(xTrain, linearRegressor.predict(xTrain), color='blue')
plot.title('Salary vs Experience(Training set)')
plot.xlabel('Years of Experience')
plot.ylabel('Salary')
plot.show()

# Test points against the same fitted line (drawn over the training x range).
plot.scatter(xTest, yTest, color='red')
plot.plot(xTrain, linearRegressor.predict(xTrain), color='blue')
plot.title('Salary vs Experience(Test set)')
plot.xlabel('Years of Experience')
plot.ylabel('Salary')
plot.show()
LinearRegression2.py
import pandas as pd
import numpy as np
import math
import operator
import matplotlib.pyplot as plt
# Simple linear regression computed by hand, followed by its R^2 score.
data = pd.read_csv('headbrain.csv')
X = data['Head Size(cm^3)'].values
Y = data['Brain Weight(grams)'].values

mean_x = np.mean(X)
mean_y = np.mean(Y)

# Slope m = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2).
numer = 0
denom = 0
for x_i, y_i in zip(X, Y):
    numer += (x_i - mean_x) * (y_i - mean_y)
    denom += (x_i - mean_x) ** 2
m = numer / denom
# Intercept chosen so the line passes through (mean_x, mean_y).
c = mean_y - (m * mean_x)

# R^2 = 1 - (residual sum of squares / total sum of squares).
ss_t = 0
ss_r = 0
for x_i, y_i in zip(X, Y):
    y_pred = c + m * x_i
    ss_t += (y_i - mean_y) ** 2
    ss_r += (y_i - y_pred) ** 2
r2 = 1 - (ss_r / ss_t)
print(r2)
Code:
DecisionTree.py
# Load libraries
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics

pima = pd.read_csv("C:/Users/admin/Downloads/diabetes.csv")

# Every column except 'Outcome' is a feature; 'Outcome' is the label.
X = pima.drop('Outcome', axis=1)
y = pima['Outcome']
features = list(X.columns)

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Fit on the training split only, so that the accuracy printed below is
# measured on rows the tree has not seen.
dtree = DecisionTreeClassifier()
dtree = dtree.fit(X_train, y_train)

tree.plot_tree(dtree, feature_names=features)
# Display the figure; without this a plain script run shows nothing.
plt.show()

y_pred = dtree.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Output:
Aim: To implement K-NN algorithm to classify a dataset
Code:
KNN.py
#### KNN Algorithm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# The 'seaborn' style was renamed 'seaborn-v0_8' in Matplotlib 3.6 and the old
# name was later removed; use whichever one this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# NOTE(review): the original listing used X, y and the train/test split
# without defining them. A synthetic two-feature data set stands in here -
# replace it with the intended data and confirm the split parameters.
X, y = make_blobs(n_samples=500, n_features=2, centers=4, cluster_std=1.5, random_state=4)

# Full data set, coloured by class label.
plt.figure(figsize=(10, 10))
plt.scatter(X[:, 0], X[:, 1], c=y, marker='*', s=100, edgecolors='black')
plt.show()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit two classifiers that differ only in the number of neighbours.
knn5 = KNeighborsClassifier(n_neighbors=5)
knn1 = KNeighborsClassifier(n_neighbors=1)
knn5.fit(X_train, y_train)
knn1.fit(X_train, y_train)
y_pred_5 = knn5.predict(X_test)
y_pred_1 = knn1.predict(X_test)

# Side-by-side view of the test points coloured by each model's prediction.
plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_5, marker='*', s=100, edgecolors='black')
plt.title("Predicted values with k=5", fontsize=20)
plt.subplot(1, 2, 2)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred_1, marker='*', s=100, edgecolors='black')
plt.title("Predicted values with k=1", fontsize=20)
plt.show()
Output: