Data Mining & Data Science Practical Slips
Data Mining & Data Science Practical Slips
Q.2 Write a Python program to convert the categorical values into numeric format for a
given dataset.
[15]
Solution:-
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Sample dataset with a single categorical column.
data = {
    'Category': ['A', 'B', 'A', 'C', 'B', 'A'],
}

# Build a DataFrame from the sample data.
df = pd.DataFrame(data)

# LabelEncoder assigns one integer code to each distinct category.
label_encoder = LabelEncoder()

# Keep the original column and store the integer codes next to it.
df['Category_encoded'] = label_encoder.fit_transform(df['Category'])
print(df)
Slip 2
Q.1 Consider the student data set. It can be downloaded from:
https://drive.google.com/open?id=1oakZCv7g3mlmCSdv9J8kdSaqO5_6dIOw
Write a program in Python to apply simple linear regression and find out the
mean absolute error, mean squared error and root mean squared error.
[15]
Solution:-
import numpy as nm
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Load the student scores (columns used below: 'Hours' and 'Scores').
data_set = pd.read_csv('student_scores.csv')
print(data_set)

# reshape(-1, 1) turns each column into an (n_samples, 1) array.
y = data_set['Scores'].values.reshape(-1, 1)
X = data_set['Hours'].values.reshape(-1, 1)
print(X)
print(y)
print(X.shape)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(X_train)
print(X_test)

# Fit the regression line on the training rows.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
print(regressor.intercept_)
print(regressor.coef_)

# Predicted score for 9.5 hours of study.
score = regressor.predict([[9.5]])
print(score)

y_pred = regressor.predict(X_test)
print(y_pred)

# Error metrics on the held-out rows; RMSE is the square root of MSE.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = nm.sqrt(mse)
print(mae)
print(mse)
print(rmse)
print('Actual', y_test)
print('Predicted', y_pred)
Q.2 Write a R program to reverse a number and also calculate the sum of
digits of that
number. [15]
Solution:-
# Read a whole number from the user and reject anything that is not a
# non-negative integer (the digit loops below only handle x >= 0).
x = as.integer(readline("Enter any number:- "))
if (is.na(x) || x < 0) {
  stop("Please enter a non-negative whole number.")
}

# Build the reversed number by peeling digits off the right-hand side.
temp = x
rev = 0
while (temp > 0)
{
  rem = temp %% 10
  rev = (rev * 10) + rem
  temp = floor(temp / 10)
}
# The trailing "\n" keeps the next message on its own line.
cat("Reverse of number is ", rev, "\n")

# Add up the digits of the original number the same way.
# Named digit_sum so that base R's sum() is not masked.
digit_sum = 0
while (x > 0)
{
  rem = x %% 10
  digit_sum = digit_sum + rem
  x = floor(x / 10)
}
cat("Sum of digits of the number is ", digit_sum, "\n")
Slip 3
Q.1 Write a Python program to convert the categorical values into numeric format for a
given dataset.
[15]
Solution:-
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Sample dataset
data = {
    'Category': ['A', 'B', 'A', 'C', 'B', 'A'],
}

# Build a DataFrame from the sample data.
df = pd.DataFrame(data)

# LabelEncoder assigns one integer code to each distinct category.
label_encoder = LabelEncoder()

# Keep the original column and store the integer codes next to it.
df['Category_encoded'] = label_encoder.fit_transform(df['Category'])
print(df)
Q.2 Write a R program to create a data frame using two given vectors and
display the
duplicate elements [15]
Solution:-
# Two vectors of equal length; vector2 mixes numbers and strings.
vector1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 6, 4)
vector2 <- c(1, 'B', 'C', 'D', 'E', 'D', 'F', 'G', 2, 3)
data <- data.frame(vector1, vector2)

# A row is reported when its vector1 value occurs more than once.
# Scanning from both ends flags every occurrence, not only the repeats.
seen_forward <- duplicated(data$vector1)
seen_backward <- duplicated(data$vector1, fromLast = TRUE)
duplicates = data[seen_forward | seen_backward, ]

cat("Original Data Frame:\n")
print(data)
cat("\nDuplicate Elements:\n")
print(duplicates)
Slip 4
6
Q.1 Write a R program to calculate the multiplication table using a function.
[15]
Solution:-
# Print the multiplication table of n: the products 1*n through 10*n,
# one per line.
multiplication_table <- function(n) {
  for (i in 1:10) {
    cat(i*n,"\n")
  }
}

# Ask the user which table to print, then print it.
n <- as.integer(readline("Enter a number: "))
multiplication_table(n)
Solution:-
# Fourteen observations; the three lists are aligned by position.
weather = ['sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'rainy',
           'overcast', 'sunny', 'sunny', 'rainy', 'sunny', 'overcast',
           'overcast', 'rainy']
temp = ['hot', 'hot', 'hot', 'mild', 'cool', 'cool', 'cool', 'mild',
        'cool', 'mild', 'mild', 'mild', 'hot', 'mild']
play = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'Yes', 'No']
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB

# The same encoder object is refitted for each list in turn, so after the
# last call it only remembers the mapping for `play`.
le = preprocessing.LabelEncoder()
wheather_encoded = le.fit_transform(weather)
print(wheather_encoded)
temp_encoded = le.fit_transform(temp)
label = le.fit_transform(play)
print("Temp:", temp_encoded)
print("Play:", label)

# Pair the two encoded predictors row by row.
features = [pair for pair in zip(wheather_encoded, temp_encoded)]
print(features)

# Train Gaussian naive Bayes and classify the tuple (0, 2).
model = GaussianNB()
model.fit(features, label)
predicted = model.predict([[0, 2]])
print("Predicted Value:", predicted)
Slip 5
Q.1 Write a python program to find all null values in a given data set
and remove them.
(Download dataset from github.com)
[15]
8
Solution:-
# Note: copy the diabetes dataset into a working folder (not the Jupyter
# folder), delete two or three of the cells that contain 0 so that they
# become null values, save the file as diabetes_null_values.csv, and then
# copy it into your Jupyter folder.
import pandas as pd

# Read the dataset that contains the blanked-out cells.
df = pd.read_csv('diabetes_null_values.csv')
print(df)

# Count the missing entries column by column.
null_counts = df.isna().sum()
print("Null value counts:\n", null_counts)

# Keep only the rows in which every column has a value.
df_cleaned = df.dropna(axis=0, how='any')
print("\nCleaned dataset:\n", df_cleaned)
Q.2 Consider the student data set. It can be downloaded from:
https://drive.google.com/open?id=1oakZCv7g3mlmCSdv9J8kdSaqO5_6dIOw
Write a program in Python to apply simple linear regression and find out the
mean absolute error, mean squared error and root mean squared error.
[15]
Solution:-
import numpy as nm
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Load the student scores (columns used below: 'Hours' and 'Scores').
data_set = pd.read_csv('student_scores.csv')
print(data_set)

# reshape(-1, 1) turns each column into an (n_samples, 1) array.
y = data_set['Scores'].values.reshape(-1, 1)
X = data_set['Hours'].values.reshape(-1, 1)
print(X)
print(y)
print(X.shape)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(X_train)
print(X_test)

# Fit the regression line on the training rows.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
print(regressor.intercept_)
print(regressor.coef_)

# Predicted score for 9.5 hours of study.
score = regressor.predict([[9.5]])
print(score)

y_pred = regressor.predict(X_test)
print(y_pred)

# Error metrics on the held-out rows; RMSE is the square root of MSE.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = nm.sqrt(mse)
print(mae)
print(mse)
print(rmse)
print('Actual', y_test)
print('Predicted', y_pred)
Slip 6
Q.1 Write a Python program to split the dataset into training and
testing sets. [15]
Solution:-
# Notes:
# - numpy is used for mathematical operations.
# - pandas is used to read a .csv or Excel file, or to import columns from
#   a dataset.
# - Scikit-learn, also known as sklearn, is a Python library for implementing
#   machine learning models and statistical modelling. Through scikit-learn
#   we can implement various machine learning models for regression,
#   classification and clustering, and statistical tools for analysing these
#   models.
# - The encode() function in Python returns the encoded form of a given
#   string.
# - The fit_transform() method fits the data into a model and transforms it
#   into a form that is more suitable for the model, in a single step.
# - In iloc[:, :-1], ":" means all rows and ":-1" means every column except
#   the last one.
Solution:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

dataset = pd.read_csv("play_tennis.csv")
print(dataset)

# Replace each categorical column with integer codes. The encoder is refitted
# per column, so every column gets its own independent mapping.
le = preprocessing.LabelEncoder()
for column in ['outlook', 'temp', 'humidity', 'wind', 'play']:
    dataset[column] = le.fit_transform(dataset[column])

# Features are every column except the last; the target is the last column.
# NOTE(review): assumes 'play' is the last column of the CSV - confirm.
x = dataset.iloc[:, :-1].values
print(x)
y = dataset.iloc[:, -1].values
print(y)

# Hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(x_train)
print(x_test)
Solution:-
14
Solution:-
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the diabetes dataset from a local copy of the CSV.
# URL given with the exercise:
# https://raw.githubusercontent.com/uciml/pima-indians-diabetes-database/master/diabetes.csv
df = pd.read_csv("diabetes.csv")

# Split features (X) and target (y).
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Create and train the Decision Tree Classifier.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set.
y_pred = clf.predict(X_test)

# Fraction of test rows whose predicted class matches the actual class.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Slip 8
Q.1 Write a Python program to split the dataset into training and
testing sets. [15]
Solution:--
# Notes:
# - numpy is used for mathematical operations.
# - pandas is used to read a .csv or Excel file, or to import columns from
#   a dataset.
# - Scikit-learn, also known as sklearn, is a Python library for implementing
#   machine learning models and statistical modelling. Through scikit-learn
#   we can implement various machine learning models for regression,
#   classification and clustering, and statistical tools for analysing these
#   models.
# - The encode() function in Python returns the encoded form of a given
#   string.
# - The fit_transform() method fits the data into a model and transforms it
#   into a form that is more suitable for the model, in a single step.
# - In iloc[:, :-1], ":" means all rows and ":-1" means every column except
#   the last one.
Solution:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

dataset = pd.read_csv("play_tennis.csv")
print(dataset)

# Replace each categorical column with integer codes. The encoder is refitted
# per column, so every column gets its own independent mapping.
le = preprocessing.LabelEncoder()
for column in ['outlook', 'temp', 'humidity', 'wind', 'play']:
    dataset[column] = le.fit_transform(dataset[column])

# Features are every column except the last; the target is the last column.
# NOTE(review): assumes 'play' is the last column of the CSV - confirm.
x = dataset.iloc[:, :-1].values
print(x)
y = dataset.iloc[:, -1].values
print(y)

# Hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(x_train)
print(x_test)
Q.2 Consider the following dataset:
weather=['Sunny','Sunny','Overcast','Rainy','Rainy','Rainy','Overcast','Sunny','Sunny','Rainy','Sunny','Overcast','Overcast','Rainy']
temp=['Hot','Hot','Hot','Mild','Cool','Cool','Cool','Mild','Cool','Mild','Mild','Mild','Hot','Mild']
play=['No','No','Yes','Yes','Yes','No','Yes','No','Yes','Yes','Yes','Yes','Yes','No']
Use the Naïve Bayes algorithm to predict which class the tuple
[0: Overcast, 2: Mild] belongs to, i.e. whether to play the sport or not.
[15]
Solution:-
# Fourteen observations; the three lists are aligned by position.
weather = ['sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'rainy',
           'overcast', 'sunny', 'sunny', 'rainy', 'sunny', 'overcast',
           'overcast', 'rainy']
temp = ['hot', 'hot', 'hot', 'mild', 'cool', 'cool', 'cool', 'mild',
        'cool', 'mild', 'mild', 'mild', 'hot', 'mild']
play = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'Yes', 'No']
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB

# The same encoder object is refitted for each list in turn, so after the
# last call it only remembers the mapping for `play`.
le = preprocessing.LabelEncoder()
wheather_encoded = le.fit_transform(weather)
print(wheather_encoded)
temp_encoded = le.fit_transform(temp)
label = le.fit_transform(play)
print("Temp:", temp_encoded)
print("Play:", label)

# Pair the two encoded predictors row by row.
features = list(zip(wheather_encoded, temp_encoded))
print(features)

# Train Gaussian naive Bayes and classify the tuple (0, 2).
model = GaussianNB()
model.fit(features, label)
predicted = model.predict([[0, 2]])
print("Predicted Value:", predicted)
Slip 9
Q.1 Write a R program to reverse a number and also calculate the sum of
digits of that
number. [15]
Solution:-
# Read a whole number from the user and reject anything that is not a
# non-negative integer (the digit loops below only handle x >= 0).
x = as.integer(readline("Enter any number:- "))
if (is.na(x) || x < 0) {
  stop("Please enter a non-negative whole number.")
}

# Build the reversed number by peeling digits off the right-hand side.
temp = x
rev = 0
while (temp > 0)
{
  rem = temp %% 10
  rev = (rev * 10) + rem
  temp = floor(temp / 10)
}
# The trailing "\n" keeps the next message on its own line.
cat("Reverse of number is ", rev, "\n")

# Add up the digits of the original number the same way.
# Named digit_sum so that base R's sum() is not masked.
digit_sum = 0
while (x > 0)
{
  rem = x %% 10
  digit_sum = digit_sum + rem
  x = floor(x / 10)
}
cat("Sum of digits of the number is ", digit_sum, "\n")
20
dataset
from apyori import apriori

# Each of (up to) the first 150 rows becomes one transaction holding the
# string form of its first five cells. `dataset` is loaded earlier in the
# script; the array is fetched once instead of once per cell.
rows = dataset.values
transactions = [[str(cell) for cell in row[:5]] for row in rows[:150]]

rules = apriori(transactions=transactions, min_support=0.003,
                min_confidence=0.2, min_lift=3, min_length=2, max_length=2)
results = list(rules)

for item in results:
    # item[0] is the set of items in the rule, item[1] its support, and
    # item[2][0] the first ordered statistic (confidence at [2], lift at [3]).
    # The item names go into their own variable so `item` stays intact for
    # the lookups below.
    pair = item[0]
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])
    print("Support: " + str(item[1]))
    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")
Slip 10
Solution:-
# Two vectors of equal length; vector2 mixes numbers and strings.
vector1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 6, 4)
vector2 <- c(1, 'B', 'C', 'D', 'E', 'D', 'F', 'G', 2, 3)
data <- data.frame(vector1, vector2)

# A row is reported when its vector1 value occurs more than once.
# Scanning from both ends flags every occurrence, not only the repeats.
seen_forward <- duplicated(data$vector1)
seen_backward <- duplicated(data$vector1, fromLast = TRUE)
duplicates = data[seen_forward | seen_backward, ]

cat("Original Data Frame:\n")
print(data)
cat("\nDuplicate Elements:\n")
print(duplicates)
Slip 11
Q.1 Write a R program to reverse a number and also calculate the sum
of digits of that
number. [15]
Solution:-
# Read a whole number from the user and reject anything that is not a
# non-negative integer (the digit loops below only handle x >= 0).
x = as.integer(readline("Enter any number:- "))
if (is.na(x) || x < 0) {
  stop("Please enter a non-negative whole number.")
}

# Build the reversed number by peeling digits off the right-hand side.
temp = x
rev = 0
while (temp > 0)
{
  rem = temp %% 10
  rev = (rev * 10) + rem
  temp = floor(temp / 10)
}
# The trailing "\n" keeps the next message on its own line.
cat("Reverse of number is ", rev, "\n")

# Add up the digits of the original number the same way.
# Named digit_sum so that base R's sum() is not masked.
digit_sum = 0
while (x > 0)
{
  rem = x %% 10
  digit_sum = digit_sum + rem
  x = floor(x / 10)
}
cat("Sum of digits of the number is ", digit_sum, "\n")
24
print('Slope:- ', model.coef_)
Slip 12
25
print(predictedCO2)
Q.2 Write a R program to calculate the sum of two matrices of given
size. [15]
Solution:-
# Define a function to calculate the sum of two matrices
matrix_sum <- function(matrix1, matrix2) {
if (dim(matrix1) != dim(matrix2)) {
stop("Matrices must have the same dimensions for addition.")
}
26
# Create an empty n_rows x n_cols matrix and fill it cell by cell from user
# input. n_rows and n_cols must already be defined (not shown in this part
# of the file). seq_len() iterates zero times for a dimension of 0, whereas
# 1:0 would count down through 1 and 0.
matrix1 <- matrix(nrow = n_rows, ncol = n_cols)
for (i in seq_len(n_rows)) {
  for (j in seq_len(n_cols)) {
    prompt <- paste("Enter element at position [", i, ",", j, "]: ")
    matrix1[i, j] <- as.integer(readline(prompt))
  }
}
27
print(result)
Slip 13
Q.1 Write a Python program to implement a multiple linear regression model
for the stock market data frame given below:
Stock_Market = {'Year':
[2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016],
'Month': [12,11,10,9,8,7,6,5,4,3,2,1,12,11,10,9,8,7,6,5,4,3,2,1],
'Interest_Rate':
[2.75,2.5,2.5,2.5,2.5,2.5,2.5,2.25,2.25,2.25,2,2,2,1.75,1.75,1.75,1.75,1.75,1.75,1.75,1.75,1.75,1.75,1.75],
'Unemployment_Rate':
[5.3,5.3,5.3,5.3,5.4,5.6,5.5,5.5,5.5,5.6,5.7,5.9,6,5.9,5.8,6.1,6.2,6.1,6.1,6.1,5.9,6.2,6.2,6.1],
'Stock_Index_Price':
[1464,1394,1357,1293,1256,1254,1234,1195,1159,1167,1130,1075,1047,965,943,958,971,949,884,866,876,822,704,719] }
And draw a graph of stock market price versus interest rate.
[15]
Solution:-
import pandas as pd
from sklearn import linear_model
# Monthly stock-market observations: 12 rows for 2017 followed by 12 rows for
# 2016, listed from month 12 down to month 1 within each year.
data = {
    'year': [
        2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
        2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,
    ],
    'month': [
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
    ],
    'interest_rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'unemployment_rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'index_price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}
df = pd.DataFrame(data)
print(df)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Two predictors (interest and unemployment rate) and one response.
x = df[['interest_rate', 'unemployment_rate']]
print(x)
y = df['index_price']
print(y)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(X_train)
print(X_test)

regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)
print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

y_pred = regr.predict(X_test)
print(y_pred)

# R^2 on the held-out rows, expressed as a percentage.
Accuracy = r2_score(y_test, y_pred) * 100
print(Accuracy)

# Graph requested by the exercise: stock index price versus interest rate.
plt.figure()
plt.scatter(df['interest_rate'], df['index_price'])
plt.xlabel('Interest Rate')
plt.ylabel('Stock Index Price')
plt.title('Stock Index Price vs Interest Rate')

# Actual versus predicted prices on the test rows, with a fitted trend line.
plt.figure()
plt.scatter(y_test, y_pred)
plt.xlabel('Actual')
plt.ylabel('Predicted')
sns.regplot(x=y_test, y=y_pred, ci=None, color='red')
plt.show()
Q.2 Write a R program to concatenate two given factors.
[15]
Solution:-
data1 <- c("ABC","PQR","XYZ")
data2 <- c(1,2,3)
factor1 <- factor(data1)
factor2 <- factor(data2)
print(factor1)
print(factor2)
# c() only returns a factor for factor inputs from R 4.1.0 onwards; older
# versions return the bare integer codes. Combining the labels explicitly
# gives a factor on every version, with factor1's levels first.
concatinated <- factor(
  c(as.character(factor1), as.character(factor2)),
  levels = union(levels(factor1), levels(factor2))
)
print(concatinated)
Slip 14
Q.1 Write a script in R to create a list of employees and perform the following:
a. Display names of employees in the list.
b. Add an employee at the end of the list.
c. Remove the third element of the list.
[15]
Solution:-
# Employee records: number, name and salary.
Employee <- data.frame(
  eno = c(1, 2, 3),
  ename = c("Pratik", "Rohan", "Tushar"),
  sal = c(10000, 20000, 30000)
)
print(Employee)

# a. Display the names of the employees.
print(Employee$ename)

# b. Add an employee at the end. Binding a one-row data frame (rather than a
#    character vector) keeps eno and sal numeric.
new_data <- rbind(Employee, data.frame(eno = 4, ename = "XYZ", sal = 2000))
print(new_data)

# c. Remove the third element.
data <- new_data[-3, ]
print(data)
32
Q.1 Write a R program to add, multiply and divide two vectors of integer
type. (vector
length should be minimum 4)
[15]
Solution: -
# The L suffix makes the elements integers, as the exercise asks; plain
# numeric literals such as 1 are stored as doubles.
vector1 <- c(1L, 2L, 3L, 4L, 5L)
vector2 <- c(6L, 7L, 8L, 9L, 10L)

# All three operations work element by element.
Addition <- vector1 + vector2
print(Addition)
Multiplication <- vector1 * vector2
print(Multiplication)
# Division of integers yields doubles.
Division <- vector1 / vector2
print(Division)
Q.2 Write a Python program to build a Decision Tree Classifier using the Scikit-learn
package for the diabetes data set (download the database from
https://www.kaggle.com/uciml/pima-indians-diabetes-database)
[15]
Solution:-
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the diabetes dataset from a local copy of the CSV.
# URL given with the exercise:
# https://raw.githubusercontent.com/uciml/pima-indians-diabetes-database/master/diabetes.csv
df = pd.read_csv("diabetes.csv")

# Split features (X) and target (y).
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Create and train the Decision Tree Classifier.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test set.
y_pred = clf.predict(X_test)

# Fraction of test rows whose predicted class matches the actual class.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Slip 16
Q.1 Write a Python program to implement a multiple linear regression model
for a car dataset. The dataset can be downloaded from:
https://www.w3schools.com/python/python_ml_multiple_regression.asp
[15]
34
# Note: from the above link, copy the car data into an Excel file, save it
# as .xls, and then convert it to .csv.
Solution:
import pandas
from sklearn import linear_model

df = pandas.read_csv("car.csv")
print(df)

# Two predictors and one response column.
feature_columns = ['Weight', 'Volume']
X = df[feature_columns]
print(X)
y = df['CO2']
print(y)

regr = linear_model.LinearRegression()
regr.fit(X, y)

# Predict CO2 for Weight=2300 and Volume=1300. The model was fitted on a
# DataFrame, so the query is passed as a DataFrame with the same column names;
# a bare list makes scikit-learn warn about missing feature names.
new_car = pandas.DataFrame([[2300, 1300]], columns=feature_columns)
predictedCO2 = regr.predict(new_car)
print(predictedCO2)
Q.2 Write a script in R to create a list of employees and perform the following:
a. Display names of employees in the list.
b. Add an employee at the end of the list.
c. Remove the third element of the list.
[15]
Solution:-
# Employee records: number, name and salary.
Employee <- data.frame(
  eno = c(1, 2, 3),
  ename = c("Pratik", "Rohan", "Tushar"),
  sal = c(10000, 20000, 30000)
)
print(Employee)

# a. Display the names of the employees.
print(Employee$ename)

# b. Add an employee at the end. Binding a one-row data frame (rather than a
#    character vector) keeps eno and sal numeric.
new_data <- rbind(Employee, data.frame(eno = 4, ename = "XYZ", sal = 2000))
print(new_data)

# c. Remove the third element.
data <- new_data[-3, ]
print(data)
Slip 17
[15]
/* Write all the coding in Single ….. in Jupyter */
Solution :
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

# Generate 300 two-feature points around 5 centres. data[0] holds the
# points and data[1] the centre each point was drawn from.
data = make_blobs(n_samples=300, n_features=2, centers=5,
                  cluster_std=1.8, random_state=101)
print(data[0].shape)
print(data[1])

# The generated points, coloured by their true centre.
plt.scatter(data[0][:, 0], data[0][:, 1], c=data[1], cmap='brg')

# Cluster the points into 5 groups.
kmeans = KMeans(n_clusters=5)
kmeans.fit(data[0])
print(kmeans.cluster_centers_)
print(kmeans.labels_)

# Side-by-side comparison: k-means assignment versus the original labels.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 6))
ax1.set_title('K Means')
ax1.scatter(data[0][:, 0], data[0][:, 1], c=kmeans.labels_, cmap='brg')
ax2.set_title("Original")
ax2.scatter(data[0][:, 0], data[0][:, 1], c=data[1], cmap='brg')
plt.show()
Q.2 Write a R program to sort a list of strings in ascending and descending
order.
[15]
Solution:-
# Character vector of names to be ordered.
list <- c("apple", "banana", "Pineapple", "mango", "Orange")

# Ascending order.
asc <- sort(list, decreasing = FALSE)
print(asc)

# Descending order.
desc <- sort(list, decreasing = TRUE)
print(desc)
Slip 18
37
Q.1 Write a R program to reverse a number and also calculate the sum
of digits of that
number. [15]
Solution:-
# Read a whole number from the user and reject anything that is not a
# non-negative integer (the digit loops below only handle x >= 0).
x = as.integer(readline("Enter any number:- "))
if (is.na(x) || x < 0) {
  stop("Please enter a non-negative whole number.")
}

# Build the reversed number by peeling digits off the right-hand side.
temp = x
rev = 0
while (temp > 0)
{
  rem = temp %% 10
  rev = (rev * 10) + rem
  temp = floor(temp / 10)
}
# The trailing "\n" keeps the next message on its own line.
cat("Reverse of number is ", rev, "\n")

# Add up the digits of the original number the same way.
# Named digit_sum so that base R's sum() is not masked.
digit_sum = 0
while (x > 0)
{
  rem = x %% 10
  digit_sum = digit_sum + rem
  x = floor(x / 10)
}
cat("Sum of digits of the number is ", digit_sum, "\n")
38
Q.2 Write a python program to implement hierarchical Agglomerative
clustering algorithm. (Download Customer.csv dataset from github.com).
[15]
Solution :
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering

dataset = pd.read_csv('Mall_Customers.csv')
# Cluster on the columns at positions 3 and 4 (plotted below as annual income
# and spending score).
x = dataset.iloc[:, [3, 4]].values

# Dendrogram of the Ward-linkage hierarchy.
dendro = shc.dendrogram(shc.linkage(x, method="ward"))
mtp.title("Dendrogrma Plot")
mtp.ylabel("Euclidean Distances")
mtp.xlabel("Customers")
mtp.show()

# The `affinity` argument has been removed from scikit-learn. Ward linkage
# only works with Euclidean distance, which is also the default, so the
# argument is simply omitted.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
y_pred = hc.fit_predict(x)

# One scatter call per cluster so that each gets its own colour and legend
# entry.
cluster_styles = [
    (0, 'blue', 'Cluster 1'),
    (1, 'green', 'Cluster 2'),
    (2, 'red', 'Cluster 3'),
    (3, 'cyan', 'Cluster 4'),
    (4, 'magenta', 'Cluster 5'),
]
for cluster_id, colour, name in cluster_styles:
    members = y_pred == cluster_id
    mtp.scatter(x[members, 0], x[members, 1], s=100, c=colour, label=name)
mtp.title('Clusters of customers')
mtp.xlabel('Annual Income (k$)')
mtp.ylabel('Spending Score (1-100)')
mtp.legend()
mtp.show()
Slip 19
40
from sklearn.cluster import KMeans

# `dataset` and `mtp` (matplotlib.pyplot) come from the earlier part of this
# script. Cluster on the columns at positions 3 and 4.
x = dataset.iloc[:, [3, 4]].values
print(x)

# Elbow method: fit k-means for k = 1..10 and record the within-cluster sum
# of squares (inertia) for each k.
wcss_list = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(x)
    wcss_list.append(kmeans.inertia_)
mtp.plot(range(1, 11), wcss_list)
mtp.title('The Elobw Method Graph')
mtp.xlabel('Number of clusters(k)')
mtp.ylabel('wcss_list')
mtp.show()

# Final model with 3 clusters.
kmeans = KMeans(n_clusters=3, init='k-means++', random_state=42)
y_predict = kmeans.fit_predict(x)

# One scatter call per cluster so that each gets its own colour and legend
# entry.
cluster_styles = [
    (0, 'blue', 'Cluster 1'),
    (1, 'green', 'Cluster 2'),
    (2, 'red', 'Cluster 3'),
]
for cluster_id, colour, name in cluster_styles:
    members = y_predict == cluster_id
    mtp.scatter(x[members, 0], x[members, 1], s=100, c=colour, label=name)

# Cluster centres, drawn larger than the data points.
mtp.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=300, c='yellow', label='Centroid')
mtp.title('Clusters of Credit Card')
mtp.xlabel('V3')
mtp.ylabel('V4')
mtp.legend()
mtp.show()
42
print(data)
Slip 20
43
# `hc`, `x` and `mtp` are created earlier in this script.
y_pred = hc.fit_predict(x)

# Draw each of the five clusters with its own colour and legend label.
cluster_styles = (
    (0, 'blue', 'Cluster 1'),
    (1, 'green', 'Cluster 2'),
    (2, 'red', 'Cluster 3'),
    (3, 'cyan', 'Cluster 4'),
    (4, 'magenta', 'Cluster 5'),
)
for cluster_id, colour, name in cluster_styles:
    mtp.scatter(x[y_pred == cluster_id, 0], x[y_pred == cluster_id, 1],
                s=100, c=colour, label=name)

mtp.title('Clusters of customers')
mtp.xlabel('Milk')
mtp.ylabel('Grocery')
mtp.legend()
mtp.show()
Q.2 Write a R program to concatenate two given factors.
[15]
data1 <- c("ABC","PQR","XYZ")
data2 <- c(1,2,3)
factor1 <- factor(data1)
factor2 <- factor(data2)
print(factor1)
print(factor2)
# c() only returns a factor for factor inputs from R 4.1.0 onwards; older
# versions return the bare integer codes. Combining the labels explicitly
# gives a factor on every version, with factor1's levels first.
concatinated <- factor(
  c(as.character(factor1), as.character(factor2)),
  levels = union(levels(factor1), levels(factor2))
)
print(concatinated)
45