Data Science Practical
Data Science Practical
1952034
Certificate
Date: _________________
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
7/02/2020
3 Practical of Principal Component
Analysis
INDEX
rscript Install
couchdb first
Rscript code
# Install the CouchDB client package only when it is missing, so re-running
# the script does not reinstall it every time.
if (!requireNamespace("sofa", quietly = TRUE)) {
  install.packages("sofa")
}
# Alternative: development version from GitHub.
# devtools::install_github("ropensci/sofa")
library("sofa")
# Create the connection object. No arguments are passed, so the client's
# default connection settings are used -- TODO confirm they match the local
# CouchDB instance.
x <- Cushion$new()
# Connection object created; ping the server through it.
x$ping()
# Create database "ty" on the connection, then list the databases.
db_create(x, dbname = "ty")
# Sub:DATA SCIENCE
# TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
db_list(x)
# JSON bodies for three student records. Each literal is kept on a single
# line: a raw line break inside a JSON string value is not valid JSON.
doc1 <- '{"rollno":"01","name":"ABC","GRADE":"A"}'
doc2 <- '{"rollno":"02","name":"PQR","GRADE":"A"}'
doc3 <- '{"rollno":"03","name":"xyz","GRADE":"B","REMARK":"PASS"}'

# Store the records in database "ty". Only doc1 was stored before; doc2 and
# doc3 are stored too because later steps delete docid "a_2" and query for
# rollno values above "02". The ids for doc2/doc3 follow the "a_<n>" pattern
# of doc1 -- NOTE(review): confirm "a_3" is the intended id for doc3.
doc_create(x, dbname = "ty", doc = doc1, docid = "a_1")
doc_create(x, dbname = "ty", doc = doc2, docid = "a_2")
doc_create(x, dbname = "ty", doc = doc3, docid = "a_3")
# Changes feed for database "ty".
db_changes(x, dbname = "ty")

# Display the documents: query "ty" with a selector on `_id` and print the
# `docs` element of the result.
db_query(
  x,
  dbname = "ty",
  selector = list("_id" = list("$gt" = NULL))
)$docs
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Query "ty" for documents whose rollno is greater than "02", returning only
# the name and GRADE fields, and print the `docs` element.
db_query(
  x,
  dbname = "ty",
  selector = list(rollno = list("$gt" = "02")),
  fields = c("name", "GRADE")
)$docs

library("jsonlite")

# Same `_id` selector as the display query, restricted to four fields and
# requested as JSON (as = "json"); the result is kept in `res`.
res <- db_query(
  x,
  dbname = "ty",
  selector = list("_id" = list("$gt" = NULL)),
  fields = c("name", "rollno", "GRADE", "REMARK"),
  as = "json"
)
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Usage: doc_delete(cushion, dbname, docid)
doc_delete(x, dbname = "ty", docid = "a_2")
# Fetch the same id again to check the deletion.
# NOTE(review): this lookup is expected to fail once "a_2" is gone -- confirm
# that an error here is the intended demonstration.
doc_get(x, dbname = "ty", docid = "a_2")

# New JSON bodies for further documents. Each string value is kept on one
# line: a raw line break inside a JSON string value ("0<newline>1") is not
# valid JSON.
doc2 <- '{"name":"Sdrink","beer":"TEST","note":"yummy","note2":"yay"}'
doc3 <- '{"rollno":"01","name":"UZMA","GRADE":"A"}'
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Once you are in the MongoDB shell, create the database in MongoDB
by typing this command:
use database_name
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
>show dbs
admin 0.000GB
config 0.000GB
local 0.000GB
tycs 0.000GB
O/P:
{ "dropped" : "Testdb", "ok" : 1 }
MongoDB Enterprise > show dbs
admin 0.000GB
config 0.000GB
local 0.000GB
tycs 0.000GB
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
O/P:
Syntax: db.collection_name.find()
MongoDB Enterprise > db.tycs.find()
o/p:
{ "_id" : ObjectId("5e410808e3755b1e06a63d1d"), "name" : "Asif khan",
"age" : 21, "website" : "www.google.com" }
show collections
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
O/P:
tycs
user
SYNTAX:
db.collection_name.drop()
db.collection_name.insert()
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
> db.tycs.insert(
... {
... name: "ASIF",
... age: 20,
... email: "[email protected]",
... course: [ { name: "MongoDB", duration: 7 }, { name: "Java",
duration: 30 } ]
... }
... )
O/P:
WriteResult({ "nInserted" : 1 })
Verification:
Syntax:
db.collection_name.find()
> db.tycs.find()
{ "_id" : ObjectId("5c2d37734fa204bd77e7fc1c"), "name" : "ASIF",
"age" : 20, "email" : "[email protected]", "course" : [ { "name" :
"MongoDB", "duration" : 7 }, { "name" : "Java", "duration" : 30 } ] }
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
{
"_id" : ObjectId("5e410f3fe3755b1e06a63d1e"),
"studentID" : 1001,
"studentName" : "Asif",
"age" : 20
}
{
"_id" : ObjectId("5c281c90c23e08d1515fd9cc"),
"StudentId" : 1001,
"StudentName" : "Asif",
"age" : 20
}
Syntax:
db.collection_name.update(criteria, update_data)
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
> db.got.find().pretty()
{
"_id" : ObjectId("59bd2e73ce524b733f14dd65"),
"name" : "steve",
"age" : 20
}
db.got.update({"name":"Jon Snow"},
{$set:{"name":"Kit Harington"}},{multi:true})
Syntax:
To get the _id of a document, type this command:
db.got.find().pretty()
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
db.collection_name.remove(delete_criteria)
> db.students.find().pretty()
{
"_id" : ObjectId("59bcecc7668dcce02aaa6fed"),
"StudentId" : 1001,
"StudentName" : "Steve",
"age" : 30
}
db.students.remove({"StudentId": 3333})
Output:
WriteResult({ "nRemoved" : 1 })
To verify whether the document was actually deleted, type the following
command:
db.students.find().pretty()
It will list all the documents of students collection.
db.collection_name.remove({})
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
MongoDB Projection
Syntax:
db.collection_name.find({},{field_key:1 or 0})
> db.students.find().pretty()
{
"_id" : ObjectId("5c281c90c23e08d1515fd9cc"),
"StudentId" : 1001,
"StudentName" : "Steve",
"age" : 20
}
> db.students.find({}, {"_id": 0, "StudentId" : 1})
{ "StudentId" : 1001 }
{ "StudentId" : 1002 }
Syntax:
db.collection_name.find().limit(number_of_documents)
db.studentdata.find({student_id : {$gt:2002}}).pretty()
db.studentdata.find({student_id : {$gt:2002}}).limit(1).pretty()
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
db.studentdata.find({student_id : {$gt:2002}}).limit(1).skip(1).pretty()
db.collection_name.find().sort({field_key:1 or -1})
1 is for ascending order and -1 is for descending order. The default
value is 1.
> db.studentdata.find().pretty()
{
"_id" : ObjectId("59bf63380be1d7770c3982af"),
"student_name" : "Steve",
"student_id" : 1001,
"student_age" :1002
}
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
db.collection_name.createIndex({field_name: 1 or -1})
The value 1 is for ascending order and -1 is for descending order.
db.studentdata.createIndex({student_name: 1})
Output:
{
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1
}
db.collection_name.getIndexes()
> db.studentdata.getIndexes()
[
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.studentdata"
},
]
# Principal component analysis of the four numeric iris measurements,
# computed two ways: eigen-decomposition of the covariance matrix, and
# princomp().
data_iris <- iris[1:4]
Cov_data <- cov(data_iris)
Eigen_data <- eigen(Cov_data)
# `cor` is a logical flag; pass FALSE (covariance-based PCA) rather than the
# string "False", which only worked through implicit coercion.
PCA_data <- princomp(data_iris, cor = FALSE)

# Component variances from both routes. They differ by the factor (n - 1) / n
# because cov() divides by n - 1 while princomp() divides by n.
Eigen_data$values
PCA_data$sdev^2
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Compare the princomp loadings with the eigenvectors, then summarise and
# plot the components.
PCA_data$loadings[, 1:4]
Eigen_data$vectors
summary(PCA_data)
biplot(PCA_data)
screeplot(PCA_data, type = "lines")

# Project the data onto the first principal component only.
# The raw (uncentred) measurements are multiplied by the loading vector.
model2 <- PCA_data$loadings[, 1]
model2_scores <- as.matrix(data_iris) %*% model2

library(class)
# Install e1071 (naiveBayes) only when it is missing.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

# Naive Bayes on all four measurements versus on the single PC1 score.
mod1 <- naiveBayes(iris[, 1:4], iris[, 5])
mod2 <- naiveBayes(model2_scores, iris[, 5])

# Confusion tables on the training data: predicted class (rows) against the
# actual species (columns).
table(predict(mod1, iris[, 1:4]), iris[, 5])
table(predict(mod2, model2_scores), iris[, 5])
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Logistic regression on the first two iris species, using Sepal.Length as
# the single predictor.
library(datasets)
ir_data <- iris
head(ir_data)
str(ir_data)
levels(ir_data$Species)
sum(is.na(ir_data))

# Keep the first 100 rows, then draw 80 of them at random for fitting.
ir_data <- ir_data[1:100, ]
set.seed(100)
samp <- sample(seq_len(100), 80)
# Naming note: `ir_test` holds the 80 sampled rows the model is fitted on;
# `ir_ctrl` holds the 20 remaining rows used for prediction.
ir_test <- ir_data[samp, ]
ir_ctrl <- ir_data[-samp, ]

# Install plotting packages only when missing, and attach both: ggpairs()
# comes from GGally, which was installed but never loaded before.
for (pkg in c("ggplot2", "GGally")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(GGally)
ggpairs(ir_test)

# Fit the binomial GLM of species on sepal length.
y <- ir_test$Species
x <- ir_test$Sepal.Length
glfit <- glm(y ~ x, family = "binomial")
summary(glfit)

# Predict on the held-out rows; the column must be named `x` to match the
# predictor name used in the model formula.
newdata <- data.frame(x = ir_ctrl$Sepal.Length)
predicted_val <- predict(glfit, newdata, type = "response")
prediction <- data.frame(
  ir_ctrl$Sepal.Length,
  ir_ctrl$Species,
  predicted_val
)
prediction
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Plot the rounded prediction (0/1) against sepal length, coloured by the
# actual species. The x-axis label is kept on one line: a line break inside
# the string literal would put a newline into the label.
qplot(
  prediction[, 1],
  round(prediction[, 3]),
  col = prediction[, 2],
  xlab = "sepal.Length",
  ylab = "prediction using logistic Reg"
)
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Two-sided two-sample t-test of x against y without assuming equal
# variances (var.equal = FALSE), at the 95% confidence level.
# The console prompt characters are removed so the lines run as a script.
test2 <- t.test(
  x, y,
  alternative = "two.sided",
  mu = 0,
  var.equal = FALSE,
  conf.level = 0.95
)
test2
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# F-test: compare the variances of columns time_g1 and time_g2 from a CSV
# file chosen interactively.
ftest <- read.csv(file.choose(), sep = ",", header = TRUE)
var.test(ftest$time_g1, ftest$time_g2, alternative = "two.sided")

# One-way ANOVA
# `data1` was used without being loaded in this script; read it the same way
# as `data2` unless it already exists in the session.
if (!exists("data1")) {
  data1 <- read.csv(file.choose(), sep = ",", header = TRUE)
}
names(data1)
summary(data1)
head(data1)
anv <- aov(formula = satindex ~ dept, data = data1)
summary(anv)

# Two-way ANOVA
data2 <- read.csv(file.choose(), sep = ",", header = TRUE)
names(data2)
summary(data2)
head(data2)
# dept * exp expands to dept + exp + dept:exp, i.e. both main effects plus
# their interaction (the same terms as dept + exp + dept * exp).
anv1 <- aov(formula = satindex ~ dept * exp, data = data2)
summary(anv1)
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Classification trees for iris Species fitted with three packages:
# rpart, tree and party.
mydata <- data.frame(iris)
# NOTE(review): attach() is kept only in case code elsewhere relies on the
# bare column names; every model call below passes `data = mydata`.
attach(mydata)

# Install the tree packages only when they are missing.
for (pkg in c("rpart", "tree", "party")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# rpart classification tree.
library(rpart)
model <- rpart(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata,
  method = "class"
)
plot(model)
text(model, use.n = TRUE, all = TRUE, cex = 0.8)

# tree() with the Gini split criterion.
# NOTE(review): method = "class" is carried over from the rpart call --
# confirm that tree() accepts this value.
library(tree)
model1 <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata,
  method = "class",
  split = "gini"
)
plot(model1)
# Label the tree that was just plotted (model1); labelling with `model` would
# place the rpart tree's labels on this plot.
text(model1, all = TRUE, cex = 0.6)

# Conditional inference tree from party.
library(party)
model2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata
)
plot(model2)
# Refit the tree() model with explicit growth control: nobs = 150
# observations and a minimum of 10 observations in either child node.
library(tree)
mydata <- data.frame(iris)
# attach(mydata) is dropped here: the model call passes `data = mydata`, and
# attaching the same data frame again only adds masked duplicates to the
# search path.
# NOTE(review): method = "class" is carried over from the rpart call --
# confirm that tree() accepts this value.
model <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata,
  method = "class",
  control = tree.control(nobs = 150, mincut = 10)
)
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
# Plot and label the controlled tree held in `model`. Plot and labels must
# use the same fitted object; plotting `model1` here would put `model`'s
# labels on a different tree.
plot(model)
text(model, all = TRUE, cex = 0.6)
predict(model, iris)

# Conditional inference tree limited to depth 2.
model2 <- ctree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = mydata,
  controls = ctree_control(maxdepth = 2)
)
plot(model2)
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Output:
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE
TY.B.Sc[COMPUTER SCIENCE] Roll No. 1952034
Sub:DATA SCIENCE