DM Lab
DM Lab
(Autonomous)
Exercise No: 1A
Example:
Perform data cleaning technique using smoothing by BIN MEANS on
elements 4, 8, 15, 21, 21, 24, 25, 34, 28 whose bin size is 3.
Dept. of CSE 1
DATA MINING LAB (R15) RGMCET (Autonomous)
4 8 15
21 21 24
25 28 34
Calculate the mean value for each bin.
Bin 1: 4 8 15
Bin 1 Mean: sum of elements of bin 1/bin size
= (4+8+15)/3
=9
Bin 2: 21 21 24
Bin 2 Mean: sum of elements of bin 2/bin size
= (21+21+24)/3
= 22
Bin 3: 25 28 34
Bin 3 Mean: sum of elements of bin 3/bin size
= (25+28+34)/3
= 29
Mean Values:
9 9 9
22 22 22
29 29 29
Dept. of CSE 2
DATA MINING LAB (R15) RGMCET (Autonomous)
AIM:
Write a C program to perform data cleaning techniques using smoothing
by binmeans.
SOURCE CODE:
#include<stdio.h>
/*
 * Smoothing by bin means.
 *
 * Reads the number of bins and the number of values per bin, then the
 * values themselves.  The values are sorted in ascending order, split into
 * consecutive equal-size bins, and every value is replaced by the mean of
 * its bin.
 *
 * NOTE(review): in this listing main() was closed before its body, and the
 * declarations, input and sort steps were missing.  They are reconstructed
 * here on the pattern of the bin-medians program (Exercise 1B); confirm
 * against the original lab sheet.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    int i, k, n, bin, a[50], temp, sum;
    float mean;

    printf("Enter no of bins:");
    if (scanf("%d", &bin) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter no of values to be enter in each bin:");
    if (scanf("%d", &n) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    /* a[] holds 50 values, so bin*n must not exceed that. */
    if (bin <= 0 || n <= 0 || n > 50 / bin) {
        printf("Invalid input\n");
        return 1;
    }

    printf("Enter elements\n");
    for (i = 0; i < n * bin; i++) {
        if (scanf("%d", &a[i]) != 1) {
            printf("Invalid input\n");
            return 1;
        }
    }

    printf("The given elements are\n");
    /* Exchange sort into ascending order so that bins hold neighbouring values. */
    for (i = 0; i < n * bin; i++) {
        for (k = 0; k < n * bin; k++) {
            if (a[i] < a[k]) {
                temp = a[i];
                a[i] = a[k];
                a[k] = temp;
            }
        }
    }
    for (i = 0; i < bin; i++) {
        for (k = 0; k < n; k++) {
            printf("%2d\t", a[k + i * n]);
        }
        printf("\n");
    }

    printf("The mean values are\n");
    for (i = 0; i < bin; i++) {
        sum = 0;
        for (k = 0; k < n; k++) {
            sum = sum + a[k + i * n];
        }
        mean = (float)sum / n;
        for (k = 0; k < n; k++) {
            a[k + i * n] = mean;   /* stored back truncated to int */
            printf("%f\t", mean);
        }
        printf("\n");
    }
    return 0;
}
OUTPUT:
Dept. of CSE 4
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No:1B
What is Smoothing by Bin Medians?
4 8 15
21 21 24
25 28 34
Bin 1: 4 8 15
Bin 2: 21 21 24
Bin 3: 25 28 34
Median Values:
8 8 8
21 21 21
28 28 28
Dept. of CSE 5
DATA MINING LAB (R15) RGMCET (Autonomous)
AIM:
#include<stdio.h>
/*
 * Smoothing by bin medians.
 *
 * Reads the number of bins and the number of values per bin, then the
 * values.  The values are sorted in ascending order, printed bin by bin,
 * and every value is then replaced by the median of its bin (the middle
 * element, or the average of the two middle elements when the bin size is
 * even).
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    int i, k, n, bin, a[50], temp;
    float median;

    printf("Enter no of bins:");
    if (scanf("%d", &bin) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter no of values to be enter in each bin:");
    if (scanf("%d", &n) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    /* a[] holds 50 values, so bin*n must not exceed that. */
    if (bin <= 0 || n <= 0 || n > 50 / bin) {
        printf("Invalid input\n");
        return 1;
    }

    printf("Enter elements\n");
    for (i = 0; i < n * bin; i++) {
        if (scanf("%d", &a[i]) != 1) {
            printf("Invalid input\n");
            return 1;
        }
    }

    printf("The given elements are\n");
    /* Exchange sort into ascending order; each bin is then already sorted. */
    for (i = 0; i < n * bin; i++) {
        for (k = 0; k < n * bin; k++) {
            if (a[i] < a[k]) {
                temp = a[i];
                a[i] = a[k];
                a[k] = temp;
            }
        }
    }
    for (i = 0; i < bin; i++) {
        for (k = 0; k < n; k++) {
            printf("%2d\t", a[k + i * n]);
        }
        printf("\n");
    }

    printf("The median values are\n");
    for (i = 0; i < bin; i++) {
        if (n % 2 == 0) {
            /* Even bin size: average the two middle elements. */
            median = (float)(a[n / 2 - 1 + i * n] + a[n / 2 + i * n]) / 2;
        } else {
            median = a[n / 2 + i * n];
        }
        for (k = 0; k < n; k++) {
            a[k + i * n] = median;   /* stored back truncated to int */
            printf("%f\t", median);
        }
        printf("\n");
    }
    return 0;
}
OUTPUT:
Dept. of CSE 7
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No:1C
4 8 15
21 21 24
25 28 34
For each element in a bin, compute its difference from the bin's lower boundary
value (the minimum) and from its upper boundary value (the maximum). Replace the
element with whichever boundary value gives the smaller difference.
Bin 1: 4 8 15
Bin 1:
For value 4, (4-4) < (15-4) so replace 4 with lower boundary value 4.
For value 8, (8-4) < (15-8) so replace 8 with lower boundary value 4.
For value 15, (15-4) > (15-15) so replace 15 with upper boundary value 15.
Bin 2: 21 21 24
Bin 2:
Dept. of CSE 8
DATA MINING LAB (R15) RGMCET (Autonomous)
For value 21, (21-21) < (24-21) so replace 21 with lower boundary value 21.
For value 21, (21-21) < (24-21) so replace 21 with lower boundary value 21.
For value 24, (24-21) > (24-24) so replace 24 with upper boundary value 24.
Bin 3: 25 28 34
Bin 3:
For value 25, (25-25) < (34-25) so replace 25 with lower boundary value 25.
For value 28, (28-25) < (34-28) so replace 28 with lower boundary value 25.
For value 34, (34-25) > (34-34) so replace 34 with upper boundary value 34.
Bin Boundaries:
4 4 15
21 21 24
25 25 34
Dept. of CSE 9
DATA MINING LAB (R15) RGMCET (Autonomous)
AIM:
#include<stdio.h>
/*
 * Smoothing by bin boundaries.
 *
 * Reads the number of bins (n) and the number of values per bin (nb), then
 * the n*nb values.  The values are sorted in ascending order, split into
 * bins, and each value is replaced by the nearer of its bin's first
 * (lowest) and last (highest) value; a tie goes to the lower boundary.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    int i, j, n, nb, a[50], temp, b[10][10];
    int lower, upper;

    printf("Enter no of bins:");
    if (scanf("%d", &n) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter values each bin:");
    if (scanf("%d", &nb) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    /* b is 10x10 and a holds 50 values; reject sizes that overflow either. */
    if (n <= 0 || nb <= 0 || n > 10 || nb > 10 || n * nb > 50) {
        printf("Invalid input\n");
        return 1;
    }

    printf("Enter values\n");
    for (i = 0; i < n * nb; i++) {
        if (scanf("%d", &a[i]) != 1) {
            printf("Invalid input\n");
            return 1;
        }
    }

    printf("The given values\n");
    /* Exchange sort into ascending order. */
    for (i = 0; i < n * nb; i++) {
        for (j = 0; j < n * nb; j++) {
            if (a[i] < a[j]) {
                temp = a[i];
                a[i] = a[j];
                a[j] = temp;
            }
        }
    }
    for (i = 0; i < n; i++) {
        for (j = 0; j < nb; j++) {
            printf("%2d\t", a[j + i * nb]);
            b[i][j] = a[j + i * nb];
        }
        printf("\n");
    }

    printf("boundaries are\n");
    for (i = 0; i < n; i++) {
        /* Capture both boundaries before the bin is overwritten. */
        lower = b[i][0];
        upper = b[i][nb - 1];   /* last value of the bin: index nb-1, not n-1 */
        for (j = 0; j < nb; j++) {
            if (b[i][j] - lower <= upper - b[i][j])
                b[i][j] = lower;
            else
                b[i][j] = upper;
            printf("%d\t", b[i][j]);
        }
        printf("\n");
    }
    return 0;
}
OUTPUT:
Dept. of CSE 11
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 2A
Aim:
Write a C program to perform Data Transformation Technique using
Min-Max normalization.
Source Code:
#include<stdio.h>
/*
 * Min-max normalization.
 *
 * Reads the original range [min, max], the target range [newmin, newmax]
 * and a value v, then prints
 *     y = ((v - min) / (max - min)) * (newmax - newmin) + newmin
 *
 * NOTE(review): the prompts for newmin, newmax and v were not present in
 * this listing and are supplied here.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    float min, max, newmin, newmax, y, v;

    printf("Enter min:");
    if (scanf("%f", &min) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter max:");
    if (scanf("%f", &max) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter new min:");
    if (scanf("%f", &newmin) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter new max:");
    if (scanf("%f", &newmax) != 1) {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter value:");
    if (scanf("%f", &v) != 1) {
        printf("Invalid input\n");
        return 1;
    }

    /* The formula divides by (max - min); an empty range cannot be scaled. */
    if (max == min) {
        printf("Invalid input\n");
        return 1;
    }

    y = ((v - min) / (max - min)) * (newmax - newmin) + newmin;
    printf("Value of y:%f", y);
    return 0;
}
Dept. of CSE 12
DATA MINING LAB (R15) RGMCET (Autonomous)
OUTPUT:
Dept. of CSE 13
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 2B
Aim:
#include<stdio.h>
#include<math.h>
/*
 * Z-score normalization.
 *
 * Reads n values (at most 20) and a value v, prints the sum, the mean and
 * the population standard deviation of the values, and then the z-score
 *     v1 = (v - mean) / standard_deviation
 *
 * NOTE(review): the prompts for n and for the values were not present in
 * this listing and are supplied here.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    int i, n;
    float v, v1, sig, avg, sum = 0, a[20];
    double sd;

    printf("enter no of values\n");
    /* a[] holds 20 values; n == 0 would divide by zero below. */
    if (scanf("%d", &n) != 1 || n < 1 || n > 20) {
        printf("Invalid input\n");
        return 1;
    }
    printf("enter values\n");
    for (i = 0; i < n; i++) {
        if (scanf("%f", &a[i]) != 1) {
            printf("Invalid input\n");
            return 1;
        }
    }
    printf("enter v value\n");
    if (scanf("%f", &v) != 1) {
        printf("Invalid input\n");
        return 1;
    }

    for (i = 0; i < n; i++)
        sum = sum + a[i];
    avg = sum / n;
    printf("sum is =%f\n", sum);
    printf("Avg =%f\n", avg);

    /* Population variance: mean of the squared deviations from the mean. */
    sum = 0;
    for (i = 0; i < n; i++) {
        sum = sum + pow(avg - a[i], 2);
    }
    sig = sum / n;
    sd = sqrt(sig);

    /* The printed quantity is sqrt(variance), i.e. the standard deviation. */
    printf("standard deviation is %f\n", sd);
    if (sd == 0) {
        /* All values are equal: the z-score is undefined. */
        printf("Invalid input\n");
        return 1;
    }
    v1 = (v - avg) / sd;
    printf("v1 value=%f\n", v1);
    return 0;
}
OUTPUT:
Dept. of CSE 15
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 2C
Aim:
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
/*
 * Normalization by decimal scaling.
 *
 * Reads an integer v and divides it by the smallest power of ten 10^j that
 * brings its magnitude below 1, then prints the scaled value v'.  The sign
 * of v is preserved in the result.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    float vd;
    double mag;
    int j = 0, v;

    printf("enter v value:");
    if (scanf("%d", &v) != 1) {
        printf("Invalid input\n");
        return 1;
    }

    /* Work on the magnitude in double so that INT_MIN is handled too. */
    mag = fabs((double)v);
    vd = (float)mag;
    while (vd >= 1) {
        j++;
        vd = (float)(mag / pow(10, j));
    }
    if (v < 0)
        vd = -vd;

    printf("value of v' is %f\n", vd);
    return 0;
}
Dept. of CSE 16
DATA MINING LAB (R15) RGMCET (Autonomous)
OUTPUT:
Dept. of CSE 17
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 3
Aim:
Source Code:
#include<stdio.h>
/*
 * row, col : number of rows and columns (both 1-based) of the itemset table
 *            currently being processed; updated by both functions below.
 * min_freq : minimum number of transactions an itemset must occur in to be
 *            kept by findFrequentSets().
 */
static int row,col,min_freq=2;
void findFrequentSets(int items[][100],int transactions[][100],int no_of_items[],
                      int no_of_transactions,int result_item[][100]);
void generateSubsets(int items[][100],int setCount,int result_item[][100]);
/*
 * Frequent-itemset mining driver.
 *
 * Reads the transactions (integer item ids), collects the distinct items as
 * the initial one-item candidates, filters them with findFrequentSets(), and
 * then repeatedly builds larger candidates with generateSubsets() and filters
 * them again, printing the surviving itemsets after every pass.
 *
 * All tables are indexed from 1, so at most 99 transactions, 99 items per
 * transaction and 99 distinct items are accepted.
 * NOTE(review): generateSubsets() does not limit how many candidate rows it
 * writes; very many frequent items could still overflow the 100-row tables.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    int no_of_transactions,k,l,i,j,setCount=2,maxCount;
    int transactions[100][100],no_of_items[100],items[100][100];
    int result_item[100][100];

    printf("enter no.of transactions\n");
    if(scanf("%d",&no_of_transactions)!=1 || no_of_transactions<1 || no_of_transactions>99)
    {
        printf("Invalid input\n");
        return 1;
    }
    for(i=1;i<=no_of_transactions;i++)
    {
        printf("enter no.of items in transaction:%d\n",i);
        if(scanf("%d",&no_of_items[i])!=1 || no_of_items[i]<1 || no_of_items[i]>99)
        {
            printf("Invalid input\n");
            return 1;
        }
        printf("enter %d items for transaction:%d\n",no_of_items[i],i);
        for(j=1;j<=no_of_items[i];j++)
        {
            if(scanf("%d",&transactions[i][j])!=1)
            {
                printf("Invalid input\n");
                return 1;
            }
        }
    }

    /* Collect every distinct item once, as column 1 of items[1..k]. */
    k=0;
    for(i=1;i<=no_of_transactions;i++)
    {
        for(j=1;j<=no_of_items[i];j++)
        {
            for(l=1;l<=k;l++)
                if(items[l][1]==transactions[i][j])
                    break;
            if(l>k)
            {
                if(k>=99)
                {
                    printf("Invalid input\n");
                    return 1;
                }
                items[++k][1] = transactions[i][j];
            }
        }
    }

    row=k;
    col=1;
    maxCount=k;
    findFrequentSets(items,transactions,no_of_items,no_of_transactions,result_item);
    printf("\n");
    for(i=1;i<=row;i++)
    {
        for(j=1;j<=col;j++)
            printf("%d ",result_item[i][j]);
        printf("\n");
    }

    while(setCount<=maxCount)
    {
        /* Nothing survived the last pass, so later passes would print nothing. */
        if(row==0)
            break;
        /* The two tables swap roles: frequent sets in, candidates out, and back. */
        generateSubsets(result_item,setCount,items);
        findFrequentSets(items,transactions,no_of_items,no_of_transactions,result_item);
        for(i=1;i<=row;i++)
        {
            for(j=1;j<=col;j++)
                printf("%d ",result_item[i][j]);
            printf("\n");
        }
        setCount++;
    }
    return 0;
}
/*
 * Keeps the candidate itemsets that occur in at least min_freq transactions.
 *
 * items              candidate table, rows 1..row, columns 1..col
 * transactions       transaction table, row i holds items 1..no_of_items[i]
 * no_of_items        item count of each transaction (1-based)
 * no_of_transactions number of transactions
 * result_item        receives the surviving itemsets, packed from row 1
 *
 * On return the used part of items is zeroed and the file-level row/col are
 * set to the dimensions of result_item.
 */
void findFrequentSets(int items[][100],int transactions[][100],int
no_of_items[],int no_of_transactions,int result_item[][100])
{
    int i,j,l,p,q,m,count;
    l=1;p=1;q=1;
    while(l<=row)
    {
        count=0;
        for(i=1;i<=no_of_transactions;i++)
        {
            /* m is the next column of candidate l still to be found in transaction i. */
            m=1;
            for(j=1;j<=no_of_items[i];j++)
            {
                if(m<=col)
                {
                    if(transactions[i][j]==items[l][m])
                    {
                        m++;
                        j=0;    /* restart the scan (j++ makes it 1) for the next column */
                    }
                }
                else
                    break;
            }
            /* Every column was matched: transaction i contains the candidate. */
            if(m>col)
                ++count;
        }
        if(count>=min_freq)
        {
            q=1;
            for(m=1;m<=col;m++)
                result_item[p][q++]=items[l][m];
            p++;
        }
        l++;
    }
    for(i=1;i<=row;i++)
        for(j=1;j<=col;j++)
            items[i][j]=0;
    row=p-1;col=q-1;
}
/*
 * Builds candidate itemsets of size setCount from pairs of rows of items.
 *
 * items        current itemsets, rows 1..row, columns 1..col (file-level
 *              row/col)
 * setCount     number of items each new candidate must contain
 * result_item  receives the candidates, packed from row 1
 *
 * Two row indices at a time are collected in b[]; the distinct items of those
 * two rows are written to a new row of result_item.  The new row is dropped
 * when it does not contain exactly setCount items, or when the comparison
 * against the earlier rows finds it to be a repeat.  On return the file-level
 * row/col are set from the number of rows kept and the width of the last row
 * built.
 *
 * NOTE(review): the statements are kept exactly as listed, apart from one
 * restored closing brace in the j==row branch, so that "if(n<=setCount)
 * break;" follows the inner loop as it does in the second branch.  The
 * duplicate check resets n to 1 inside a loop that then increments it, and
 * col is taken from the last row built even if that row was dropped; both
 * look questionable and should be verified.
 */
void generateSubsets(int items[][100],int setCount,int result_item[][100])
{
    int i=1,j,k,l,count=0,newRow=0,newCol=0,p,q,m,n,r=1,b[row+1];
    while(i<=row)
    {
        j=i;
        k=1;
        while(j<=row)
        {
            if(j==row)
            {
                if(count+1==2)
                {
                    b[k++]=j;
                    newRow++;
                    newCol=1;
                    /* Union of the items of the selected rows, without repeats. */
                    for(l=1;l<k;l++)
                    {
                        for(p=1;p<=col;p++)
                        {
                            for(q=1;q<newCol;q++)
                                if(result_item[newRow][q]==items[b[l]][p])
                                    break;
                            if(q>=newCol)
                                result_item[newRow][newCol++]=items[b[l]][p];
                        }
                    }
                    if(newCol-1!=setCount)
                    {
                        /* Wrong size: discard the row just built. */
                        for(m=1;m<newCol;m++)
                            result_item[newRow][m]=0;
                        newRow--;
                    }
                    else
                    {
                        /* Compare with every earlier row; discard on a full match. */
                        for(m=1;m<newRow;m++)
                        {
                            r=1;
                            for(n=1;n<newCol;n++)
                            {
                                if(result_item[m][n]==result_item[newRow][r])
                                {
                                    n=1;
                                    r++;
                                    if(r>setCount)
                                    {
                                        for(p=1;p<newCol;p++)
                                            result_item[newRow][p]=0;
                                        newRow--;
                                        break;
                                    }
                                }
                            }
                            if(n<=setCount)
                                break;
                        }
                    }
                    k--;
                    b[k]=0;
                }
                if(k-1<=1)
                {
                    count=0;
                    for(l=1;l<k;l++)
                        b[l]=0;
                    break;
                }
                k--;
                count--;
                j=b[k]+1;
                b[k]=0;
            }
            else if(count+1<2)
            {
                count++;
                b[k++]=j;
                j++;
            }
            else if(count+1==2)
            {
                b[k++]=j;
                newRow++;
                newCol=1;
                /* Union of the items of the selected rows, without repeats. */
                for(l=1;l<k;l++)
                {
                    for(p=1;p<=col;p++)
                    {
                        for(q=1;q<newCol;q++)
                            if(result_item[newRow][q]==items[b[l]][p])
                                break;
                        if(q>=newCol)
                            result_item[newRow][newCol++]=items[b[l]][p];
                    }
                }
                if(newCol-1!=setCount)
                {
                    /* Wrong size: discard the row just built. */
                    for(m=1;m<newCol;m++)
                        result_item[newRow][m]=0;
                    newRow--;
                }
                else
                {
                    /* Compare with every earlier row; discard on a full match. */
                    for(m=1;m<newRow;m++)
                    {
                        r=1;
                        for(n=1;n<newCol;n++)
                        {
                            if(result_item[m][n]==result_item[newRow][r])
                            {
                                n=1;
                                r++;
                                if(r>setCount)
                                {
                                    for(p=1;p<newCol;p++)
                                        result_item[newRow][p]=0;
                                    newRow--;
                                    break;
                                }
                            }
                        }
                        if(n<=setCount)
                            break;
                    }
                }
                k--;
                b[k]=0;
                j++;
            }
            else
                j++;
        }
        i++;
    }
    row=newRow;
    col=newCol-1;
}
Dept. of CSE 23
DATA MINING LAB (R15) RGMCET (Autonomous)
OUTPUT:
Dept. of CSE 24
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 4A
Aim:
Source Code:
#include<stdio.h>
#include<string.h>
/*
 * Shared working storage for the classifier.
 *   cls     distinct class labels copied from the last column of titems
 *   titems  training tuples: titems[tuple][attribute] is a string
 *   attr    attribute names as entered
 */
char cls[10][20],titems[50][20][20],attr[10][20];
/*
 *   pcount  per-attribute match counter used while scoring one class
 *   count   number of training tuples carrying each class label
 *   fc      number of entries used in cls
 *   c       number of entries used in prob
 */
int pcount[20],count[10],fc=0,c=0;
/*
 *   p       per-class count divided by the number of tuples
 *   prob    per-class, per-attribute ratios, stored class after class
 *   pre     product of a class's prob entries
 *   result  pre[i] * p[i]; the class with the chosen result is reported
 */
float p[10],prob[20],pre[10],result[10];
int main()
{
char tup[15][20];
int i,j,n,tuples,k,ans=0,t=0;
printf("enter no of attributes:");
scanf("%d",&n);
printf("enter no of tuples:");
scanf("%d",&tuples);
printf("enter %d attributes\n",n);
for(i=0;i<n;i++)
scanf("%s",attr[i]);
for(i=0;i<tuples;i++)
{
printf("enter tuple%d\n",i+1);
for(j=0;j<n;j++)
scanf("%s",titems[i][j]);
}
Dept. of CSE 25
DATA MINING LAB (R15) RGMCET (Autonomous)
scanf("%s",tup[i]);
class(n,tuples);
for(i=0;i<fc;i++)
p[i]=count[i]/(float)tuples;
for(i=0;i<fc;i++)
{
for(j=1;j<n-1;j++)
{
pcount[j]=0;
for(k=0;k<tuples;k++)
{
if(strcmp(titems[k][j],tup[j])==0 &&strcmp(cls[i],titems[k][n-
1])==0) pcount[j]+=1;
}
if(pcount[j]!=0 && t==0)
prob[c++]=pcount[j]/(float)count[i];
else
{
t=1;
prob[c++]=(pcount[j]+1)/(float)count[i];
}
}
}
j=0;
for(i=0;i<fc;i++)
{
pre[i]=1.0;
for( ;j<((i+1)*(c/fc));j++)
pre[i]*=prob[j];
}
for(i=0;i<fc;i++)
{
result[i]=pre[i]*p[i];
if(i>0 && result[i]>result[i-1])
ans=i;
}
printf("The test tuple belongs to %s class",cls[ans]);
}
Dept. of CSE 26
DATA MINING LAB (R15) RGMCET (Autonomous)
strcpy(cls[fc++],titems[0][p-1]);
for(i=1;i<q;i++)
{
t=0;
for(k=0;k<fc;k++)
{
if(strcmp(titems[i][p-1],cls[k])==0)
{
t=-1;
break;
}
}
if(t!=-1)
strcpy(cls[fc++],titems[i][p-1]);
}
for(i=0;i<fc;i++)
{
count[i]=0;
for(k=0;k<q;k++)
{
if(strcmp(titems[k][p-1],cls[i])==0)
count[i]+=1;
}
Dept. of CSE 27
DATA MINING LAB (R15) RGMCET (Autonomous)
OUTPUT:
Dept. of CSE 28
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 4B
Aim:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
/* Number of stored tuples and number of attributes per tuple. */
int tuples,attr;
/* near[i] is the distance from the test tuple to tuple i, filled by NNC(). */
float near[20];
int isNear(void);
void NNC(int d[][20],int t[][20]);
void NNC(int d[][20],int t[][20]);

/*
 * k-nearest-neighbour lookup.
 *
 * Reads the stored tuples (integer attributes), a test tuple and k, computes
 * the distance from the test tuple to every stored tuple with NNC(), and
 * prints the k tuples returned by successive calls to isNear().
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    int i,j,k,data[20][20],test[1][20],s;

    printf("enter no of tuples:");
    /* data[] is 20x20 and near[] holds 20 distances. */
    if(scanf("%d",&tuples)!=1 || tuples<1 || tuples>20)
    {
        printf("Invalid input\n");
        return 1;
    }
    printf("enter no of attributes:");
    if(scanf("%d",&attr)!=1 || attr<1 || attr>20)
    {
        printf("Invalid input\n");
        return 1;
    }
    printf("enter %d tuples\n",tuples);
    for(i=0;i<tuples;i++)
    {
        for(j=0;j<attr;j++)
        {
            if(scanf("%d",&data[i][j])!=1)
            {
                printf("Invalid input\n");
                return 1;
            }
        }
    }
    printf("enter test tuple\n");
    for(i=0;i<attr;i++)
    {
        if(scanf("%d",&test[0][i])!=1)
        {
            printf("Invalid input\n");
            return 1;
        }
    }
    printf("enter k value:");
    /* No more neighbours can be reported than there are tuples. */
    if(scanf("%d",&k)!=1 || k<1 || k>tuples)
    {
        printf("Invalid input\n");
        return 1;
    }

    NNC(data,test);
    for(i=0;i<k;i++)
    {
        s=isNear();
        printf("(");
        for(j=0;j<attr;j++)
        {
            printf("%3d",data[s][j]);
        }
        printf(")\n");
    }
    return 0;
}
/*
 * Computes the Euclidean distance from the test tuple to every stored tuple.
 *
 * d  stored tuples, rows 0..tuples-1, columns 0..attr-1
 * t  test tuple in row 0
 *
 * Stores the distance for tuple i in near[i].
 */
void NNC(int d[][20],int t[][20])
{
    int i,j,sum;
    for(i=0;i<tuples;i++)
    {
        /* Sum of squared attribute differences for tuple i. */
        sum=0;
        for(j=0;j<attr;j++)
        {
            sum+=((d[i][j]-t[0][j])*(d[i][j]-t[0][j]));
        }
        near[i]=sqrt(sum);
    }
}
/*
 * Returns the index of the tuple with the smallest distance in near[] and
 * then overwrites that entry with a huge value, so that the next call
 * returns the next-nearest tuple.  On equal distances the lowest index wins.
 */
int isNear()
{
    float t=near[0];
    int s=0,i;
    for(i=1;i<tuples;i++)
    {
        if(near[i]<t)
        {
            t=near[i];
            s=i;
        }
    }
    /* Larger than any real distance, unlike a fixed value such as 999. */
    near[s]=(float)HUGE_VAL;
    return s;
}
Dept. of CSE 31
DATA MINING LAB (R15) RGMCET (Autonomous)
OUTPUT:
Dept. of CSE 32
DATA MINING LAB (R15) RGMCET (Autonomous)
Exercise No: 5
Aim:
Source Code:
#include<stdio.h>
/*
 * One-dimensional k-means clustering with k = 2.
 *
 * Reads 10 integers and two initial means.  Each pass assigns every number
 * to the cluster whose mean is nearer (a tie goes to cluster 2), recomputes
 * both means as integer averages and prints the clusters.  Passes repeat
 * until neither mean changes, or until a fixed pass limit is reached.
 *
 * NOTE(review): several braces and the prompt for the second mean were
 * missing from this listing and are restored here.
 *
 * Returns 0 on success, 1 on invalid input.
 */
int main(void)
{
    enum { MAX_PASSES = 100 };
    int i1,i2,i3,t1,t2,m1,m2,om1,om2,passes=0;
    int k0[10],k1[10],k2[10];

    printf("Enter 10 numbers:");
    for(i1=0;i1<10;i1++)
    {
        if(scanf("%d",&k0[i1])!=1)
        {
            printf("Invalid input\n");
            return 1;
        }
    }
    printf("Enter intial mean 1:");
    if(scanf("%d",&m1)!=1)
    {
        printf("Invalid input\n");
        return 1;
    }
    printf("Enter initial mean 2:");
    if(scanf("%d",&m2)!=1)
    {
        printf("Invalid input\n");
        return 1;
    }

    do
    {
        om1=m1;
        om2=m2;
        i2=i3=0;

        /* Assign every number to the nearer mean. */
        for(i1=0;i1<10;i1++)
        {
            t1=k0[i1]-m1;
            if(t1<0)
            {
                t1=-t1;
            }
            t2=k0[i1]-m2;
            if(t2<0)
            {
                t2=-t2;
            }
            if(t1<t2)
            {
                k1[i2]=k0[i1];
                i2++;
            }
            else
            {
                k2[i3]=k0[i1];
                i3++;
            }
        }

        /* Recompute the means; an empty cluster keeps its previous mean. */
        t2=0;
        for(t1=0;t1<i2;t1++)
        {
            t2=t2+k1[t1];
        }
        if(i2>0)
            m1=t2/i2;
        t2=0;
        for(t1=0;t1<i3;t1++)
        {
            t2=t2+k2[t1];
        }
        if(i3>0)
            m2=t2/i3;

        printf("\nCluster 1:");
        for(t1=0;t1<i2;t1++)
        {
            printf("%d ",k1[t1]);
        }
        printf("\nm1=%d",m1);
        printf("\nCluster 2:");
        for(t1=0;t1<i3;t1++)
        {
            printf("%d ",k2[t1]);
        }
        printf("\nm2=%d",m2);
        passes++;
    }
    /* Continue while either mean is still moving; the limit guards against cycling. */
    while((m1!=om1 || m2!=om2) && passes<MAX_PASSES);

    printf("\nClusters created");
    return 0;
}
Dept. of CSE 35
DATA MINING LAB (R15) RGMCET (Autonomous)
OUTPUT:
Dept. of CSE 36
RAJEEV GANDHI MEMORIAL COLLEGE OF ENGINEERING & TECHNOLOGY
(AUTONOMOUS)
DEPARTMENT OF COMPUTER SCIENCE & ENGINEERING
1. Of the 25 marks for internal, 10 marks will be awarded for day-to-day work and 10 marks
to be awarded for the Record work and 5 marks to be awarded by conducting an internal
laboratory test.
2. Concerned Teachers have to do necessary corrections with explanations.
3. Concerned Lab teachers should enter marks in index page.
4. Internal exam will be conducted by two Staff members.
1. For Practical subjects there is a continuous evaluation during the semester for 25 Sessional
marks and 50 end examination marks.
2. The end examination shall be conducted by the teacher concerned (Internal Examiner) and
another External Examiner, recommended by Head of the Department with the approval of
principal.