0% found this document useful (0 votes)
4 views

Campus Placement Prediction Using ML.ipynb

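The document is a Jupyter notebook that uses the pandas library to explore student placement data loaded from the CSV file 'Placement.csv'. The dataset has 215 rows and 15 columns, with attributes such as gender, exam scores, specialisation, placement status, and salary. The notebook displays the first and last five rows, then checks the shape, column types, missing values, and summary statistics. It counts placed versus not-placed students and lists the top five placed Sci&Tech students by salary. Preprocessing begins by dropping the 'sl_no' and 'salary' columns and encoding categorical columns as integers. Warnings are suppressed during execution for cleaner output.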

Uploaded by

Abhinandan Tomar
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
4 views

Campus Placement Prediction Using ML.ipynb

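The document is a Jupyter notebook that uses the pandas library to explore student placement data loaded from the CSV file 'Placement.csv'. The dataset has 215 rows and 15 columns, with attributes such as gender, exam scores, specialisation, placement status, and salary. The notebook displays the first and last five rows, then checks the shape, column types, missing values, and summary statistics. It counts placed versus not-placed students and lists the top five placed Sci&Tech students by salary. Preprocessing begins by dropping the 'sl_no' and 'salary' columns and encoding categorical columns as integers. Warnings are suppressed during execution for cleaner output.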

Uploaded by

Abhinandan Tomar
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 54

{

"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('Placement.csv')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import warnings"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Display Top 5 Rows of The Dataset"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sl_no</th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" <th>salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>Others</td>\n",
" <td>91.00</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>58.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" <td>270000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>Central</td>\n",
" <td>78.33</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>77.48</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" <td>200000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>65.00</td>\n",
" <td>Central</td>\n",
" <td>68.00</td>\n",
" <td>Central</td>\n",
" <td>Arts</td>\n",
" <td>64.00</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>75.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>57.80</td>\n",
" <td>Placed</td>\n",
" <td>250000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>56.00</td>\n",
" <td>Central</td>\n",
" <td>52.00</td>\n",
" <td>Central</td>\n",
" <td>Science</td>\n",
" <td>52.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>66.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>59.43</td>\n",
" <td>Not Placed</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>85.80</td>\n",
" <td>Central</td>\n",
" <td>73.60</td>\n",
" <td>Central</td>\n",
" <td>Commerce</td>\n",
" <td>73.30</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>96.8</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>55.50</td>\n",
" <td>Placed</td>\n",
" <td>425000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sl_no gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p \\\
n",
"0 1 0 67.00 Others 91.00 Others Commerce 58.00 \n",
"1 2 0 79.33 Central 78.33 Others Science 77.48 \n",
"2 3 0 65.00 Central 68.00 Central Arts 64.00 \n",
"3 4 0 56.00 Central 52.00 Central Science 52.00 \n",
"4 5 0 85.80 Central 73.60 Central Commerce 73.30 \n",
"\n",
" degree_t workex etest_p specialisation mba_p status salary \
n",
"0 Sci&Tech No 55.0 Mkt&HR 58.80 Placed 270000.0 \
n",
"1 Sci&Tech Yes 86.5 Mkt&Fin 66.28 Placed 200000.0 \
n",
"2 Comm&Mgmt No 75.0 Mkt&Fin 57.80 Placed 250000.0 \
n",
"3 Sci&Tech No 66.0 Mkt&HR 59.43 Not Placed NaN \
n",
"4 Comm&Mgmt No 96.8 Mkt&Fin 55.50 Placed 425000.0 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Check Last 5 Rows of The Dataset"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sl_no</th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" <th>salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>210</th>\n",
" <td>211</td>\n",
" <td>0</td>\n",
" <td>80.6</td>\n",
" <td>Others</td>\n",
" <td>82.0</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>77.6</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>91.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>74.49</td>\n",
" <td>Placed</td>\n",
" <td>400000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211</th>\n",
" <td>212</td>\n",
" <td>0</td>\n",
" <td>58.0</td>\n",
" <td>Others</td>\n",
" <td>60.0</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>72.0</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>74.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>53.62</td>\n",
" <td>Placed</td>\n",
" <td>275000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>212</th>\n",
" <td>213</td>\n",
" <td>0</td>\n",
" <td>67.0</td>\n",
" <td>Others</td>\n",
" <td>67.0</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>73.0</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>Yes</td>\n",
" <td>59.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>69.72</td>\n",
" <td>Placed</td>\n",
" <td>295000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>213</th>\n",
" <td>214</td>\n",
" <td>1</td>\n",
" <td>74.0</td>\n",
" <td>Others</td>\n",
" <td>66.0</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>58.0</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>70.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>60.23</td>\n",
" <td>Placed</td>\n",
" <td>204000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>214</th>\n",
" <td>215</td>\n",
" <td>0</td>\n",
" <td>62.0</td>\n",
" <td>Central</td>\n",
" <td>58.0</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>53.0</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>89.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>60.22</td>\n",
" <td>Not Placed</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sl_no gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p \\\
n",
"210 211 0 80.6 Others 82.0 Others Commerce 77.6 \
n",
"211 212 0 58.0 Others 60.0 Others Science 72.0 \
n",
"212 213 0 67.0 Others 67.0 Others Commerce 73.0 \
n",
"213 214 1 74.0 Others 66.0 Others Commerce 58.0 \
n",
"214 215 0 62.0 Central 58.0 Others Science 53.0 \
n",
"\n",
" degree_t workex etest_p specialisation mba_p status salary
\n",
"210 Comm&Mgmt No 91.0 Mkt&Fin 74.49 Placed 400000.0
\n",
"211 Sci&Tech No 74.0 Mkt&Fin 53.62 Placed 275000.0
\n",
"212 Comm&Mgmt Yes 59.0 Mkt&Fin 69.72 Placed 295000.0
\n",
"213 Comm&Mgmt No 70.0 Mkt&HR 60.23 Placed 204000.0
\n",
"214 Comm&Mgmt No 89.0 Mkt&HR 60.22 Not Placed NaN
"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Find Shape of Our Dataset (Number of Rows And Number of Columns)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(215, 15)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of Rows 215\n",
"Number of Columns 15\n"
]
}
],
"source": [
"print(\"Number of Rows\",data.shape[0])\n",
"print(\"Number of Columns\",data.shape[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4. Get Information About Our Dataset Like the Total Number of Rows, Total Number of Columns, Datatypes of Each Column And Memory Requirement"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 215 entries, 0 to 214\n",
"Data columns (total 15 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 sl_no 215 non-null int64 \n",
" 1 gender 215 non-null int64 \n",
" 2 ssc_p 215 non-null float64\n",
" 3 ssc_b 215 non-null object \n",
" 4 hsc_p 215 non-null float64\n",
" 5 hsc_b 215 non-null object \n",
" 6 hsc_s 215 non-null object \n",
" 7 degree_p 215 non-null float64\n",
" 8 degree_t 215 non-null object \n",
" 9 workex 215 non-null object \n",
" 10 etest_p 215 non-null float64\n",
" 11 specialisation 215 non-null object \n",
" 12 mba_p 215 non-null float64\n",
" 13 status 215 non-null object \n",
" 14 salary 148 non-null float64\n",
"dtypes: float64(6), int64(2), object(7)\n",
"memory usage: 25.3+ KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5. Check Null Values In The Dataset"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"sl_no 0\n",
"gender 0\n",
"ssc_p 0\n",
"ssc_b 0\n",
"hsc_p 0\n",
"hsc_b 0\n",
"hsc_s 0\n",
"degree_p 0\n",
"degree_t 0\n",
"workex 0\n",
"etest_p 0\n",
"specialisation 0\n",
"mba_p 0\n",
"status 0\n",
"salary 67\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isnull().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6. Get Overall Statistics About The Dataset"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sl_no</th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>hsc_p</th>\n",
" <th>degree_p</th>\n",
" <th>etest_p</th>\n",
" <th>mba_p</th>\n",
" <th>salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>215.000000</td>\n",
" <td>215.000000</td>\n",
" <td>215.000000</td>\n",
" <td>215.000000</td>\n",
" <td>215.000000</td>\n",
" <td>215.000000</td>\n",
" <td>215.000000</td>\n",
" <td>148.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>108.000000</td>\n",
" <td>0.353488</td>\n",
" <td>67.303395</td>\n",
" <td>66.333163</td>\n",
" <td>66.370186</td>\n",
" <td>72.100558</td>\n",
" <td>62.278186</td>\n",
" <td>288655.405405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>62.209324</td>\n",
" <td>0.479168</td>\n",
" <td>10.827205</td>\n",
" <td>10.897509</td>\n",
" <td>7.358743</td>\n",
" <td>13.275956</td>\n",
" <td>5.833385</td>\n",
" <td>93457.452420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>40.890000</td>\n",
" <td>37.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>51.210000</td>\n",
" <td>200000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>54.500000</td>\n",
" <td>0.000000</td>\n",
" <td>60.600000</td>\n",
" <td>60.900000</td>\n",
" <td>61.000000</td>\n",
" <td>60.000000</td>\n",
" <td>57.945000</td>\n",
" <td>240000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>108.000000</td>\n",
" <td>0.000000</td>\n",
" <td>67.000000</td>\n",
" <td>65.000000</td>\n",
" <td>66.000000</td>\n",
" <td>71.000000</td>\n",
" <td>62.000000</td>\n",
" <td>265000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>161.500000</td>\n",
" <td>1.000000</td>\n",
" <td>75.700000</td>\n",
" <td>73.000000</td>\n",
" <td>72.000000</td>\n",
" <td>83.500000</td>\n",
" <td>66.255000</td>\n",
" <td>300000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>215.000000</td>\n",
" <td>1.000000</td>\n",
" <td>89.400000</td>\n",
" <td>97.700000</td>\n",
" <td>91.000000</td>\n",
" <td>98.000000</td>\n",
" <td>77.890000</td>\n",
" <td>940000.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sl_no gender ssc_p hsc_p degree_p
etest_p \\\n",
"count 215.000000 215.000000 215.000000 215.000000 215.000000
215.000000 \n",
"mean 108.000000 0.353488 67.303395 66.333163 66.370186
72.100558 \n",
"std 62.209324 0.479168 10.827205 10.897509 7.358743
13.275956 \n",
"min 1.000000 0.000000 40.890000 37.000000 50.000000
50.000000 \n",
"25% 54.500000 0.000000 60.600000 60.900000 61.000000
60.000000 \n",
"50% 108.000000 0.000000 67.000000 65.000000 66.000000
71.000000 \n",
"75% 161.500000 1.000000 75.700000 73.000000 72.000000
83.500000 \n",
"max 215.000000 1.000000 89.400000 97.700000 91.000000
98.000000 \n",
"\n",
" mba_p salary \n",
"count 215.000000 148.000000 \n",
"mean 62.278186 288655.405405 \n",
"std 5.833385 93457.452420 \n",
"min 51.210000 200000.000000 \n",
"25% 57.945000 240000.000000 \n",
"50% 62.000000 265000.000000 \n",
"75% 66.255000 300000.000000 \n",
"max 77.890000 940000.000000 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 7. EDA"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['sl_no', 'gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s',\n",
" 'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p',\n",
" 'status', 'salary'],\n",
" dtype='object')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### How Many Students Got Placed?"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Placed', 'Not Placed'], dtype=object)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['status'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Placed 148\n",
"Not Placed 67\n",
"Name: status, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['status'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Which Are the Top 5 Placed Sci&Tech Students by Salary?"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['sl_no', 'gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s',\n",
" 'degree_p', 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p',\n",
" 'status', 'salary'],\n",
" dtype='object')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sl_no</th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" <th>salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>151</td>\n",
" <td>0</td>\n",
" <td>71.00</td>\n",
" <td>Central</td>\n",
" <td>58.66</td>\n",
" <td>Central</td>\n",
" <td>Science</td>\n",
" <td>58.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>56.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>61.30</td>\n",
" <td>Placed</td>\n",
" <td>690000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>78</td>\n",
" <td>0</td>\n",
" <td>64.00</td>\n",
" <td>Others</td>\n",
" <td>80.00</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>65.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>69.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>57.65</td>\n",
" <td>Placed</td>\n",
" <td>500000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163</th>\n",
" <td>164</td>\n",
" <td>0</td>\n",
" <td>63.00</td>\n",
" <td>Others</td>\n",
" <td>67.00</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>64.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>75.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.46</td>\n",
" <td>Placed</td>\n",
" <td>500000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174</th>\n",
" <td>175</td>\n",
" <td>0</td>\n",
" <td>73.24</td>\n",
" <td>Others</td>\n",
" <td>50.83</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>64.27</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>64.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.23</td>\n",
" <td>Placed</td>\n",
" <td>500000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>54</td>\n",
" <td>0</td>\n",
" <td>80.00</td>\n",
" <td>Others</td>\n",
" <td>70.00</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>72.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>87.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>71.04</td>\n",
" <td>Placed</td>\n",
" <td>450000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sl_no gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p \\\
n",
"150 151 0 71.00 Central 58.66 Central Science 58.00 \
n",
"77 78 0 64.00 Others 80.00 Others Science 65.00 \
n",
"163 164 0 63.00 Others 67.00 Others Science 64.00 \
n",
"174 175 0 73.24 Others 50.83 Others Science 64.27 \
n",
"53 54 0 80.00 Others 70.00 Others Science 72.00 \
n",
"\n",
" degree_t workex etest_p specialisation mba_p status salary \n",
"150 Sci&Tech Yes 56.0 Mkt&Fin 61.30 Placed 690000.0 \n",
"77 Sci&Tech Yes 69.0 Mkt&Fin 57.65 Placed 500000.0 \n",
"163 Sci&Tech No 75.0 Mkt&Fin 66.46 Placed 500000.0 \n",
"174 Sci&Tech Yes 64.0 Mkt&Fin 66.23 Placed 500000.0 \n",
"53 Sci&Tech No 87.0 Mkt&HR 71.04 Placed 450000.0 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[(data['degree_t']==\"Sci&Tech\") & (data['status']==\"Placed\")].sort_values(by=\"salary\",ascending=False).head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 8. Data Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sl_no</th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" <th>salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>Others</td>\n",
" <td>91.00</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>58.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" <td>270000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>Central</td>\n",
" <td>78.33</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>77.48</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" <td>200000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>65.00</td>\n",
" <td>Central</td>\n",
" <td>68.00</td>\n",
" <td>Central</td>\n",
" <td>Arts</td>\n",
" <td>64.00</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>75.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>57.80</td>\n",
" <td>Placed</td>\n",
" <td>250000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>56.00</td>\n",
" <td>Central</td>\n",
" <td>52.00</td>\n",
" <td>Central</td>\n",
" <td>Science</td>\n",
" <td>52.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>66.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>59.43</td>\n",
" <td>Not Placed</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>85.80</td>\n",
" <td>Central</td>\n",
" <td>73.60</td>\n",
" <td>Central</td>\n",
" <td>Commerce</td>\n",
" <td>73.30</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>96.8</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>55.50</td>\n",
" <td>Placed</td>\n",
" <td>425000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sl_no gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p \\\
n",
"0 1 0 67.00 Others 91.00 Others Commerce 58.00 \n",
"1 2 0 79.33 Central 78.33 Others Science 77.48 \n",
"2 3 0 65.00 Central 68.00 Central Arts 64.00 \n",
"3 4 0 56.00 Central 52.00 Central Science 52.00 \n",
"4 5 0 85.80 Central 73.60 Central Commerce 73.30 \n",
"\n",
" degree_t workex etest_p specialisation mba_p status salary \
n",
"0 Sci&Tech No 55.0 Mkt&HR 58.80 Placed 270000.0 \
n",
"1 Sci&Tech Yes 86.5 Mkt&Fin 66.28 Placed 200000.0 \
n",
"2 Comm&Mgmt No 75.0 Mkt&Fin 57.80 Placed 250000.0 \
n",
"3 Sci&Tech No 66.0 Mkt&HR 59.43 Not Placed NaN \
n",
"4 Comm&Mgmt No 96.8 Mkt&Fin 55.50 Placed 425000.0 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"data = data.drop(['sl_no','salary'],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.0</td>\n",
" <td>Others</td>\n",
" <td>91.0</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>58.0</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.8</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t
workex \\\n",
"0 0 67.0 Others 91.0 Others Commerce 58.0 Sci&Tech
No \n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 Mkt&HR 58.8 Placed "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Encoding the Categorical Columns"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Others', 'Central'], dtype=object)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['ssc_b'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"data['ssc_b'] = data['ssc_b'].map({'Central':1,'Others':0})"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>Others</td>\n",
" <td>Commerce</td>\n",
" <td>58.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>Others</td>\n",
" <td>Science</td>\n",
" <td>77.48</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex
\\\n",
"0 0 67.00 0 91.00 Others Commerce 58.00 Sci&Tech No
\n",
"1 0 79.33 1 78.33 Others Science 77.48 Sci&Tech Yes
\n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 Mkt&HR 58.80 Placed \n",
"1 86.5 Mkt&Fin 66.28 Placed "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Others', 'Central'], dtype=object)"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['hsc_b'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"data['hsc_b'] = data['hsc_b'].map({'Central':1,'Others':0})"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>0</td>\n",
" <td>Commerce</td>\n",
" <td>58.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>0</td>\n",
" <td>Science</td>\n",
" <td>77.48</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex
\\\n",
"0 0 67.00 0 91.00 0 Commerce 58.00 Sci&Tech No
\n",
"1 0 79.33 1 78.33 0 Science 77.48 Sci&Tech Yes
\n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 Mkt&HR 58.80 Placed \n",
"1 86.5 Mkt&Fin 66.28 Placed "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Commerce', 'Science', 'Arts'], dtype=object)"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['hsc_s'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"data['hsc_s'] = data['hsc_s'].map({'Science':2,'Commerce':1,'Arts':0})"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>58.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>77.48</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>65.00</td>\n",
" <td>1</td>\n",
" <td>68.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>64.00</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>75.0</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>57.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>56.00</td>\n",
" <td>1</td>\n",
" <td>52.00</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>52.00</td>\n",
" <td>Sci&amp;Tech</td>\n",
" <td>No</td>\n",
" <td>66.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>59.43</td>\n",
" <td>Not Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>85.80</td>\n",
" <td>1</td>\n",
" <td>73.60</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>73.30</td>\n",
" <td>Comm&amp;Mgmt</td>\n",
" <td>No</td>\n",
" <td>96.8</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>55.50</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex \\\n",
"0 0 67.00 0 91.00 0 1 58.00 Sci&Tech No \n",
"1 0 79.33 1 78.33 0 2 77.48 Sci&Tech Yes \n",
"2 0 65.00 1 68.00 1 0 64.00 Comm&Mgmt No \n",
"3 0 56.00 1 52.00 1 2 52.00 Sci&Tech No \n",
"4 0 85.80 1 73.60 1 1 73.30 Comm&Mgmt No \n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 Mkt&HR 58.80 Placed \n",
"1 86.5 Mkt&Fin 66.28 Placed \n",
"2 75.0 Mkt&Fin 57.80 Placed \n",
"3 66.0 Mkt&HR 59.43 Not Placed \n",
"4 96.8 Mkt&Fin 55.50 Placed "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Sci&Tech', 'Comm&Mgmt', 'Others'], dtype=object)"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['degree_t'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# Encode only while the column is still text: a second .map() over the integer codes would return all NaN.\n",
"if not pd.api.types.is_numeric_dtype(data['degree_t']):\n",
"    data['degree_t'] = data['degree_t'].map({'Sci&Tech':2,'Comm&Mgmt':1,'Others':0})\n",
"assert data['degree_t'].notna().all(), 'degree_t has a category missing from the mapping'"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>58.00</td>\n",
" <td>2</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>Mkt&amp;HR</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>77.48</td>\n",
" <td>2</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>Mkt&amp;Fin</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex \\\n",
"0 0 67.00 0 91.00 0 1 58.00 2 No \n",
"1 0 79.33 1 78.33 0 2 77.48 2 Yes \n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 Mkt&HR 58.80 Placed \n",
"1 86.5 Mkt&Fin 66.28 Placed "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Mkt&HR', 'Mkt&Fin'], dtype=object)"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['specialisation'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"# Encode only while the column is still text: a second .map() over the integer codes would return all NaN.\n",
"if not pd.api.types.is_numeric_dtype(data['specialisation']):\n",
"    data['specialisation'] = data['specialisation'].map({'Mkt&HR':1,'Mkt&Fin':0})\n",
"assert data['specialisation'].notna().all(), 'specialisation has a category missing from the mapping'"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>58.00</td>\n",
" <td>2</td>\n",
" <td>No</td>\n",
" <td>55.0</td>\n",
" <td>1</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>77.48</td>\n",
" <td>2</td>\n",
" <td>Yes</td>\n",
" <td>86.5</td>\n",
" <td>0</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex \\\n",
"0 0 67.00 0 91.00 0 1 58.00 2 No \n",
"1 0 79.33 1 78.33 0 2 77.48 2 Yes \n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 1 58.80 Placed \n",
"1 86.5 0 66.28 Placed "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['No', 'Yes'], dtype=object)"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['workex'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Encode only while the column is still text: a second .map() over the integer codes would return all NaN.\n",
"if not pd.api.types.is_numeric_dtype(data['workex']):\n",
"    data['workex'] = data['workex'].map({'Yes':1,'No':0})\n",
"assert data['workex'].notna().all(), 'workex has a category missing from the mapping'"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>58.00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>55.0</td>\n",
" <td>1</td>\n",
" <td>58.80</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>77.48</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>86.5</td>\n",
" <td>0</td>\n",
" <td>66.28</td>\n",
" <td>Placed</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex \\\n",
"0 0 67.00 0 91.00 0 1 58.00 2 0 \n",
"1 0 79.33 1 78.33 0 2 77.48 2 1 \n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 1 58.80 Placed \n",
"1 86.5 0 66.28 Placed "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Placed', 'Not Placed'], dtype=object)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['status'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"# Encode only while the column is still text: a second .map() over the integer codes would return all NaN.\n",
"if not pd.api.types.is_numeric_dtype(data['status']):\n",
"    data['status'] = data['status'].map({'Placed':1,'Not Placed':0})\n",
"assert data['status'].notna().all(), 'status has a category missing from the mapping'"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>ssc_p</th>\n",
" <th>ssc_b</th>\n",
" <th>hsc_p</th>\n",
" <th>hsc_b</th>\n",
" <th>hsc_s</th>\n",
" <th>degree_p</th>\n",
" <th>degree_t</th>\n",
" <th>workex</th>\n",
" <th>etest_p</th>\n",
" <th>specialisation</th>\n",
" <th>mba_p</th>\n",
" <th>status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>67.00</td>\n",
" <td>0</td>\n",
" <td>91.00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>58.00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>55.0</td>\n",
" <td>1</td>\n",
" <td>58.80</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>79.33</td>\n",
" <td>1</td>\n",
" <td>78.33</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>77.48</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>86.5</td>\n",
" <td>0</td>\n",
" <td>66.28</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>65.00</td>\n",
" <td>1</td>\n",
" <td>68.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>64.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>75.0</td>\n",
" <td>0</td>\n",
" <td>57.80</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>56.00</td>\n",
" <td>1</td>\n",
" <td>52.00</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>52.00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>66.0</td>\n",
" <td>1</td>\n",
" <td>59.43</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>85.80</td>\n",
" <td>1</td>\n",
" <td>73.60</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>73.30</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>96.8</td>\n",
" <td>0</td>\n",
" <td>55.50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" gender ssc_p ssc_b hsc_p hsc_b hsc_s degree_p degree_t workex \\\n",
"0 0 67.00 0 91.00 0 1 58.00 2 0 \n",
"1 0 79.33 1 78.33 0 2 77.48 2 1 \n",
"2 0 65.00 1 68.00 1 0 64.00 1 0 \n",
"3 0 56.00 1 52.00 1 2 52.00 2 0 \n",
"4 0 85.80 1 73.60 1 1 73.30 1 0 \n",
"\n",
" etest_p specialisation mba_p status \n",
"0 55.0 1 58.80 1 \n",
"1 86.5 0 66.28 1 \n",
"2 75.0 0 57.80 1 \n",
"3 66.0 1 59.43 0 \n",
"4 96.8 0 55.50 1 "
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 9. Store Feature Matrix In X and Response(Target) In Vector y"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['gender', 'ssc_p', 'ssc_b', 'hsc_p', 'hsc_b', 'hsc_s', 'degree_p',\n",
" 'degree_t', 'workex', 'etest_p', 'specialisation', 'mba_p', 'status'],\n",
" dtype='object')"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"X = data.drop('status',axis=1)\n",
"y= data['status']"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 1\n",
"2 1\n",
"3 0\n",
"4 1\n",
" ..\n",
"210 1\n",
"211 1\n",
"212 1\n",
"213 1\n",
"214 0\n",
"Name: status, Length: 215, dtype: int64"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 10. Splitting The Dataset Into The Training Set And Test Set"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [

"# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.\n",
"X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 11. Import The models"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn import svm\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import GradientBoostingClassifier"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 12. Model Training"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GradientBoostingClassifier()"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.svm import SVC\n",
"\n",
"lr = LogisticRegression()\n",
"lr.fit(X_train,y_train)\n",
"\n",
"# Instantiate through the SVC class: `svm = svm.SVC()` replaced the imported `svm` module\n",
"# with the estimator, so running this cell a second time raised AttributeError.\n",
"svm = SVC()\n",
"svm.fit(X_train,y_train)\n",
"\n",
"knn=KNeighborsClassifier()\n",
"knn.fit(X_train,y_train)\n",
"\n",
"# The tree-based models are randomised; seed them (same value as the train/test split)\n",
"# so that a re-run reproduces the same scores.\n",
"dt=DecisionTreeClassifier(random_state=42)\n",
"dt.fit(X_train,y_train)\n",
"\n",
"rf=RandomForestClassifier(random_state=42)\n",
"rf.fit(X_train,y_train)\n",
"\n",
"gb=GradientBoostingClassifier(random_state=42)\n",
"gb.fit(X_train,y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 13. Prediction on Test Data"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"# One prediction vector per fitted model, in the order LR, SVC, KNN, DT, RF, GB.\n",
"y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6 = [\n",
"    clf.predict(X_test) for clf in (lr, svm, knn, dt, rf, gb)\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 14. Evaluating the Algorithms"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"# Test-set accuracy of each model, numbered like the y_pred* vectors.\n",
"score1, score2, score3, score4, score5, score6 = [\n",
"    accuracy_score(y_test, predicted)\n",
"    for predicted in (y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8837209302325582 0.7674418604651163 0.7906976744186046 0.8372093023255814 0.7906976744186046 0.813953488372093\n"
]
}
],
"source": [
"print(score1,score2,score3,score4,score5,score6)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"# Accuracy table in percent, one row per model.\n",
"final_data = pd.DataFrame({\n",
"    'Models': ['LR','SVC','KNN','DT','RF','GB'],\n",
"    'ACC': [fraction*100 for fraction in (score1, score2, score3, score4, score5, score6)],\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Models</th>\n",
" <th>ACC</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>LR</td>\n",
" <td>88.372093</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SVC</td>\n",
" <td>76.744186</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>KNN</td>\n",
" <td>79.069767</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>DT</td>\n",
" <td>83.720930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>RF</td>\n",
" <td>79.069767</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>GB</td>\n",
" <td>81.395349</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Models ACC\n",
"0 LR 88.372093\n",
"1 SVC 76.744186\n",
"2 KNN 79.069767\n",
"3 DT 83.720930\n",
"4 RF 79.069767\n",
"5 GB 81.395349"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_data"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Models', ylabel='ACC'>"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png":
"iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIH
ZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/
Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAARD0lEQVR4nO3deZBlZX3G8e8Do7K4sLUUAjKoRIOKCFOghY
JxNEFFIUopBHG0sLBS4gLE4FYCllZiEFGUqCOgY6kIwQXcFwRBY9AZQWBAZQsIYWlK0CBEBH75457Rpqe7Z
xrm3Nsz7/dTNdX3vOece3+n7vRz337vOe9JVSFJasd6oy5AkjRcBr8kNcbgl6TGGPyS1BiDX5IaM2/
UBayOLbbYoubPnz/
qMiRprbJs2bLbqmpscvtaEfzz589n6dKloy5DktYqSa6bqt2hHklqjMEvSY0x+CWpMQa/
JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JasxaceXuVHZ922dHXcKsLTvuNaMuQZLs8UtSawx+SWqMwS9JjTH
4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqzFo7ZYM0l/
1wz71GXcKs7HX+D0ddgobIHr8kNcbgl6TGGPyS1BiDX5Ia0+uXu0kOB14PFHAp8DpgK+CLwObAMuDgqrqnz
zokaXW9/9X7j7qEWXvX586c1fa99fiTbA28GVhQVU8D1gcOAD4AnFBVTwJuBw7pqwZJ0sr6HuqZB2yYZB6w
EXAT8HxgxcfTEmC/nmuQJE3QW/BX1Y3AB4HrGQT+7xgM7dxRVfd2m90AbD3V/
kkOTbI0ydLx8fG+ypSk5vQ51LMpsC+wPfA4YGNg79Xdv6oWV9WCqlowNjbWU5WS1J4+h3peAFxbVeNV9Sfg
y8AewCbd0A/ANsCNPdYgSZqkz7N6rgeelWQj4G5gIbAUOBfYn8GZPYuAs3qsQXPUHh/dY9QlzNqP3/
TjUZcwZ3zsyK+NuoRZOez4l466hDmlzzH+Cxl8iftzBqdyrgcsBo4CjkhyFYNTOk/pqwZJ0sp6PY+/
qo4Gjp7UfA2wW5+vK0manlfuSlJjDH5Jaozz8c9R17/36aMuYdYe/
55LR12CpNVgj1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/
JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/
BLUmMMfklqjMEvSY0x+CWpMQa/
JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/
JLUGINfkhpj8EtSYwx+SWpMr8GfZJMkZyb5ZZIrkjw7yWZJvpfkyu7npn3WIEl6oL57/
B8Bvl1VTwGeAVwBvB04p6p2AM7pliVJQ9Jb8Cd5DLAncApAVd1TVXcA+wJLus2WAPv1VYMkaWV99vi3B8aB
Tye5KMnJSTYGtqyqm7ptbga2nGrnJIcmWZpk6fj4eI9lSlJb+gz+ecAuwMer6pnAH5g0rFNVBdRUO1fV4qp
aUFULxsbGeixTktrSZ/DfANxQVRd2y2cy+CC4JclWAN3PW3usQZI0SW/
BX1U3A79J8uSuaSFwOXA2sKhrWwSc1VcNkqSVzev5+d8EfD7Jw4FrgNcx+LA5I8khwHXAK3uuQZI0Qa/
BX1UXAwumWLWwz9eVJE3PK3clqTEGvyQ1xuCXpMYY/
JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/
BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxkwb/EmOS/
KGKdrfkORf+y1LktSXmXr8zwcWT9H+KWCffsqRJPVtpuB/RFXV5Maquh9IfyVJkvo0U/
DfnWSHyY1d2939lSRJ6tO8Gda9B/
hWkvcBy7q2BcA7gLf2XJckqSfTBn9VfSvJfsDbgDd1zZcBr6iqS4dQmySpB9MGf5INgFuqatGk9rEkG1TV/
/VenSRpjZtpjP9E4LlTtD8HOKGfciRJfZsp+Hetqi9PbqyqrwB79leSJKlPMwX/
Rg9yP0nSHDZTgN+aZLfJjV3beH8lSZL6NNPpnG8DzkjyGR54OudrgAN6rkuS1JNpe/
xV9VNgdwZX6b4WWHF2zyIG4S9JWgvN1OOnqm4Bjk6yC3Agg9DfE/
jSEGqTJPVgpvP4/4pB2B8I3AacDqSq/mZItUmSejBTj/
+XwAXAPlV1FUCSw4dSlSSpNzOd1fNy4Cbg3CSfSrIQZ+WUpLXeTF/ufrWqDgCeApzLYGK2xyb5eJK/
HVJ9kqQ1bJUXYlXVH6rqC1X1UmAb4CLgqN4rkyT1YlZX4FbV7VW1uKoWru4+SdZPclGSr3fL2ye5MMlVSU5
P8vDZFi1JevCGMfXCW4ArJix/ADihqp4E3A4cMoQaJEmdXoM/
yTbAS4CTu+UwuJfvmd0mS4D9+qxBkvRAfff4Pwz8M3B/
t7w5cEdV3dst3wBsPdWOSQ5NsjTJ0vFxpwaSpDWlt+BPsg9wa1UtW+XGU+i+S1hQVQvGxsbWcHWS1K4Zp2x
4iPYAXpbkxcAGwKOBjwCbJJnX9fq3AW7ssQZJ0iS99fir6h1VtU1VzWcwm+cPquogBtcE7N9ttgg4q68aJE
krG8UNVY4CjkhyFYMx/1NGUIMkNavPoZ4/q6rzgPO6x9cAK93gRZI0HN5CUZIaY/
BLUmMMfklqjMEvSY0x+CWpMQa/
JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/
JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/
BLUmMMfklqjMEvSY0x+CWpMQa/
JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1prfgT7JtknOTXJ5keZK3dO2bJflekiu
7n5v2VYMkaWV99vjvBY6sqh2BZwFvTLIj8HbgnKraATinW5YkDUlvwV9VN1XVz7vH/
wtcAWwN7Ass6TZbAuzXVw2SpJUNZYw/
yXzgmcCFwJZVdVO36mZgy2n2OTTJ0iRLx8fHh1GmJDWh9+BP8kjgS8Bbq+r3E9dVVQE11X5VtbiqFlTVgrG
xsb7LlKRm9Br8SR7GIPQ/X1Vf7ppvSbJVt34r4NY+a5AkPVCfZ/
UEOAW4oqo+NGHV2cCi7vEi4Ky+apAkrWxej8+9B3AwcGmSi7u2dwL/
CpyR5BDgOuCVPdYgSZqkt+Cvqh8BmWb1wr5eV5I0M6/claTGGPyS1BiDX5IaY/
BLUmMMfklqjMEvSY0x+CWpMQa/
JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/
JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/
BLUmMMfklqjMEvSY0x+CWpMQa/
JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNGUnwJ9k7ya+SXJXk7aOoQZJaNfTgT7I+cBLwImBH4MA
kOw67Dklq1Sh6/LsBV1XVNVV1D/BFYN8R1CFJTUpVDfcFk/2Bvavq9d3ywcDuVXXYpO0OBQ7tFp8M/
GqIZW4B3DbE1xu2dfn41uVjA49vbTfs49uuqsYmN84bYgGzUlWLgcWjeO0kS6tqwSheexjW5eNbl48NPL61
3Vw5vlEM9dwIbDtheZuuTZI0BKMI/
p8BOyTZPsnDgQOAs0dQhyQ1aehDPVV1b5LDgO8A6wOnVtXyYdexCiMZYhqidfn41uVjA49vbTcnjm/
oX+5KkkbLK3clqTEGvyQ1pvngT3LnFG3HJLkxycVJLk9y4ChqezCSvCvJ8iSXdPUfneRfJm2zc5IrusePTP
LJJFcnWZbkvCS7j6b6mU18r5K8OMmvk2zXvV93JXnsNNtWkuMnLP9TkmOGVviDkOS+7v1bnuQXSY5Msl6Sv
+vaL05yZzf1ycVJPjvqmmdrwjFeluRrSTbp2ucnuXvCcV7cnQiy1kiyZZIvJLmm+736SZK/T/K8JL/
rjumSJN+f+P92WJoP/hmcUFU7M7iq+JNJHjbielYpybOBfYBdqmon4AXAucCrJm16AHBa9/
hk4LfADlW1K/A6BheZzFlJFgInAi+qquu65tuAI6fZ5Y/
Ay5PM6eOa5O6q2rmqngq8kMEUJ0dX1Xe69p2BpcBB3fJrRlnsg7TiGJ/G4P/gGyesu3rFcXb/
7hlRjbOWJMBXgfOr6gnd79UBDE5dB7igO6adGJzl+Mapn6k/
Bv8qVNWVwF3ApqOuZTVsBdxWVX8EqKrbqup84PZJvfhXAqcleSKwO/Duqrq/
2+faqvrGsAtfXUn2BD4F7FNVV09YdSrwqiSbTbHbvQzOpjh8CCWucVV1K4Or2A/
rQmVd9BNg61EXsYY8H7inqj6xoqGqrquqj07cqHsvHwXcPuT6DP5VSbILcGX3yzfXfRfYthsC+fcke3Xtpz
HocZDkWcBvuw+0pwIXV9V9oyl31h7BoCe1X1X9ctK6OxmE/
1um2fck4KAkj+mvvP5U1TUMTn8e+rBA37qJGxfywOt5njhhmOekEZX2YD0V+PkM65+b5GLgegZ/
lZ86jKImMvind3iS5cCFwPtHXczqqKo7gV0Z9A7HgdOTvBY4Hdg/
yXo8cJhnbfMn4D+BQ6ZZfyKwKMmjJq+oqt8DnwXe3F95mqUNuwC8GdgS+N6EdROHeoY+FLImJTmp+57mZ13
TiqGebYFPA/827JoM/
umd0I2vvgI4JckGoy5odVTVfVV1XlUdDRwGvKKqfgNcC+zF4HhO7zZfDjyj63GtDe5nMEy1W5J3Tl5ZVXcA
X2D6MdMPM/jQ2Lin+nqT5AnAfcDa8Jfn6rq7+65iOyCMYKy7J8uBXVYsdB9cC4GVJktj8FfOnkOq688M/
lWoqrMZfIm2aNS1rEqSJyfZYULTzsCKLz9PA04ArqmqGwC6MfKlwLErxo67MypeMryqZ6eq7gJewmDYZqqe
/4eANzDFVelV9VvgDKb/
i2FOSjIGfAL4WK2DV1x27+mbgSOTzNmJI2fhB8AGSf5xQttG02z7HODqadb1xuCHjZLcMOHfEVNs817giG6
oZC57JLCkOwX1EgY3ujmmW/cfDMYeJw/
zvJ7Bn9lXJbkM+AxzvFfZBfjewLuTvGzSutuArzD4PmAqxzPHz1rqbLjidE7g+wy+vzl2xDX1pqouAi4B1p
pTp6fTfTjvB+yV5NokPwWWAEd1mzy3e29/ARzM9Gej9cYpGySpMXO9BytJWsMMfklqjMEvSY0x+CWpMQa/
JDXG4FfTupk7PzdheV6S8SRfn+Xz/PeqJoFbnW2kYTD41bo/AE9LsmG3/ELgxhHWI/
XO4JfgmwyuBobBBUR/
vsgtyWZJvtrNnf5fSXbq2jdP8t1uvvyTGUw5sGKfVyf5aXeRzicnT4mRZOMk3+jmb7ksyeRps6VeGfwSfBE
4oJuPaScGE/OtcCxwUTd3+jsZTPQGcDTwo24+p68AjwdI8tcM7n+wRzcPzX3AQZNeb2/gf6rqGd1c9N/
u5aikaawL82JID0lVXZJkPoPe/
jcnrX4Og4ntqKofdD39RzOYWOvlXfs3kqyYU30hgxlSf9ZNf7QhK0+BcSlwfJIPAF+vqgvW/
FFJ0zP4pYGzgQ8CzwM2fwjPE2BJVb1jug2q6tfdfR5eDLwvyTlV9d6H8JrSrDjUIw2cChxbVZdOar+Abqgm
yfMY3OHs98D5wD907S/iL3doO4fBvQ8e263bLMl2E58wyeOAu6rqc8BxTJjCVxoGe/
wS0E1VfeIUq44BTu1mO72Lv0zPfSyD21cuZ3BzmOu757k8ybuB73azuf6JwTzz1014zqcDxyW5v1s/
cfpeqXfOzilJjXGoR5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxvw/
EJxCZYVRTqwAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Pass x/y by keyword: seaborn >= 0.12 no longer accepts them positionally.\n",
"sns.barplot(x='Models', y='ACC', data=final_data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 15. Prediction on New Data"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"new_data = pd.DataFrame({\n",
" 'gender':0,\n",
" 'ssc_p':67.0,\n",
" 'ssc_b':0,\n",
" 'hsc_p':91.0,\n",
" 'hsc_b':0,\n",
" 'hsc_s':1,\n",
" 'degree_p':58.0,\n",
" 'degree_t':2,\n",
" 'workex':0,\n",
" 'etest_p':55.0,\n",
" 'specialisation':1,\n",
" 'mba_p':58.8, \n",
"},index=[0])"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LogisticRegression()"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lr= LogisticRegression()\n",
"lr.fit(X,y)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Placed\n",
"You will be placed with probability of 0.96\n"
]
}
],
"source": [
"p=lr.predict(new_data)\n",
"prob=lr.predict_proba(new_data)\n",
"if p==1:\n",
" print('Placed')\n",
" print(f\"You will be placed with probability of {prob[0][1]:.2f}\")\n",
"else:\n",
" print(\"Not-placed\")"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.04186191, 0.95813809]])"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prob"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 16. Save Model Using Joblib"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"import joblib"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['model_campus_placement']"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joblib.dump(lr,'model_campus_placement')"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"model = joblib.load('model_campus_placement')"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1], dtype=int64)"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.predict(new_data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GUI"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from tkinter import *\n",
"import joblib\n",
"import numpy as np\n",
"from sklearn import *\n",
"import tkinter.font as font\n",
"import pandas as pd\n",
"\n",
"def show_entry_fields():\n",
"    \"\"\"Predict placement from the current form values and show the result in the window.\n",
"\n",
"    Dropdown choices are converted to integer codes and the five percentage boxes are\n",
"    parsed as floats; the saved model is then loaded and queried with a one-row frame.\n",
"    \"\"\"\n",
"    # Drop the labels written by an earlier click; otherwise a negative result would\n",
"    # still show the previous probability underneath it.\n",
"    for row in (31, 33, 34):\n",
"        for stale in master.grid_slaves(row=row):\n",
"            stale.destroy()\n",
"\n",
"    # hsc_s, degree_t, workex and specialisation use the same codes as the .map() cells\n",
"    # of this notebook; any unrecognised choice falls back to 0.\n",
"    # NOTE(review): the gender and board encodings are applied before this part of the\n",
"    # notebook - confirm that Male=1 and Central=1 match the training data.\n",
"    p1 = 1 if clicked.get() == \"Male\" else 0\n",
"    p3 = 1 if clicked1.get() == \"Central\" else 0\n",
"    p5 = 1 if clicked6.get() == \"Central\" else 0\n",
"    p6 = {\"Science\": 2, \"Commerce\": 1}.get(clicked2.get(), 0)\n",
"    p8 = {\"Sci&Tech\": 2, \"Comm&Mgmt\": 1}.get(clicked3.get(), 0)\n",
"    p9 = 1 if clicked4.get() == \"Yes\" else 0\n",
"    p11 = 1 if clicked5.get() == \"Mkt&HR\" else 0\n",
"\n",
"    try:\n",
"        p2 = float(e2.get())\n",
"        p4 = float(e4.get())\n",
"        p7 = float(e7.get())\n",
"        p10 = float(e10.get())\n",
"        p12 = float(e12.get())\n",
"    except ValueError:\n",
"        # Empty or non-numeric percentage: tell the user instead of raising inside the Tk callback.\n",
"        Label(master, text=\"Please enter a number in every percentage field\",\n",
"              font=(\"Arial\", 15)).grid(row=31)\n",
"        return\n",
"\n",
"    model = joblib.load('model_campus_placement')\n",
"    new_data = pd.DataFrame({\n",
"        'gender':p1,\n",
"        'ssc_p':p2,\n",
"        'ssc_b':p3,\n",
"        'hsc_p':p4,\n",
"        'hsc_b':p5,\n",
"        'hsc_s':p6,\n",
"        'degree_p':p7,\n",
"        'degree_t':p8,\n",
"        'workex':p9,\n",
"        'etest_p':p10,\n",
"        'specialisation':p11,\n",
"        'mba_p':p12,\n",
"    },index=[0])\n",
"    result=model.predict(new_data)\n",
"    result1=model.predict_proba(new_data)\n",
"\n",
"    if result[0] == 0:\n",
"        Label(master, text=\"Can't Placed\").grid(row=31)\n",
"    else:\n",
"        Label(master, text=\"Student Will be Placed With Probability of\",font=(\"Arial\", 15)).grid(row=31)\n",
"        # Scale before rounding: round(p, 2)*100 displayed float noise such as 56.99999999999999.\n",
"        Label(master, text=round(result1[0][1]*100),font=(\"Arial\", 15)).grid(row=33)\n",
"        Label(master, text=\"Percent\",font=(\"Arial\", 15)).grid(row=34)\n",
"\n",
"master = Tk()\n",
"master.title(\"Campus Placement Prediction System\")\n",
"\n",
"# Keep the widget itself in `label`; chaining .grid() onto the constructor stored None.\n",
"label = Label(master, text=\"Campus Placement Prediction System\",\n",
"              bg=\"green\", fg=\"white\", font=(\"Arial\", 20))\n",
"label.grid(row=0, columnspan=2)\n",
"\n",
"# Captions for form rows 1-12, top to bottom.\n",
"field_captions = [\n",
"    \"Gender\",\n",
"    \"Secondary Education percentage- 10th Grade\",\n",
"    \"Board of Education\",\n",
"    \"Higher Secondary Education percentage- 12th Grade\",\n",
"    \"Board of Education\",\n",
"    \"Specialization in Higher Secondary Education\",\n",
"    \"Degree Percentage\",\n",
"    \"Under Graduation(Degree type)- Field of degree education\",\n",
"    \"Work Experience\",\n",
"    \"Enter test percentage\",\n",
"    \"branch specialization\",\n",
"    \"MBA percentage\",\n",
"]\n",
"for row, caption in enumerate(field_captions, start=1):\n",
"    Label(master, text=caption, font=(\"Arial\", 15)).grid(row=row)\n",
"\n",
"clicked = StringVar()\n",
"options = [\"Male\",\"Female\"]\n",
"\n",
"clicked1 = StringVar()\n",
"options1 = [\"Central\",\"Others\"]\n",
"\n",
"clicked2 = StringVar()\n",
"options2 = [\"Science\",\"Commerce\",\"Arts\"]\n",
"\n",
"clicked3 = StringVar()\n",
"options3 = [\"Sci&Tech\",\"Comm&Mgmt\",\"Others\"]\n",
"\n",
"clicked4 = StringVar()\n",
"options4 = [\"Yes\",\"No\"]\n",
"\n",
"clicked5 = StringVar()\n",
"# Was \"Mky&Fin\": the category in the data set is spelled Mkt&Fin.\n",
"options5 = [\"Mkt&HR\",\"Mkt&Fin\"]\n",
"\n",
"clicked6 = StringVar()\n",
"options6 = [\"Central\",\"Others\"]\n",
"\n",
"# Preselect the first option of each dropdown so the form always submits a visible choice\n",
"# (an untouched menu used to send '' and was silently encoded as 0).\n",
"for selection, choices in (\n",
"    (clicked, options), (clicked1, options1), (clicked2, options2), (clicked3, options3),\n",
"    (clicked4, options4), (clicked5, options5), (clicked6, options6),\n",
"):\n",
"    selection.set(choices[0])\n",
"\n",
"e1 = OptionMenu(master , clicked , *options )\n",
"e1.configure(width=13)\n",
"e2 = Entry(master)\n",
"e3 = OptionMenu(master , clicked1 , *options1 )\n",
"e3.configure(width=13)\n",
"e4 = Entry(master)\n",
"e5 = OptionMenu(master , clicked6 , *options6)\n",
"e5.configure(width=13)\n",
"e6 = OptionMenu(master , clicked2 , *options2)\n",
"e6.configure(width=13)\n",
"e7 = Entry(master)\n",
"e8 = OptionMenu(master , clicked3 , *options3)\n",
"e8.configure(width=13)\n",
"e9 = OptionMenu(master , clicked4 , *options4)\n",
"e9.configure(width=13)\n",
"e10 = Entry(master)\n",
"e11 = OptionMenu(master , clicked5 , *options5)\n",
"e11.configure(width=13)\n",
"e12 = Entry(master)\n",
"\n",
"# Input widgets go in column 1, next to their captions in rows 1-12.\n",
"for row, widget in enumerate((e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12), start=1):\n",
"    widget.grid(row=row, column=1)\n",
"\n",
"buttonFont = font.Font(family='Helvetica', size=16, weight='bold')\n",
"Button(master, text='Predict', height=1, width=8, activebackground='#00ff00',\n",
"       font=buttonFont, bg='black', fg='white', command=show_entry_fields).grid()\n",
"\n",
"mainloop()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

You might also like