CSTSGTCODE

The document loads data on mall customers from a CSV file and performs exploratory data analysis and clustering. It examines the distributions of customer characteristics such as age, annual income, and spending score. Visualizations include a violin plot, box plots, and bar plots that show frequency distributions. K-means clustering is used to segment customers into 5 groups based on their age, income, and spending-score features, and the resulting clusters are visualized in a 3D scatter plot.

Uploaded by

OXEN Enterprises

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

0% found this document useful (0 votes)

7 views · 3 pages

CSTSGTCODE

Uploaded by

OXEN Enterprises

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

You are on page 1/ 3

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# List the entries of the sibling "input" directory.
input_entries = os.listdir("../input")
print(input_entries)

# Load Mall_Customers.csv from the working directory and print a summary
# of the resulting frame.
df = pd.read_csv("Mall_Customers.csv")
df.info()

# Column labels (only echoed when run interactively, e.g. in a notebook).
df.columns

# First 5 rows of our data (likewise only echoed interactively).
df.head()

# Remove the CustomerID column from the frame in place.
df.drop(columns=["CustomerID"], inplace=True)

# Violin plot of the Age column.
#
# sns.axes_style() only *returns* a dictionary of style parameters; called as
# a bare statement it changes nothing. Used as a context manager it applies
# the "dark" style to the figure created inside the block and restores the
# previous style on exit, so later plots are unaffected.
with sns.axes_style("dark"):
    plt.figure(figsize=(10, 6))
    plt.title("Ages Frequency")
    sns.violinplot(y=df["Age"])
    plt.show()

# Two box plots side by side on one figure: spending score on the left
# (drawn in red) and annual income on the right.
box_fig, (score_ax, income_ax) = plt.subplots(1, 2, figsize=(15, 6))
sns.boxplot(y=df["Spending Score (1-100)"], color="red", ax=score_ax)
sns.boxplot(y=df["Annual Income (k$)"], ax=income_ax)
plt.show()

# Bar plot of how many rows carry each distinct Gender value.
sns.set_style("darkgrid")
genders = df["Gender"].value_counts()
plt.figure(figsize=(10, 4))
sns.barplot(
    x=genders.index,
    y=genders.to_numpy(),
)
plt.show()

# Split the Age column into five brackets (both bounds inclusive) and plot
# how many customers fall into each one.
age18_25 = df.Age[df.Age.between(18, 25)]
age26_35 = df.Age[df.Age.between(26, 35)]
age36_45 = df.Age[df.Age.between(36, 45)]
age46_55 = df.Age[df.Age.between(46, 55)]
# NOTE: this bracket starts at 56 even though its axis label reads "55+";
# customers aged exactly 55 are counted in the 46-55 bracket.
age55above = df.Age[df.Age >= 56]

# Bracket labels and the matching customer counts.
# The original assignment to `y` was split right after the `=` sign, which
# is a syntax error; the list is now wrapped inside its own brackets.
x = ["18-25", "26-35", "36-45", "46-55", "55+"]
y = [
    len(age18_25),
    len(age26_35),
    len(age36_45),
    len(age46_55),
    len(age55above),
]

plt.figure(figsize=(15, 6))
sns.barplot(x=x, y=y, palette="rocket")
plt.title("Number of Customer and Ages")
plt.xlabel("Age")
plt.ylabel("Number of Customer")
plt.show()

# Bucket the spending score into five 20-point bands (both bounds inclusive)
# and plot how many customers fall into each band.
spending = df["Spending Score (1-100)"]
ss1_20 = spending[spending.between(1, 20)]
ss21_40 = spending[spending.between(21, 40)]
ss41_60 = spending[spending.between(41, 60)]
ss61_80 = spending[spending.between(61, 80)]
ss81_100 = spending[spending.between(81, 100)]

# Band labels and the matching customer counts, in the same order.
ssx = ["1-20", "21-40", "41-60", "61-80", "81-100"]
ssy = [len(band) for band in (ss1_20, ss21_40, ss41_60, ss61_80, ss81_100)]

plt.figure(figsize=(15, 6))
sns.barplot(x=ssx, y=ssy, palette="nipy_spectral_r")
plt.title("Spending Scores")
plt.xlabel("Score")
plt.ylabel("Number of Customer Having the Score")
plt.show()

# Bucket the annual income (in k$) into five bands (both bounds inclusive)
# and plot how many customers fall into each band.
#
# In the original text the column name "Annual Income (k$)" and one of the
# axis labels were broken across lines inside their string literals, which
# is a syntax error (and would be the wrong column key); the literals are
# restored to single-line strings here.
income = df["Annual Income (k$)"]
ai0_30 = income[(income >= 0) & (income <= 30)]
ai31_60 = income[(income >= 31) & (income <= 60)]
ai61_90 = income[(income >= 61) & (income <= 90)]
ai91_120 = income[(income >= 91) & (income <= 120)]
ai121_150 = income[(income >= 121) & (income <= 150)]

# Band labels and the matching customer counts, in the same order.
aix = [
    "$ 0 - 30,000",
    "$ 30,001 - 60,000",
    "$ 60,001 - 90,000",
    "$ 90,001 - 120,000",
    "$ 120,001 - 150,000",
]
aiy = [
    len(ai0_30),
    len(ai31_60),
    len(ai61_90),
    len(ai91_120),
    len(ai121_150),
]

plt.figure(figsize=(15, 6))
sns.barplot(x=aix, y=aiy, palette="Set2")
plt.title("Annual Incomes")
plt.xlabel("Income")
plt.ylabel("Number of Customer")
plt.show()

from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of every customer: Age on x, annual income on y, spending score
# on z, all points drawn in a single colour.
sns.set_style("white")
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(
    df["Age"],
    df["Annual Income (k$)"],
    df["Spending Score (1-100)"],
    c="blue",
    s=60,
)
# Camera position: 30 degrees elevation, 185 degrees azimuth.
ax.view_init(30, 185)
ax.set_xlabel("Age")
ax.set_ylabel("Annual Income (k$)")
ax.set_zlabel("Spending Score (1-100)")
plt.show()

from sklearn.cluster import KMeans

# Elbow method: fit K-means for k = 1..10, record the within-cluster sum of
# squares (the model's inertia_) for each k, and plot it against k.
#
# Fixes relative to the original text:
#   * the loop body had lost its indentation (syntax error);
#   * random_state is fixed so the curve is reproducible between runs;
#   * n_init is set explicitly because its default differs between
#     scikit-learn releases.

# Features are every column except the first one.
# NOTE(review): presumably the first column is the non-numeric Gender column
# - confirm against the CSV. A "label" column left over from an earlier
# clustering run is excluded so re-running this step uses the same features.
features = df.drop(columns="label", errors="ignore").iloc[:, 1:]

wcss = []
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, init="k-means++", n_init=10, random_state=42)
    kmeans.fit(features)
    wcss.append(kmeans.inertia_)

plt.figure(figsize=(12, 6))
plt.grid()
plt.plot(range(1, 11), wcss, linewidth=2, color="red", marker="8")
plt.xlabel("K Value")
plt.xticks(np.arange(1, 11, 1))
plt.ylabel("WCSS")
plt.show()

# Fit the final 5-cluster K-means model and store each row's cluster id in
# a new "label" column.
#
# random_state is fixed so that cluster ids (and therefore the colours used
# when plotting them) are the same on every run; n_init is set explicitly
# because its default differs between scikit-learn releases.

# Features are every column except the first one.
# NOTE(review): presumably the first column is the non-numeric Gender column
# - confirm against the CSV. An existing "label" column is excluded so that
# re-running this step does not cluster on the previous labels.
features = df.drop(columns="label", errors="ignore").iloc[:, 1:]

km = KMeans(n_clusters=5, n_init=10, random_state=42)
clusters = km.fit_predict(features)

df["label"] = clusters

from mpl_toolkits.mplot3d import Axes3D

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 3D scatter of the customers, one colour per cluster label (0..4):
# Age on x, annual income on y, spending score on z.
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111, projection="3d")

# Position in this list is the cluster label the colour is used for.
cluster_colours = ["blue", "red", "green", "orange", "purple"]
for cluster_id, colour in enumerate(cluster_colours):
    members = df[df.label == cluster_id]
    ax.scatter(
        members["Age"],
        members["Annual Income (k$)"],
        members["Spending Score (1-100)"],
        c=colour,
        s=60,
    )

# Camera position: 30 degrees elevation, 185 degrees azimuth.
ax.view_init(30, 185)
ax.set_xlabel("Age")
ax.set_ylabel("Annual Income (k$)")
ax.set_zlabel("Spending Score (1-100)")
plt.show()

Heirarchical Clustering.ipynb - Colab
No ratings yet
Heirarchical Clustering.ipynb - Colab
4 pages
Python Data Analysis and Visualization 100 Practical Exercises With Results and Explanations (Yuka, Horikawa Yui, Kirigaya Kouta Etc.) (Z-Library)
No ratings yet
Python Data Analysis and Visualization 100 Practical Exercises With Results and Explanations (Yuka, Horikawa Yui, Kirigaya Kouta Etc.) (Z-Library)
453 pages
Jupyter Notebook Project DM Nikita Chaturvedi 25.07.2021
100% (5)
Jupyter Notebook Project DM Nikita Chaturvedi 25.07.2021
83 pages
Clustering Algorithms SciKit Learn 1705740354
No ratings yet
Clustering Algorithms SciKit Learn 1705740354
22 pages
Vijay Shankar Customer Churn Random Forest Hyperparameter Tuning
No ratings yet
Vijay Shankar Customer Churn Random Forest Hyperparameter Tuning
40 pages
Seaborn Besant
No ratings yet
Seaborn Besant
27 pages
Customer Retail Shopping Analysis 1686591558
No ratings yet
Customer Retail Shopping Analysis 1686591558
45 pages
End To End Machine Learning Problem
No ratings yet
End To End Machine Learning Problem
20 pages
Data Science Project VI - Ipynb - Colaboratory
No ratings yet
Data Science Project VI - Ipynb - Colaboratory
15 pages
Churn Prediction Model
No ratings yet
Churn Prediction Model
36 pages
ML Assignment No 5
No ratings yet
ML Assignment No 5
11 pages
06 Seaborn
No ratings yet
06 Seaborn
13 pages
Exploratory Data Analysis66
No ratings yet
Exploratory Data Analysis66
17 pages
Cognizant's Artificial Intelligence Task 1
No ratings yet
Cognizant's Artificial Intelligence Task 1
14 pages
EDA Plots Code
No ratings yet
EDA Plots Code
13 pages
Case Study 3 Aman
No ratings yet
Case Study 3 Aman
9 pages
Exploratory Data Analysis
No ratings yet
Exploratory Data Analysis
10 pages
Reading Data: #Importing Required Libraries
No ratings yet
Reading Data: #Importing Required Libraries
16 pages
Fds Slips
No ratings yet
Fds Slips
6 pages
Diwali Sales Analysis
No ratings yet
Diwali Sales Analysis
14 pages
Assignmnet 5
No ratings yet
Assignmnet 5
11 pages
KMEANS
No ratings yet
KMEANS
13 pages
PMA_Experiment_2
No ratings yet
PMA_Experiment_2
6 pages
prac2
No ratings yet
prac2
11 pages
Assignment ....
No ratings yet
Assignment ....
8 pages
End semester Answer key format-fods
No ratings yet
End semester Answer key format-fods
8 pages
6
No ratings yet
6
4 pages
Practical 3
No ratings yet
Practical 3
8 pages
Mainpy (Customer Segmentation)
No ratings yet
Mainpy (Customer Segmentation)
6 pages
prac2
No ratings yet
prac2
11 pages
BTECH1010622_LAB4
No ratings yet
BTECH1010622_LAB4
4 pages
IntroQugates
No ratings yet
IntroQugates
4 pages
Seaborn Final
No ratings yet
Seaborn Final
67 pages
Practical 5
No ratings yet
Practical 5
6 pages
BIDA practical print
No ratings yet
BIDA practical print
56 pages
Expt6total.i (2) - JupyterLab
No ratings yet
Expt6total.i (2) - JupyterLab
7 pages
Churn For Bank Customers
No ratings yet
Churn For Bank Customers
28 pages
EDP-3[2]
No ratings yet
EDP-3[2]
16 pages
Lecture - 7 - Practical - DBSCAN Clustering in Python
No ratings yet
Lecture - 7 - Practical - DBSCAN Clustering in Python
3 pages
Practical D.V
No ratings yet
Practical D.V
13 pages
Axe Submission
No ratings yet
Axe Submission
4 pages
Customer Segmentation PDF
No ratings yet
Customer Segmentation PDF
18 pages
ETAP Presentation
0% (1)
ETAP Presentation
63 pages
Python Magazine Survey
No ratings yet
Python Magazine Survey
7 pages
Data Visualization With Python
No ratings yet
Data Visualization With Python
34 pages
vertopal.com_Project_13_Customer_Segmentation_using_K_Means_Clustering
No ratings yet
vertopal.com_Project_13_Customer_Segmentation_using_K_Means_Clustering
9 pages
Sea Histo
No ratings yet
Sea Histo
1 page
DF PD - Read - Excel ('Sample - Superstore - XLS') : Anjaliassignmnet - Ipy NB
No ratings yet
DF PD - Read - Excel ('Sample - Superstore - XLS') : Anjaliassignmnet - Ipy NB
23 pages
ADS2
No ratings yet
ADS2
3 pages
matplotlib
No ratings yet
matplotlib
7 pages
Data Mining Graded Assignment: Problem 1: Clustering Analysis
100% (3)
Data Mining Graded Assignment: Problem 1: Clustering Analysis
39 pages
Data Preprocessing & Visualization1
No ratings yet
Data Preprocessing & Visualization1
2 pages
K Means Clustering
100% (1)
K Means Clustering
10 pages
practice_questions2
No ratings yet
practice_questions2
2 pages
Bank Customer Churn Analysis - Jupyter Notebook
No ratings yet
Bank Customer Churn Analysis - Jupyter Notebook
11 pages
Project Sale Analysis
No ratings yet
Project Sale Analysis
8 pages
Supermarket Sales Analysis Project
No ratings yet
Supermarket Sales Analysis Project
8 pages
Mall Customer Data Analysis PDF
No ratings yet
Mall Customer Data Analysis PDF
10 pages
Diwali Sales Analysis EDA 1696347982
No ratings yet
Diwali Sales Analysis EDA 1696347982
8 pages
DSBDAL - Assignment No 9
No ratings yet
DSBDAL - Assignment No 9
12 pages
Quiz 9 - Chap 10
No ratings yet
Quiz 9 - Chap 10
3 pages
Premiun PWM720FG
No ratings yet
Premiun PWM720FG
35 pages
Deccan Clap - Woodpolish & Deco Paint Brochure
No ratings yet
Deccan Clap - Woodpolish & Deco Paint Brochure
6 pages
20 Years Speciliased Pyq Garima Goel Biological Classification
No ratings yet
20 Years Speciliased Pyq Garima Goel Biological Classification
14 pages
Baldwin 1e Ch06 PPT FINAL Accessible
No ratings yet
Baldwin 1e Ch06 PPT FINAL Accessible
26 pages
AR Reading
0% (2)
AR Reading
7 pages
1.SchedulingandStaffingModels
No ratings yet
1.SchedulingandStaffingModels
15 pages
Magic With Everyday Objects
No ratings yet
Magic With Everyday Objects
76 pages
Office Note - of Majedul Islam
No ratings yet
Office Note - of Majedul Islam
4 pages
Udah Di Mark David Gibson - Games and Simulations in Online Learning - Research and Development Frameworks (2006) Uda Dimark
No ratings yet
Udah Di Mark David Gibson - Games and Simulations in Online Learning - Research and Development Frameworks (2006) Uda Dimark
421 pages
Canadian Customs Tariff Schedule - HS 72 Iron and Steel
No ratings yet
Canadian Customs Tariff Schedule - HS 72 Iron and Steel
33 pages
de Thi Dap An
No ratings yet
de Thi Dap An
6 pages
Prabhupada
No ratings yet
Prabhupada
9 pages
ARTEMIS Road Model Description V04d 071008
No ratings yet
ARTEMIS Road Model Description V04d 071008
169 pages
PH M Quang Anh - 18045128 Module Title/module Code: International Human Resource management/UMPD7G-15-3 Word Count: 2971
No ratings yet
PH M Quang Anh - 18045128 Module Title/module Code: International Human Resource management/UMPD7G-15-3 Word Count: 2971
16 pages
RPT Matematik DLP Year 6 Date 1
No ratings yet
RPT Matematik DLP Year 6 Date 1
18 pages
Bahasa Inggris (Sheila XI MIPA 2)
No ratings yet
Bahasa Inggris (Sheila XI MIPA 2)
11 pages
Bowel Preparation - Picolax
No ratings yet
Bowel Preparation - Picolax
5 pages
Criminal Law Session 5
No ratings yet
Criminal Law Session 5
6 pages
Country of The Philippines
No ratings yet
Country of The Philippines
8 pages
Marketing Models
No ratings yet
Marketing Models
14 pages
TSM 070 Form
No ratings yet
TSM 070 Form
8 pages
0510 s15 QP 22
No ratings yet
0510 s15 QP 22
16 pages
Prayer With and For The Youth 2018-2019
No ratings yet
Prayer With and For The Youth 2018-2019
3 pages
VTSM Chart
No ratings yet
VTSM Chart
6 pages
Science and Humanities: Arunachala College of Engineering For Women
No ratings yet
Science and Humanities: Arunachala College of Engineering For Women
7 pages
Mathfinal Refresh. 1
No ratings yet
Mathfinal Refresh. 1
1 page
Product Datasheet: Circuit Breaker Compact NSX250H - TMD - 250 A - 3 Poles 3d
No ratings yet
Product Datasheet: Circuit Breaker Compact NSX250H - TMD - 250 A - 3 Poles 3d
2 pages
Case Study Format
No ratings yet
Case Study Format
4 pages
No Ph.D. Game Design With Three.js
From Everand
No Ph.D. Game Design With Three.js
Nikiforos Kontopoulos
No ratings yet