Praktikum IV
Praktikum IV
ipynb - Colaboratory
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import autocorrelation_plot
from scipy import stats
plt.style.use("ggplot")
import warnings
warnings.filterwarnings("ignore")
from scipy import stats
url_data = "https://raw.githubusercontent.com/supasonicx/ATA-praktikum-01/main/Starbucks%2
data = pd.read_csv(url_data)
data.head()
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
1 1 1 1
data.shape
(113, 33)
data.columns
'loyal'],
dtype='object')
data['gender'].unique()
array([1, 0])
plt.figure(figsize=(20,10))
https://colab.research.google.com/drive/1qVPMTOdz4eXyXMKL6SFtxrUYWmhJZAVK#scrollTo=-2nyy2bdQ1Yn&uniqifier=1 1/6
10/19/21, 8:56 AM Untitled3.ipynb - Colaboratory
# Histogram of the timeSpend column with a kernel-density overlay.
plt.figure(figsize=(20,10))
plt.title('Histogram of timeSpend')
# `x` must name a column of `data`; the former '...' placeholder made seaborn
# raise "Could not interpret value `...` for parameter `x`".
sns.histplot(data, x='timeSpend', kde=True)
---------------------------------------------------------------------------
<ipython-input-17-576c17bd502c> in <module>()
1 plt.figure(figsize=(20,10))
2 plt.title('Histogram of timeSpend')
4 frames
/usr/local/lib/python3.7/dist-packages/seaborn/_core.py in _assign_variables_longfor
901
902 err = f"Could not interpret value `{val}` for parameter `{ke
--> 903 raise ValueError(err)
904
905 else:
## Plot itemPurchasePastries for customers with gender == 1
m = plt.hist(data[data["gender"] == 1].itemPurchasePastries,bins=30,fc = (1,0,0,0.5),label
## Plot itemPurchasePastries for customers with gender == 0
f = plt.hist(data[data["gender"] == 0].itemPurchasePastries,bins=30,fc = (0,1,0,0.5),label
plt.legend()
https://colab.research.google.com/drive/1qVPMTOdz4eXyXMKL6SFtxrUYWmhJZAVK#scrollTo=-2nyy2bdQ1Yn&uniqifier=1 2/6
10/19/21, 8:56 AM Untitled3.ipynb - Colaboratory
plt.xlabel("Nilai itemPurchasePastries")
plt.ylabel("Frequency")
plt.title("Histogram Pembelian produk pastry bedasarkan demografi gender")
plt.show()
## Menghitung nilai outliers dari variabel timeSpend pada data customer dengan gender lak
# Split the customers by the binary gender flag.
male = data[data["gender"] == 1]
female = data[data["gender"] == 0]
# Summary statistics of timeSpend for the gender == 1 group.
desc = male.timeSpend.describe()
# Read the quartiles by label: describe() returns a Series indexed by
# 'count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max', and integer
# positional access on such a Series (desc[4], desc[6]) is deprecated.
Q1 = desc["25%"]
Q3 = desc["75%"]
IQR = Q3-Q1
# Tukey fences: values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] count as outliers.
lower_bound = Q1 - 1.5*IQR
upper_bound = Q3 + 1.5*IQR
print("Apa pun di luar kisaran ini adalah outlier timeSpend: (", lower_bound ,",", upper_b
male[male.timeSpend < lower_bound].timeSpend
print("Outliers: ",male[(male.timeSpend < lower_bound) | (male.timeSpend > upper_bound)].t
Apa pun di luar kisaran ini adalah outlier timeSpend: ( -1.5 , 2.5 )
Outliers: [4]
## Menghitung nilai outliers dari variabel visitNo (jumlah kedatangan) pada data customer
# Split the customers by the binary gender flag.
male = data[data["gender"] == 1]
female = data[data["gender"] == 0]
# The `<<??>>` exercise placeholder was a syntax error. The printed label of
# this cell names visitNo, so that column is used here.
# NOTE(review): the original expression started from `data.`, so the bounds
# are computed over all customers; use `female.visitNo` instead if per-gender
# bounds were intended — confirm.
desc = data.visitNo.describe()
# Read the quartiles by label rather than by deprecated integer position.
Q1 = desc["25%"]
Q3 = desc["75%"]
IQR = Q3-Q1
# Tukey fences: values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] count as outliers.
lower_bound = Q1 - 1.5*IQR
upper_bound = Q3 + 1.5*IQR
print("Apa pun di luar kisaran ini adalah outlier visitNo: (", lower_bound ,",", upper_bou
female[female.timeSpend < lower_bound].timeSpend
print("Outliers: ",female[(male.timeSpend < lower_bound) | (female.timeSpend > upper_bound
https://colab.research.google.com/drive/1qVPMTOdz4eXyXMKL6SFtxrUYWmhJZAVK#scrollTo=-2nyy2bdQ1Yn&uniqifier=1 3/6
10/19/21, 8:56 AM Untitled3.ipynb - Colaboratory
desc = data.<<??>>.describe()
^
## male holds every column of the rows where gender == 1
male = data[data["gender"] == 1]
## female holds every column of the rows where gender == 0
# (the assignment this comment describes was missing from the cell)
female = data[data["gender"] == 0]
print("mean: ",<<2.407407>>.<<2.407407>>.mean())
print("variance: ",<<0.6610761705101328>>.<<0.6610761705101328>>.var())
print("standart deviation (std): ",<< 0.8130659078513456>>.<< 0.8130659078513456>>.std())
print("describe method: ",<<54.000000>>.<<54.000000>>.describe())
print("mean: ",<<2.407407>>.<<2.407407>>.mean())
# Standardised difference in mean visitNo between the two gender groups:
# (mean_male - mean_female) / sqrt(pooled variance), where the pooled
# variance weights each group's sample variance by its row count.
n_male, n_female = len(male), len(female)
var_male = male.visitNo.var()
var_female = female.visitNo.var()
mean_diff = male.visitNo.mean() - female.visitNo.mean()
var_pooled = (n_male*var_male + n_female*var_female) / float(n_male + n_female)
effect_size = mean_diff / np.sqrt(var_pooled)
print("Effect size: ",effect_size)
## Relationship Between Variables - Korelasi
* Kita dapat mengatakan bahwa dua variabel terkait satu sama lain, jika salah satunya memb
* Misalnya, harga dan jarak. Jika Anda pergi jarak jauh dengan taksi Anda akan membayar le
* Scatter Plot, Cara termudah untuk memeriksa hubungan antara dua variabel
* Matriks korelasi besar yang mencakup banyak angka
* Kisaran angka ini adalah -1 hingga 1.
* Arti dari 1 adalah dua variabel yang saling berkorelasi positif seperti mean radius dan
* Arti dari nol adalah tidak ada korelasi antara variabel seperti productRate dengan incom
* Arti dari -1 adalah dua variabel berkorelasi negatif satu sama lain seperti income dan m
* Kita dapat mengatakan bahwa dua variabel terkait satu sama lain, jika salah sa
yang lain\n",
f,ax=plt.subplots(figsize = (11, 9))
dfs = data.loc[:,['productRate','priceRate','serviceRate','visitNo','timeSpend','wifiRate'
sns.heatmap(dfs.corr(),annot= True,linewidths=0.5,fmt = ".1f",ax=ax)
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
https://colab.research.google.com/drive/1qVPMTOdz4eXyXMKL6SFtxrUYWmhJZAVK#scrollTo=-2nyy2bdQ1Yn&uniqifier=1 4/6
10/19/21, 8:56 AM Untitled3.ipynb - Colaboratory
plt.title('Correlation Map')
plt.savefig('graph.png')
plt.show()
# Sample covariance of timeSpend with income and with visitNo.
# The `<<??>>` exercise placeholders (syntax errors) are filled with the
# columns named in each message, and the stray trailing quote is removed.
# NOTE(review): assumes `data` has a column literally named `income` — confirm.
print("Covariance diantara timeSpend dan income: ",data.timeSpend.cov(data.income))
print("Covariance diantara timeSpend dan visitNo: ",data.timeSpend.cov(data.visitNo))
# Pearson correlation matrix computed by pandas.
# NOTE(review): p1 pairs gender with serviceRate while p2 below pairs
# serviceRate with timeSpend, so the two printed values are not expected to
# match — confirm which pair was intended.
p1 = data.loc[:,["gender","serviceRate"]].corr(method= "pearson")
# Pearson's r by hand: cov(X, Y) / (std(X) * std(Y)).
# The division operator was missing, so Python tried to *call* the float
# returned by cov() and raised a TypeError.
p2 = data.serviceRate.cov(data.timeSpend) / (data.serviceRate.std()*data.timeSpend.std())
print('Pearson correlation: ')
print(p1)
print('Pearson correlation: ',p2)
https://colab.research.google.com/drive/1qVPMTOdz4eXyXMKL6SFtxrUYWmhJZAVK#scrollTo=-2nyy2bdQ1Yn&uniqifier=1 5/6
10/19/21, 8:56 AM Untitled3.ipynb - Colaboratory
---------------------------------------------------------------------------
<ipython-input-37-e5795f5e9419> in <module>()
1 p1 = data.loc[:,["gender","serviceRate"]].corr(method= "pearson")
----> 2 p2 = data.serviceRate.cov(data.timeSpend)
(data.serviceRate.std()*data.timeSpend.std())
4 print(p1)
# Spearman's rank correlation obtained as Pearson's r on the ranked data.
ranked_data = data.rank()
rank_pair = ranked_data[["timeSpend","serviceRate"]]
spearman_corr = rank_pair.corr(method="pearson")
print("Spearman's correlation: ")
print(spearman_corr)
Spearman's correlation:
timeSpend serviceRate
# Paired-sample t-test between the gender and productRate columns.
# NOTE(review): ttest_rel treats the two columns as paired measurements of
# the same quantity; if the goal is to compare productRate between the two
# gender groups, an independent-samples test on the split data may be what
# was meant — confirm.
statistic, p_value = stats.ttest_rel(data["gender"], data["productRate"])
print('p-value adalah: ',p_value)
https://colab.research.google.com/drive/1qVPMTOdz4eXyXMKL6SFtxrUYWmhJZAVK#scrollTo=-2nyy2bdQ1Yn&uniqifier=1 6/6