ML 1-10

The document contains code to analyze Bollywood movie data with pandas and perform descriptive analysis. It calculates genre counts, groups the data by genre and release time to get movie counts, computes return on investment (ROI) for each movie, compares ROI across release times and genres, analyzes correlations between variables, and creates box plots to compare YouTube likes by genre. Further exercises cover exploratory analysis, feature engineering, feature relevance and redundancy, classification, regression, clustering, association rules, neural networks, independent component analysis, and ensemble learning.


1.DESCRIPTIVE ANALYSIS

import pandas as pd
df=pd.read_csv('bollywood.csv')
df.shape

OP:

df.info()

OP:

df.Genre.value_counts()

OP:
print(df.groupby('Genre')['MovieName'].count().sort_values(ascending=False))

OP:

df.groupby(['Genre','ReleaseTime'])['MovieName'].count()

OP:

dft = df.copy()  # copy so that adding the ROI column does not silently modify df
dft['ROI'] = (dft['BoxOfficeCollection'] - dft['Budget'])/dft['Budget']
dft.sort_values('ROI',ascending=False)['MovieName'].head(10)

OP:
print(dft.groupby(['ReleaseTime'])['ROI'].mean())
dft.groupby(['ReleaseTime'])['ROI'].mean().plot()
OP:

df['Budget'].plot.hist()

OP:
import matplotlib.pyplot as plt
dft.loc[dft.Genre == "Comedy"]['ROI'].plot.hist(alpha=0.6, label='Comedy')
dft.loc[dft.Genre == "Drama"]['ROI'].plot.hist(alpha=0.6, label='Drama')
plt.legend()

print("Comedy has higher ROI")

OP:

boc = df['BoxOfficeCollection']
yl = df['YoutubeLikes']
boc.corr(yl)

OP:

df.groupby('Genre')['YoutubeLikes'].sum().sort_values(ascending=False).head(1)

OP:

df.loc[df.Genre == 'Action']['YoutubeLikes'].plot.box()

OP:
df.loc[df.Genre == 'Comedy']['YoutubeLikes'].plot.box()

OP:

df.loc[df.Genre == 'Drama']['YoutubeLikes'].plot.box()

OP:
df.loc[df.Genre == 'Romance']['YoutubeLikes'].plot.box()

OP:

df.loc[df.Genre == 'Thriller']['YoutubeLikes'].plot.box()

OP:
import seaborn as sns
btod = df[['Budget','BoxOfficeCollection','YoutubeViews','YoutubeLikes','YoutubeDislikes']]
correlation = btod.corr()
sns.pairplot(correlation)  # note: pairplot is usually applied to the raw columns (btod); a heatmap better suits a correlation matrix

OP:
2.EXPLORATORY ANALYSIS

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
data=pd.read_csv('Data.csv')
df=pd.DataFrame(data)

df.head()
OP:

df.shape
df.info()

OP:
df.describe()
type(df)
list(df.columns)
df.iloc[0:9,0:4]
df.describe(include=['O'])
OP:

corr_df = df[['sbp', 'obesity', 'age', 'ldl']]
correlation_matrix = corr_df.corr()
sn.pairplot(correlation_matrix)
plt.show()

OP:

chd_stats = df.groupby('chd')['tobacco'].describe()
print(chd_stats)
sn.regplot(x="age", y="sbp", data=df)
plt.show()

OP:
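An optional follow-up sketch (not part of the original listing): the same correlation matrix can be shown as an annotated heatmap, which is often easier to read than a pairplot of the matrix.

# assumes corr_df and correlation_matrix from the cell above
sn.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation between sbp, obesity, age and ldl')
plt.show()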
bins = [0, 15, 25, 35, float('inf')]  # assumed edges; the original listing repeated 15
labels = ['young', 'adult', 'mid', 'old']
df['agegroup'] = pd.cut(df['age'], bins=bins, labels=labels, right=False)
print(df)

OP:
3.FEATURE ENGINEERING

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('iris.csv')
df.head()
OP:

df.info()
df.tail()
df.describe()

OP:
df.isnull().sum()

OP:

df.loc[df.isnull().any(axis=1)]

OP:

df = df.dropna()
df.duplicated().sum()
df.loc[df.duplicated()]
df['sepal_length'] = df['sepal_length'].astype(int)

OP:

df.rename(columns={'old_column_name': 'new_column_name'}, inplace=True)


df['sum_lengths'] = df['sepal_length'] + df['petal_length']
df.head()
from scipy import stats
z_scores = stats.zscore(df['sepal_length'])
OP:

z_scores
df[(z_scores > 3)]

OP:
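Note that `z_scores > 3` only flags the upper tail; a hedged variant (not part of the original listing) that catches outliers in both tails:

import numpy as np
outliers = df[np.abs(z_scores) > 3]  # |z| > 3 also flags unusually small values
print(outliers)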

df.to_csv("C:\\Users\\STUDENT\\Downloads\\22128018\\iris_data.csv", index=False)

OP:
4.MEASURE OF FEATURE RELEVANCE & REDUNDANCY

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load the dataset


df = pd.read_csv("DAD.csv")

# Task 2: Build correlation matrix


numeric_cols = df.select_dtypes(include=np.number).columns
correlation_matrix = df[numeric_cols].corr().abs()

# Identify highly correlated features


high_correlation_vars = np.where(correlation_matrix > 0.7)
high_correlation_vars = [(correlation_matrix.index[x], correlation_matrix.columns[y])
                         for x, y in zip(*high_correlation_vars) if x != y and x < y]
print("Highly correlated features:", high_correlation_vars)

OP:

# Task 3: Create BMI feature


df['BMI'] = df['Body Weight'] / ((df['Body Height'] / 100) ** 2)

# Task 4: Impute 'None' for missing values in Past Medical History Code
df['Past Medical History Code'].fillna('None', inplace=True)

# Display column names in the DataFrame


print("Column names in the DataFrame:")
print(df.columns)

OP:
# Task 5: Select features for model building
selected_features = ['Age', 'Gender', 'Marital Status', 'Key Complaints Code', 'HR Pulse',
                     'BP High', 'BP Low', 'RR', 'Past Medical History Code', 'HB', 'Urea',
                     'Creatinine', 'Mode of Arrival', 'Ambulance',
                     'Walked Instate at the Time of Arrival', 'Type of Admission', 'Elective',
                     'Emergency', 'BMI', 'Cost of Implant']

# Task 6: Encode categorical features


categorical_features = ['Gender', 'Marital Status', 'Key Complaints Code',
                        'Past Medical History Code', 'Mode of Arrival', 'Type of Admission',
                        'Elective', 'Emergency', 'Ambulance',
                        'Walked Instate at the Time of Arrival', 'Implant Used Yes/no']

# Filter categorical features for encoding


df_categorical = df[categorical_features]
for col in df_categorical.columns:
    df[col] = LabelEncoder().fit_transform(df[col])

# Create dummy variables for categorical features


df = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# Display column names after encoding


print("\nColumn names after encoding:")
print(df.columns)

# Update selected_features based on column names after encoding


selected_features = [col for col in selected_features if col in df.columns]
OP:

# Task 7: Check for multicollinearity


X = df[selected_features]
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X.values, i) for i in range(len(X.columns))]
print("\nVIF:")
print(vif_data)

OP:
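The table above lists one VIF per feature; a common next step is to prune collinear features. A hedged sketch of that loop (the threshold of 10 and the pruning strategy are assumptions, not part of the original listing):

# iteratively drop the feature with the largest VIF until every VIF is below 10
X_reduced = X.copy()
while True:
    vifs = pd.Series(
        [variance_inflation_factor(X_reduced.values.astype(float), i) for i in range(X_reduced.shape[1])],
        index=X_reduced.columns)
    if vifs.max() <= 10:
        break
    X_reduced = X_reduced.drop(columns=[vifs.idxmax()])
print("Retained features:", list(X_reduced.columns))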

# Task 8: Find outliers using Z-score


z_scores = np.abs((df[selected_features] - df[selected_features].mean()) / df[selected_features].std())
outliers_zscore = df[(z_scores > 3).any(axis=1)]
print("Outliers using Z-score:")
print(outliers_zscore)

OP:

# Task 9: Split the data


X_train, X_test, y_train, y_test = train_test_split(X, df['Total Cost to Hospital'], test_size=0.2, random_state=42)

# Task 10: Build regression model with statsmodels


X_train_sm = sm.add_constant(X_train)
model = sm.OLS(y_train, X_train_sm).fit()
print("Regression Model Summary:")
print(model.summary())
# Task 11: Identify statistically significant features
significant_features = model.pvalues[model.pvalues < 0.05].index.tolist()
if 'const' in significant_features:
    significant_features.remove('const')  # 'const' is the intercept term, not a column of X
print("Significant features:", significant_features)

OP:

# Task 12: Build linear regression model with significant features


X_train_sm = sm.add_constant(X_train[significant_features])
model_significant = sm.OLS(y_train, X_train_sm).fit()
print("Regression Model with Significant Features Summary:")
print(model_significant.summary())
OP:

# Task 13: Residual analysis


import matplotlib.pyplot as plt
sm.qqplot(model_significant.resid, line='s')
plt.title("Q-Q Plot of Residuals")
plt.show()

OP:
# Task 14: Predict and evaluate RMSE
X_test_sm = sm.add_constant(X_test[significant_features])
y_pred = model_significant.predict(X_test_sm)

from sklearn.metrics import mean_squared_error


rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE:", rmse)

OP:
5.SUPERVISED LEARNING TECHNIQUE - CLASSIFICATION

A)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
dataset = pd.read_csv("loan_data_set.csv")
dataset.head()

OP:

X = dataset.drop('Loan_Status', axis=1)
y = dataset['Loan_Status']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=38, stratify=y)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(X_train, y_train)

OP:
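If the raw loan dataset still contains object-typed or missing values, the `knn.fit` call above will raise an error. A minimal preprocessing sketch (an assumption about the data, not part of the original listing) that could be applied to X before splitting:

from sklearn.preprocessing import LabelEncoder
X_enc = X.copy()
for col in X_enc.select_dtypes(include='object').columns:
    X_enc[col] = LabelEncoder().fit_transform(X_enc[col].astype(str))  # encode text columns
X_enc = X_enc.fillna(X_enc.median(numeric_only=True))                  # impute remaining gaps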

#Prediction of test set


prediction_knn = knn.predict(X_test)
#Print the predicted values
print("Prediction for test set: {}".format(prediction_knn))

OP:

a = pd.DataFrame({'Actual value': y_test, 'Predicted value': prediction_knn})


a.head()

OP:

from sklearn import metrics


from sklearn.metrics import classification_report, confusion_matrix
matrix = confusion_matrix(y_test, prediction_knn)
sns.heatmap(matrix, annot=True, fmt="d")
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
print(classification_report(y_test, prediction_knn))

OP:
5-B

import pandas as pd
from sklearn.tree import DecisionTreeClassifier, plot_tree  # Import Decision Tree Classifier and plot_tree
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics
col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']
# Load dataset
pima = pd.read_csv("diabetes.csv", header=None, names=col_names, skiprows=1)

# Split dataset into features and target variable


feature_cols = ['pregnant', 'insulin', 'bmi', 'age','glucose','bp','pedigree']
X = pima[feature_cols] # Features
y = pima.label # Target variable

# Split dataset into training set and test set


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)  # 70% training and 30% test

# Create Decision Tree classifer object


clf = DecisionTreeClassifier()

# Train Decision Tree Classifer


clf = clf.fit(X_train,y_train)

# Predict the response for test dataset


y_pred = clf.predict(X_test)

# Model Accuracy, how often is the classifier correct?


print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Visualize the decision tree


import matplotlib.pyplot as plt
plt.figure(figsize=(20,10))
plot_tree(clf, feature_names=feature_cols, class_names=['0','1'], filled=True)
plt.show()

OP:
5-C

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv("titanic.csv")
df.head()

OP:

df.drop(['Cabin','PassengerId','Name','Ticket'],axis=1,inplace=True)
df = df.fillna(0)
from sklearn.preprocessing import LabelEncoder
#Initialize LabelEncoder
le = LabelEncoder()
# Apply label encoding to 'Sex' column
df['Sex'] = le.fit_transform(df['Sex'].astype(str))
# Apply label encoding to 'Embarked' column
df['Embarked'] = le.fit_transform(df['Embarked'].astype(str))

# Putting feature variable to X


X = df.drop('Survived',axis=1)
# Putting response variable to y
y = df['Survived']

# Splitting the data into train and test


from sklearn.model_selection import train_test_split
# Splitting the data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

#Import Random Forest Model


from sklearn.ensemble import RandomForestClassifier
#Create a Random Forest classifier
clf=RandomForestClassifier(n_estimators=100)
#Train the model using the training sets
clf.fit(X_train,y_train)

OP:

# Predicting the test set results


Pred = clf.predict(X_test)
print(Pred)

OP:
from sklearn.metrics import classification_report
rand_score=clf.score(X_test, y_test)
classification_report_rf=classification_report(y_test,Pred)
print("Accuracy score:",rand_score)
print(classification_report_rf)

OP:

5-D

class Dog:
    attr1 = "mammal"
    attr2 = "dog"

    def fun(self):
        print("I'm a", self.attr1)
        print("I'm a", self.attr2)

# Object instantiation
Rodger = Dog()

# Accessing class attributes and methods through the object
print(Rodger.attr1)
Rodger.fun()

OP:
6.SUPERVISED LEARNING METHOD - REGRESSION

A)
import pandas as pd
import numpy as np
import matplotlib.pyplot as mtplt

def estimate_coeff(p, q):
    # Here, we will estimate the total number of points or observations
    n1 = np.size(p)
    # Now, we will calculate the mean of the p and q vectors
    m_p = np.mean(p)
    m_q = np.mean(q)

    # here, we will calculate the cross deviation and deviation about p
    SS_pq = np.sum(q * p) - n1 * m_q * m_p
    SS_pp = np.sum(p * p) - n1 * m_p * m_p

    # here, we will calculate the regression coefficients
    b_1 = SS_pq / SS_pp
    b_0 = m_q - b_1 * m_p

    return (b_0, b_1)


def plot_regression_line(p, q, b):
    # Now, we will plot the actual points or observations as a scatter plot
    mtplt.scatter(p, q, color="m", marker="o", s=30)

    # here, we will calculate the predicted response vector
    q_pred = b[0] + b[1] * p

    # here, we will plot the regression line
    mtplt.plot(p, q_pred, color="g")

    # here, we will put the labels
    mtplt.xlabel('p')
    mtplt.ylabel('q')

    # here, we will show the plot
    mtplt.show()
def main():
    # entering the observation points or data
    p = np.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
    q = np.array([11, 13, 12, 15, 17, 18, 18, 19, 20, 22])

    # now, we will estimate the coefficients
    b = estimate_coeff(p, q)
    print("Estimated coefficients are:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))

    # Now, we will plot the regression line
    plot_regression_line(p, q, b)

if __name__ == "__main__":
    main()

OP:
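As a quick sanity check (a sketch, not part of the original listing), NumPy's polyfit should reproduce the same intercept and slope on the sample data from main():

import numpy as np
p = np.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
q = np.array([11, 13, 12, 15, 17, 18, 18, 19, 20, 22])
b_1, b_0 = np.polyfit(p, q, 1)  # polyfit returns [slope, intercept] for degree 1
print("b_0 =", b_0, "b_1 =", b_1)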
6-B

import matplotlib.pyplot as mtpplt
import pandas as pd
import numpy as np
from sklearn import datasets as DS
from sklearn import linear_model as LM
from sklearn import metrics as mts
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
H = data
f = target
from sklearn.model_selection import train_test_split as tts
H_train, H_test, f_train, f_test = tts(H, f, test_size=0.4, random_state=1)

# Here, we will create linear regression object


reg1 = LM.LinearRegression()

# Now, we will train the model by using the training sets


reg1.fit(H_train, f_train)

# here, we will print the regression coefficients


print('Regression Coefficients are: ', reg1.coef_)

# Here, we will print the variance score: 1 means perfect prediction


print('Variance score is: {}'.format(reg1.score(H_test, f_test)))

# Here, we will plot for residual error

# here, we will set the plot style


mtpplt.style.use('fivethirtyeight')

# here we will plot the residual errors in training data


mtpplt.scatter(reg1.predict(H_train), reg1.predict(H_train) - f_train, color="green", s=10, label='Train data')

# Here, we will plot the residual errors in test data
mtpplt.scatter(reg1.predict(H_test), reg1.predict(H_test) - f_test, color="blue", s=10, label='Test data')

# Here, we will plot the line for zero residual error

mtpplt.hlines(y=0, xmin=0, xmax=50, linewidth=2)

# here, we will plot the legend


mtpplt.legend(loc='upper right')

# now, we will plot the title


mtpplt.title("Residual errors")

# here, we will define the method call for showing the plot
mtpplt.show()

OP:
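A small follow-up sketch (not in the original listing): besides the variance score, the fitted model's test-set error can be summarized with RMSE.

import numpy as np
from sklearn.metrics import mean_squared_error
f_pred = reg1.predict(H_test)
rmse = np.sqrt(mean_squared_error(f_test, f_pred))
print("Test RMSE: {:.3f}".format(rmse))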
6-C

# Import necessary libraries


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

# Load the diabetes dataset


diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Convert the target variable to binary (1 for diabetes, 0 for no diabetes)


y_binary = (y > np.median(y)).astype(int)

# Split the data into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Logistic Regression model


model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model


y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))

# evaluate the model


print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Visualize the decision boundary with accuracy information


plt.figure(figsize=(8, 6))
sns.scatterplot(x=X_test[:, 2], y=X_test[:, 0], hue=y_test, palette={0: 'blue', 1: 'red'}, marker='o')  # column 2 = bmi, column 0 = age in the sklearn diabetes data (the original plotted column 8)
plt.xlabel("BMI")
plt.ylabel("Age")
plt.title("Logistic Regression Decision Boundary\nAccuracy: {:.2f}%".format(accuracy * 100))
plt.legend(title="Diabetes", loc="upper right")
plt.show()

y_prob = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2,label=f'ROC Curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve\nAccuracy: {:.2f}%'.format(accuracy * 100))
plt.legend(loc="lower right")
plt.show()

OP:
7.UNSUPERVISED LEARNING METHODS

A)
import pandas as pd
df = pd.read_csv("Mall_Customers.csv")
print(df.head())

from sklearn.cluster import KMeans

#define the inputs we will use for our K-means clustering algorithm
X = df[['Age', 'Spending Score (1-100)']].copy()

#fit K-means for k = 1..10 and append each within-cluster sum of squares (WCSS) to a list
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, random_state=0)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

print(wcss)

import matplotlib.pyplot as plt


import seaborn as sns

#style the plots using Seaborn


sns.set()

#plot the WCSS versus the clusters


plt.plot(range(1, 11), wcss)

#add a title and label the axes


plt.title('Selecting the Number of Clusters using the Elbow Method')
plt.xlabel('Clusters')
plt.ylabel('WCSS')
plt.show()

OP:
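A hedged follow-up sketch (not part of the original listing): fit the final model with a cluster count read off the elbow plot; k=5 is an assumption here, not a value reported in the output.

kmeans = KMeans(n_clusters=5, random_state=0)
cluster_labels = kmeans.fit_predict(X)
plt.scatter(X['Age'], X['Spending Score (1-100)'], c=cluster_labels)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], c='red', marker='x')
plt.xlabel('Age')
plt.ylabel('Spending Score (1-100)')
plt.show()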


7-B

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
raw_data= pd.read_excel(r"C:\Users\STUDENT\Downloads\OnlineRetail.xlsx")
print(raw_data)

def prepare_retail(dataframe):
    # preparing dataset
    dataframe.dropna(inplace=True)
    dataframe = dataframe[~dataframe["InvoiceNo"].str.contains("C", na=False)]
    dataframe = dataframe[dataframe["Quantity"] > 0]
    dataframe = dataframe[dataframe["UnitPrice"] > 0]
    return dataframe

df = prepare_retail(raw_data)

def create_apriori_datastructure(dataframe, id=False):
    # build a one-hot basket matrix: one row per invoice, one column per product
    if id:
        grouped = dataframe.groupby(['InvoiceNo', 'StockCode'], as_index=False).agg({'Quantity': 'sum'})
        apriori_datastructure = pd.pivot(data=grouped, index='InvoiceNo', columns='StockCode',
                                         values='Quantity').fillna(0).applymap(lambda x: 1 if x > 0 else 0)
        return apriori_datastructure
    else:
        grouped = dataframe.groupby(['InvoiceNo', 'Description'], as_index=False).agg({'Quantity': 'sum'})
        apriori_datastructure = pd.pivot(data=grouped, index='InvoiceNo', columns='Description',
                                         values='Quantity').fillna(0).applymap(lambda x: 1 if x > 0 else 0)
        return apriori_datastructure

germany_df = df[df['Country'] == 'Germany']


germany_df.head()

germany_apriori_df = create_apriori_datastructure(germany_df,True)
germany_apriori_df.head()

def get_rules(apriori_df, min_support=0.01):
    frequent_itemsets = apriori(apriori_df, min_support=min_support, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="support", min_threshold=min_support)
    return rules

germany_rules = get_rules(germany_apriori_df)
germany_rules.head()

def get_item_name(dataframe, stock_code):
    # look up the Description(s) for a single stock code or a list of stock codes
    if type(stock_code) != list:
        product_name = dataframe[dataframe["StockCode"] == stock_code][["Description"]].values[0].tolist()
        return product_name
    else:
        product_names = [dataframe[dataframe["StockCode"] == product][["Description"]].values[0].tolist()[0]
                         for product in stock_code]
        return product_names

get_item_name(germany_df, 10125)

OP:

def recommend_products(rules_df, product_id, rec_count=5):
    # we are sorting the rules dataframe by using the "lift" metric
    # (reset_index so that iloc[i] below lines up with the sorted order)
    sorted_rules = rules_df.sort_values('lift', ascending=False).reset_index(drop=True)
    recommended_products = []

    for i, product in sorted_rules["antecedents"].items():
        for j in list(product):
            if j == product_id:
                recommended_products.append(list(sorted_rules.iloc[i]["consequents"]))

    recommended_products = list({item for item_list in recommended_products for item in item_list})
    return recommended_products[:rec_count]

# simulating some products like they are in cart
TARGET_PRODUCT_ID_1 = 21987
TARGET_PRODUCT_ID_2 = 23235
TARGET_PRODUCT_ID_3 = 22747

get_item_name(germany_df, [TARGET_PRODUCT_ID_1, TARGET_PRODUCT_ID_2, TARGET_PRODUCT_ID_3])

OP:
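A usage sketch for the function above (assuming the rules were mined on StockCode columns, as with id=True earlier): recommend items for one of the simulated cart products and translate the codes back to names.

recs = recommend_products(germany_rules, TARGET_PRODUCT_ID_1, rec_count=5)
print(recs)
print(get_item_name(germany_df, recs))  # map recommended stock codes back to descriptions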
8.TRAINING ARTIFICIAL NEURAL NETWORK

import numpy as np
import pandas as pd
import tensorflow as tf

tf.__version__

dataset = pd.read_csv(r"C:\Users\STUDENT\Downloads\Churn_Modelling.csv")
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values

print(X)

print(y)

from sklearn.preprocessing import LabelEncoder


le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
print(X)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
print(X)

from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

from sklearn.preprocessing import StandardScaler


sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
ann.compile(optimizer = 'adam', loss='binary_crossentropy', metrics = ['accuracy'])

ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

OP:
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

from sklearn.metrics import confusion_matrix, accuracy_score


cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

OP:
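An illustrative single-customer prediction (a sketch with made-up values, not part of the original listing; the 12-column order is an assumption matching the one-hot Geography block followed by the remaining encoded features):

# [Geography one-hot (3), CreditScore, Gender, Age, Tenure, Balance,
#  NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary] -- assumed order
sample = sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])
print(ann.predict(sample) > 0.5)  # True would mean the customer is predicted to churn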
9.INDEPENDENT COMPONENT ANALYSIS

!pip install librosa
!pip install pydub
from pydub import AudioSegment
import IPython
import numpy as np
import wave
mix_1_wave = wave.open(r"C:\Users\aashw\Downloads\ICA mix 1.wav", 'r')

mix_1_wave.getparams()
OP:
_wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=264515, comptype='NONE', compname='not compressed')

# Extract Raw Audio from Wav File


signal_1_raw = mix_1_wave.readframes(-1)
signal_1 = np.frombuffer(signal_1_raw, dtype='int16')  # np.fromstring is deprecated for binary data
'length: ', len(signal_1), 'first 100 elements: ', signal_1[:100]
OP:

('length: ', 264515, 'first 100 elements: ', array([  879,  1268,  1460,  1756,  1943,  2216,  2407,  2668,  2866,
        3106,  3308,  3546,  3752,  3981,  4175,  4395,  4588,  4790,
        4966,  5146,  5292,  5436,  5550,  5643,  5717,  5759,  5790,
        5798,  5789,  5756,  5713,  5649,  5576,  5478,  5381,  5267,
        5146,  4999,  4856,  4682,  4502,  4308,  4097,  3875,  3637,
        3380,  3107,  2825,  2514,  2194,  1847,  1472,  1087,   671,
         227,  -219,  -691, -1176, -1666, -2167, -2669, -3179, -3668,
       -4170, -4643, -5116, -5559, -5985, -6380, -6765, -7105, -7422,
       -7706, -7955, -8163, -8339, -8470, -8557, -8600, -8618, -8585,
       -8524, -8425, -8298, -8129, -7947, -7720, -7475, -7205, -6916,
       -6606, -6266, -5922, -5556, -5165, -4774, -4353, -3922, -3476,
       -3021], dtype=int16))

import matplotlib.pyplot as plt


fs = mix_1_wave.getframerate()
timing = np.linspace(0, len(signal_1)/fs, num=len(signal_1))
plt.figure(figsize=(12,2))
plt.title('Recording 1')
plt.plot(timing,signal_1, c="#3ABFE7")
plt.ylim(-35000, 35000)
plt.show()
mix_2_wave = wave.open(r"C:\Users\aashw\Downloads\ICA mix 2.wav",'r')
# (the original listing re-opened "ICA mix 1.wav" for all three recordings,
#  which is why the sample tuples shown further below contain identical values)
#Extract Raw Audio from Wav File
signal_raw_2 = mix_2_wave.readframes(-1)
signal_2 = np.frombuffer(signal_raw_2, dtype='int16')
mix_3_wave = wave.open(r"C:\Users\aashw\Downloads\ICA mix 3.wav",'r')

#Extract Raw Audio from Wav File
signal_raw_3 = mix_3_wave.readframes(-1)
signal_3 = np.frombuffer(signal_raw_3, dtype='int16')

plt.figure(figsize=(12,2))
plt.title('Recording 2')
plt.plot(timing,signal_2, c="#3ABFE7")
plt.ylim(-35000, 35000)
plt.show()

plt.figure(figsize=(12,2))
plt.title('Recording 3')
plt.plot(timing,signal_3, c="#3ABFE7")
plt.ylim(-35000, 35000)
plt.show()

OP:

from IPython.display import Audio


import librosa
# Absolute path to the audio file
audio_path = r"C:\Users\aashw\Downloads\ICA mix 1.wav"
# Load the audio file
audio_data, sampling_rate = librosa.load(audio_path)
# Display the audio
Audio(data=audio_data, rate=sampling_rate)

OP:

from scipy.io import wavfile


from IPython.display import Audio
# Load audio file
sampling_rate, audio_data = wavfile.read(r"C:\Users\aashw\Downloads\ICA mix 1.wav")
# Display the audio
Audio(audio_data, rate=sampling_rate)

OP:

X = list(zip(signal_1, signal_2, signal_3))


# Let's peak at what X looks like
X[:10]

OP:
[(879, 879, 879),
(1268, 1268, 1268),
(1460, 1460, 1460),
(1756, 1756, 1756),
(1943, 1943, 1943),
(2216, 2216, 2216),
(2407, 2407, 2407),
(2668, 2668, 2668),
(2866, 2866, 2866),
(3106, 3106, 3106)]

from sklearn.decomposition import FastICA


# Initializing FastICA with n_components=3
ica = FastICA(n_components=3)
# Running the FastICA algorithm using fit_transform on dataset X
ica_result = ica.fit_transform(X)

ica_result.shape

OP:
(264515, 3)
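Optionally (a sketch, not part of the original listing), the estimated mixing matrix can be inspected; it maps the three recovered sources back to the three recorded mixtures.

print(ica.mixing_.shape)  # expected (3, 3): one column per independent component
print(ica.mixing_)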

result_signal_1 = ica_result[:,0]
result_signal_2 = ica_result[:,1]
result_signal_3 = ica_result[:,2]

# Independent Component #1
plt.figure(figsize=(12,2))
plt.title('Independent Component #1')
plt.plot(result_signal_1, c="#df8efd")
plt.ylim(-0.010, 0.010)
plt.show()

OP:

# Independent Component #2
plt.figure(figsize=(12,2))
plt.title('Independent Component #2')
plt.plot(result_signal_2, c="#87de72")
plt.ylim(-0.010, 0.010)
plt.show()

OP:
# Independent Component #3
plt.figure(figsize=(12,2))
plt.title('Independent Component #3')
plt.plot(result_signal_3, c="#f65e97")
plt.ylim(-0.010, 0.010)
plt.show()

OP:

from scipy.io import wavfile


# Converting to int, mapping the appropriate range, and increasing the volume a little bit
result_signal_1_int = np.int16(result_signal_1*32767*100)
result_signal_2_int = np.int16(result_signal_2*32767*100)
result_signal_3_int = np.int16(result_signal_3*32767*100)

# Writing wave files


wavfile.write("result_signal_1.wav", fs, result_signal_1_int)
wavfile.write("result_signal_2.wav", fs, result_signal_2_int)
wavfile.write("result_signal_3.wav", fs, result_signal_3_int)

IPython.display.Audio("result_signal_1.wav")
OP:

IPython.display.Audio("result_signal_3.wav")
OP:

IPython.display.Audio("result_signal_2.wav")
10.ENSEMBLE LEARNING ALGORITHM

A)
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
data = datasets.load_wine(as_frame = True)
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 22)
estimator_range = [2,4,6,8,10,12,14,16,18,20]
models = []
scores = []
for n_estimators in estimator_range:
    # Create a bagging classifier
    clf = BaggingClassifier(n_estimators = n_estimators, random_state = 22)

    # Fit the model
    clf.fit(X_train, y_train)

    # Append the model and score to their respective lists
    models.append(clf)
    scores.append(accuracy_score(y_true = y_test, y_pred = clf.predict(X_test)))

# Generate the plot of the scores against a number of the estimators


plt.figure(figsize=(9,6))
plt.plot(estimator_range, scores)

# Adjust labels and font (to make them visible)


plt.xlabel("n_estimators")
plt.ylabel("score")

# show the plot


plt.show()
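A short additional sketch (not in the original listing): BaggingClassifier can also report an out-of-bag accuracy estimate, which avoids re-using the test split when choosing n_estimators; the value 12 below is only an illustrative choice.

clf_oob = BaggingClassifier(n_estimators=12, oob_score=True, random_state=22)
clf_oob.fit(X_train, y_train)
print("Out-of-bag accuracy:", clf_oob.oob_score_)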
10-B

import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

#creating a list of values for years_experience & salary


years_experience = [1.1, 1.3, 1.5, 2.0, 2.2, 2.9, 3.0, 3.2, 3.2, 3.7, 3.9, 4.0, 4.0, 4.1, 4.5,
                    4.9, 5.1, 5.3, 5.9, 6.0, 6.8, 7.1, 7.9, 8.2, 8.7, 9.0, 9.5, 9.6, 10.3, 10.5]
salary = [39343.00, 46205.00, 37731.00, 43525.00, 39891.00, 56642.00, 60150.00, 54445.00,
          64445.00, 57189.00, 63218.00, 55794.00, 56957.00, 57081.00, 61111.00, 67938.00,
          66029.00, 83088.00, 81363.00, 93940.00, 91738.00, 98273.00, 101302.00, 113812.00,
          109431.00, 105582.00, 116969.00, 112635.00, 122391.00, 121872.00]

# Create a dataframe from lists


df = pd.DataFrame({'years_experience': years_experience, 'salary': salary})

# Split the data into training and testing sets


X = df[['years_experience']]
y = df['salary']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a GradientBoostingRegressor model


model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

# Make a prediction on the test data


y_pred = model.predict(X_test)

# Print the evaluation metrics
r2 = model.score(X_test, y_test)
print("Mean absolute error is: ", mean_absolute_error(y_test, y_pred))
print("R-squared score is: ", r2)

OP:
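An illustrative usage sketch (the 7.5-years value is made up): predict the salary for a new experience level with the fitted model.

new_exp = pd.DataFrame({'years_experience': [7.5]})
print("Predicted salary:", model.predict(new_exp))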
