0% found this document useful (0 votes)
6 views

AIFB Outputs

The document contains multiple code snippets for financial analysis using Python, including portfolio optimization, clustering of assets, market sentiment analysis using Hidden Markov Models, and a moving average crossover trading strategy. Key outputs include Sharpe ratios for a portfolio and individual assets, clustering results for various stocks, and visualizations of trading signals and market regimes. The document also discusses risk management techniques such as stop-loss in trading strategies.

Uploaded by

Rahul Hope
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views

AIFB Outputs

The document contains multiple code snippets for financial analysis using Python, including portfolio optimization, clustering of assets, market sentiment analysis using Hidden Markov Models, and a moving average crossover trading strategy. Key outputs include Sharpe ratios for a portfolio and individual assets, clustering results for various stocks, and visualizations of trading signals and market regimes. The document also discusses risk management techniques such as stop-loss in trading strategies.

Uploaded by

Rahul Hope
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 28

CODE:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
def annualized_performance(returns, risk_free_rate=0.0, periods_per_year=252):
    """Annualize a series of periodic simple returns.

    Args:
        returns: pandas Series (or DataFrame) of per-period simple returns.
        risk_free_rate: Annual risk-free rate subtracted from the annualized
            return when forming the Sharpe ratio.
        periods_per_year: Number of return periods in one year (252 is used
            here for daily data).

    Returns:
        Tuple ``(annual_return, annual_volatility, sharpe_ratio)`` where the
        return is ``mean * periods_per_year`` and the volatility is
        ``std * sqrt(periods_per_year)``.
    """
    annual_return = returns.mean() * periods_per_year
    annual_volatility = returns.std() * np.sqrt(periods_per_year)
    sharpe = (annual_return - risk_free_rate) / annual_volatility
    return annual_return, annual_volatility, sharpe


if __name__ == "__main__":
    # 1. Define Tickers, Date Range, and Risk-Free Rate
    tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA']  # Portfolio tickers
    start_date = '2022-01-01'
    end_date = '2023-01-01'
    risk_free_rate = 0.02  # Risk-free rate (e.g., 2%)

    # 2. Download Historical Data
    data = yf.download(tickers, start=start_date, end=end_date)
    # Select closing prices and force the column order to follow `tickers`,
    # so the positional `weights` array below lines up with the right asset
    # regardless of the order in which the columns were returned.
    data = data['Close'][tickers]

    # 3. Calculate Daily Returns
    daily_returns = data.pct_change().dropna()

    # 4. Assign Portfolio Weights
    # (Assuming equal weights for simplicity)
    weights = np.array([1 / len(tickers)] * len(tickers))

    # 5. Calculate Portfolio Returns
    portfolio_returns = daily_returns.dot(weights)

    # 6-7. Calculate Annualized Return, Volatility and Sharpe Ratio
    annual_return, annual_volatility, sharpe_ratio = annualized_performance(
        portfolio_returns, risk_free_rate
    )

    # 8. Print Results
    print("Portfolio Sharpe Ratio:", sharpe_ratio)

    # 9. Visualization
    plt.figure(figsize=(10, 6))
    plt.bar(tickers, weights, label='Portfolio Weights')
    plt.xlabel('Tickers')
    plt.ylabel('Weights')
    plt.title('Portfolio Asset Allocation')
    plt.legend()
    plt.show()

    # Sharpe Ratio for Individual Assets
    individual_sharpe_ratios = {}
    for ticker in tickers:
        asset_returns = daily_returns[ticker]
        _, _, asset_sharpe = annualized_performance(asset_returns, risk_free_rate)
        individual_sharpe_ratios[ticker] = asset_sharpe

    # Print Individual Sharpe Ratios
    print("\nIndividual Asset Sharpe Ratios:")
    for ticker, sharpe in individual_sharpe_ratios.items():
        print(f"{ticker}: {sharpe:.2f}")

    # Visualize Individual Sharpe Ratios
    plt.figure(figsize=(10, 6))
    plt.bar(list(individual_sharpe_ratios.keys()),
            list(individual_sharpe_ratios.values()))
    plt.xlabel('Tickers')
    plt.ylabel('Sharpe Ratio')
    plt.title('Individual Asset Sharpe Ratios')
    plt.show()
OUTPUT:

Portfolio Sharpe Ratio: -1.3462110597919914


Individual Asset Sharpe Ratios:
AAPL: -0.81
MSFT: -0.80
GOOG: -1.13
AMZN: -1.21
TSLA: -1.51
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Sample financial data (e.g., returns, volatility, and market capitalization)
data = {
    'Asset': ['AAPL', 'AMZN', 'GOOGL', 'MSFT', 'TSLA', 'NFLX', 'FB', 'IBM', 'ORCL', 'INTC'],
    'Return': [0.12, 0.15, 0.10, 0.14, 0.20, 0.18, 0.11, 0.05, 0.07, 0.06],
    'Volatility': [0.25, 0.30, 0.20, 0.22, 0.35, 0.32, 0.21, 0.15, 0.18, 0.17],
    'Market_Cap': [2.3, 1.7, 1.5, 2.2, 0.8, 0.2, 1.0, 0.1, 0.3, 0.2],
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Extract features for clustering
features = df[['Return', 'Volatility', 'Market_Cap']]

# Standardize the data so every feature contributes on the same scale
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Apply KMeans clustering
num_clusters = 3
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
clusters = kmeans.fit_predict(scaled_features)

# Add cluster labels to the original data
df['Cluster'] = clusters

# Visualize the clusters (marker area scales with Market_Cap)
plt.figure(figsize=(8, 6))
plt.scatter(df['Return'], df['Volatility'], c=df['Cluster'], cmap='viridis',
            s=df['Market_Cap'] * 100, alpha=0.6, edgecolors='w')
plt.xlabel('Return')
plt.ylabel('Volatility')
plt.title('Cluster Analysis of Financial Assets')
plt.colorbar(label='Cluster')

# Annotate points with asset names. Iterating the columns in lockstep avoids
# label-based df[col][i] lookups, which only work while the index is 0..n-1.
for asset_name, asset_return, asset_volatility in zip(
        df['Asset'], df['Return'], df['Volatility']):
    plt.annotate(asset_name, (asset_return, asset_volatility))
plt.show()

# Print the resulting clusters
print(df)

OUTPUT:

   Asset  Return  Volatility  Market_Cap  Cluster
0   AAPL    0.12        0.25         2.3        0
1   AMZN    0.15        0.30         1.7        2
2  GOOGL    0.10        0.20         1.5        0
3   MSFT    0.14        0.22         2.2        0
4   TSLA    0.20        0.35         0.8        2
5   NFLX    0.18        0.32         0.2        2
6     FB    0.11        0.21         1.0        1
7    IBM    0.05        0.15         0.1        1
8   ORCL    0.07        0.18         0.3        1
9   INTC    0.06        0.17         0.2        1
CODE:
!pip install yfinance
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM
import yfinance as yf
# Step 1: Download market data (e.g., S&P 500 index)
sp500 = yf.download('^GSPC', start='2010-01-01', end='2024-01-01')
# Access the 'Close' column instead of 'Adj Close'.
# If 'Adj Close' is available, you can continue using that.
sp500 = sp500['Close']
if isinstance(sp500, pd.DataFrame):
    # The selection may come back as a one-column frame; work with a Series
    sp500 = sp500.iloc[:, 0]

# Step 2: Calculate daily returns
returns = sp500.pct_change().dropna()

# Step 3: Prepare data for HMM (one feature column, one row per day)
X = returns.values.reshape(-1, 1)

# Step 4: Fit a Gaussian HMM with 2 regimes (Bull and Bear)
# random_state makes the fit (and therefore the state numbering) repeatable.
model = GaussianHMM(n_components=2, covariance_type='diag', n_iter=1000,
                    random_state=42)
model.fit(X)

# Step 5: Predict the hidden states
hidden_states = model.predict(X)

# The numbering of hidden states is arbitrary, so do not assume state 0 is
# the bear regime: treat the state with the lower mean return as "bear".
bear_state = int(np.argmin(model.means_[:, 0]))

# Step 6: Visualize the results
plt.figure(figsize=(10, 6))
# Use returns.index so x and y always have the same length after dropna()
plt.plot(returns.index, returns, label='Returns', color='gray', alpha=0.5)
colors = ['red' if state == bear_state else 'green' for state in hidden_states]
plt.scatter(returns.index, returns, c=colors, s=10, alpha=0.8)
plt.title('Market Sentiment Analysis Using Markov Regime Switching Model')
plt.xlabel('Date')
plt.ylabel('Daily Returns')
plt.grid(True)
plt.show()

# Step 7: Print state probabilities
print("Transition Matrix:")
print(model.transmat_)
print("Means and Variances of Each State:")
for i in range(model.n_components):
    state_mean = model.means_[i][0]
    state_variance = np.diag(model.covars_[i])[0]
    print(f"State {i}: Mean = {state_mean:.6f}, Variance = {state_variance:.6f}")

OUTPUT:
Transition Matrix:
[[0.96029722 0.03970278]
 [0.01305629 0.98694371]]
Means and Variances of Each State:
State 0: Mean = -0.001040, Variance = 0.000362
State 1: Mean = 0.000966, Variance = 0.000049
CODE:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
def crossover_signal(short_ma, long_ma):
    """Return 1 where the short moving average is above the long one, else 0.

    Comparisons against NaN evaluate to False, so rows where either moving
    average is still undefined (the rolling warm-up period) yield 0.
    """
    return (short_ma > long_ma).astype(int)


def backtest_fixed_size(prices, signal, shares, initial_capital, name):
    """Backtest holding a fixed number of shares whenever ``signal`` is 1.

    Args:
        prices: Series of prices indexed like ``signal``.
        signal: Series of 0/1 values (1 = hold the position on that row).
        shares: Number of shares held while the signal is 1.
        initial_capital: Starting cash.
        name: Column name used for the asset in the returned frames.

    Returns:
        Tuple ``(positions, portfolio)``. ``positions`` has one column with
        the number of shares held; ``portfolio`` has the holdings value under
        ``name`` plus ``'Cash'`` and ``'Total'`` columns.
    """
    positions = pd.DataFrame(index=signal.index)
    positions[name] = shares * signal
    portfolio = positions.multiply(prices, axis=0)
    # Cash moves by (change in shares) * price on each row; the first row has
    # no previous position, so its change is treated as zero.
    traded_value = positions.diff().fillna(0).multiply(prices, axis=0).sum(axis=1)
    portfolio['Cash'] = initial_capital - traded_value.cumsum()
    portfolio['Total'] = portfolio.sum(axis=1)
    return positions, portfolio


if __name__ == "__main__":
    # Step 1: Download market data (e.g., AAPL stock)
    asset = 'AAPL'
    start_date = '2020-01-01'
    end_date = '2024-01-01'
    data = yf.download(asset, start=start_date, end=end_date)
    # Access the 'Close' column instead of 'Adj Close'
    # If 'Adj Close' is available, you can continue using that.
    data = data['Close']
    if isinstance(data, pd.DataFrame):
        # The selection may come back as a one-column frame; use a Series so
        # the row-wise arithmetic below is unambiguous.
        data = data.iloc[:, 0]

    # Step 2: Calculate moving averages
    short_window = 50
    long_window = 200
    short_ma = data.rolling(window=short_window).mean()
    long_ma = data.rolling(window=long_window).mean()

    # Step 3: Generate trading signals
    # Assign the whole column at once instead of the chained
    # signal['Signal'][short_window:] = ... form, which writes through a
    # temporary object and is not guaranteed to update the frame.
    signal = pd.DataFrame(index=data.index)
    signal['Signal'] = crossover_signal(short_ma, long_ma)

    # Step 4: Calculate positions (+1 = buy, -1 = sell)
    signal['Position'] = signal['Signal'].diff()

    # Step 5: Backtest the strategy
    initial_capital = 10000
    positions, portfolio = backtest_fixed_size(
        data, signal['Signal'], 100, initial_capital, asset
    )

    # Step 6: Visualization
    plt.figure(figsize=(10, 6))
    plt.plot(data, label='Asset Price', alpha=0.5)
    plt.plot(short_ma, label='50-day MA', alpha=0.75)
    plt.plot(long_ma, label='200-day MA', alpha=0.75)
    plt.plot(portfolio['Total'], label='Portfolio Value', color='purple')
    plt.title('Simple Moving Average Crossover Strategy')

    # Mark buy and sell signals
    buy_signals = signal[signal['Position'] == 1].index
    sell_signals = signal[signal['Position'] == -1].index
    # Use .loc to access data by index instead of column names
    plt.plot(buy_signals, data.loc[buy_signals], '^', markersize=10, color='g',
             label='Buy Signal', alpha=0.7)
    plt.plot(sell_signals, data.loc[sell_signals], 'v', markersize=10, color='r',
             label='Sell Signal', alpha=0.7)
    plt.grid(True)
    plt.legend()
    plt.show()

    # Step 7: Print final portfolio value
    print(f"Final Portfolio Value: ${portfolio['Total'].iloc[-1]:.2f}")
OUTPUT:
CODE:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
def crossover_signal(short_ma, long_ma):
    """Return 1 where the short moving average is above the long one, else 0.

    Comparisons against NaN evaluate to False, so rows where either moving
    average is still undefined (the rolling warm-up period) yield 0.
    """
    return (short_ma > long_ma).astype(int)


def apply_trailing_stop(prices, signal, stop_loss_pct):
    """Turn a raw 0/1 entry signal into the position actually held.

    A position is opened when ``signal`` is 1 and closed either when the
    signal returns to 0 or when the price falls more than ``stop_loss_pct``
    below the highest price seen since entry. After a stop-out the position
    stays closed until the signal has gone back to 0 and turned 1 again.

    Args:
        prices: Series of prices.
        signal: Series of 0/1 values aligned row-by-row with ``prices``.
        stop_loss_pct: Fractional drop from the post-entry peak that triggers
            the stop (0.02 means 2%).

    Returns:
        Series of 0/1 values indexed like ``prices`` (1 = position held).
    """
    held = np.zeros(len(prices), dtype=int)
    in_market = False
    stopped_out = False
    peak = 0.0
    for i, (price, wanted) in enumerate(zip(prices.to_numpy(), signal.to_numpy())):
        if not wanted:
            # Signal is off: flat, and a new entry is allowed on the next signal
            in_market = False
            stopped_out = False
        elif not in_market and not stopped_out:
            in_market = True
            peak = price
        if in_market:
            peak = max(peak, price)
            if price < peak * (1 - stop_loss_pct):
                in_market = False
                stopped_out = True
        held[i] = int(in_market)
    return pd.Series(held, index=prices.index, name=signal.name)


def backtest_fixed_size(prices, signal, shares, initial_capital, name):
    """Backtest holding a fixed number of shares whenever ``signal`` is 1.

    Returns:
        Tuple ``(positions, portfolio)``. ``positions`` has one column with
        the number of shares held; ``portfolio`` has the holdings value under
        ``name`` plus ``'Cash'`` and ``'Total'`` columns.
    """
    positions = pd.DataFrame(index=signal.index)
    positions[name] = shares * signal
    portfolio = positions.multiply(prices, axis=0)
    # Cash moves by (change in shares) * price on each row; the first row has
    # no previous position, so its change is treated as zero.
    traded_value = positions.diff().fillna(0).multiply(prices, axis=0).sum(axis=1)
    portfolio['Cash'] = initial_capital - traded_value.cumsum()
    portfolio['Total'] = portfolio.sum(axis=1)
    return positions, portfolio


if __name__ == "__main__":
    # Step 1: Download market data (e.g., AAPL stock)
    asset = 'AAPL'
    start_date = '2020-01-01'
    end_date = '2024-01-01'
    data = yf.download(asset, start=start_date, end=end_date)
    # Access the 'Close' column if 'Adj Close' is not available
    # This ensures the code works even if 'Adj Close' is missing
    data = data['Close']
    if isinstance(data, pd.DataFrame):
        # The selection may come back as a one-column frame; use a Series so
        # the row-wise arithmetic below is unambiguous.
        data = data.iloc[:, 0]

    # Step 2: Calculate moving averages
    short_window = 50
    long_window = 200
    short_ma = data.rolling(window=short_window).mean()
    long_ma = data.rolling(window=long_window).mean()

    # Step 3: Generate trading signals (raw crossover, before the stop-loss)
    signal = pd.DataFrame(index=data.index)
    signal['Signal'] = crossover_signal(short_ma, long_ma)

    # Step 4: Apply the stop-loss and calculate positions
    # The earlier version overwrote 'Total' with cash on every row (it used
    # the full index of the boolean mask rather than its True rows) and never
    # closed a position. Here the stop decides what is actually held, so cash
    # and total value follow from real trades.
    position_size = 100   # Number of shares per trade
    stop_loss_pct = 0.02  # 2% stop-loss
    held = apply_trailing_stop(data, signal['Signal'], stop_loss_pct)
    signal['Position'] = held.diff()  # +1 = buy, -1 = sell (incl. stop-outs)

    # Step 5: Backtest the strategy with position sizing and stop-loss
    initial_capital = 10000
    positions, portfolio = backtest_fixed_size(
        data, held, position_size, initial_capital, asset
    )
    cash = portfolio['Cash']

    # Step 6: Visualization
    plt.figure(figsize=(10, 6))
    plt.plot(data, label='Asset Price', alpha=0.5)
    plt.plot(short_ma, label='50-day MA', alpha=0.75)
    plt.plot(long_ma, label='200-day MA', alpha=0.75)
    plt.plot(portfolio['Total'], label='Portfolio Value', color='purple')
    buy_signals = signal[signal['Position'] == 1].index
    sell_signals = signal[signal['Position'] == -1].index
    plt.plot(buy_signals, data.loc[buy_signals], '^', markersize=10, color='g',
             label='Buy Signal', alpha=0.7)
    plt.plot(sell_signals, data.loc[sell_signals], 'v', markersize=10, color='r',
             label='Sell Signal', alpha=0.7)
    plt.legend()
    plt.grid(True)
    plt.title('Advanced Risk Management Strategy with Stop-Loss')
    plt.show()

    # Step 7: Print final portfolio value
    print(f"Final Portfolio Value: ${portfolio['Total'].iloc[-1]:.2f}")

OUTPUT:
CODE:
!pip install yfinance
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM
import yfinance as yf
# Step 1: Download market data (e.g., S&P 500 index)
sp500 = yf.download('^GSPC', start='2010-01-01', end='2024-01-01')
# Access the 'Close' column instead of 'Adj Close'.
# If 'Adj Close' is available, you can continue using that.
sp500 = sp500['Close']
if isinstance(sp500, pd.DataFrame):
    # The selection may come back as a one-column frame; work with a Series
    sp500 = sp500.iloc[:, 0]

# Step 2: Calculate daily returns
returns = sp500.pct_change().dropna()

# Step 3: Prepare data for HMM (one feature column, one row per day)
X = returns.values.reshape(-1, 1)

# Step 4: Fit a Gaussian HMM with 2 regimes (Bull and Bear)
# random_state makes the fit (and therefore the state numbering) repeatable.
model = GaussianHMM(n_components=2, covariance_type='diag', n_iter=1000,
                    random_state=42)
model.fit(X)

# Step 5: Predict the hidden states
hidden_states = model.predict(X)

# Hidden-state numbers carry no meaning of their own, so pick the bear regime
# from the fitted parameters: it is the state with the lower mean return.
bear_state = int(np.argmin(model.means_[:, 0]))

# Step 6: Visualize the results
plt.figure(figsize=(10, 6))
# Use returns.index so x and y always have the same length after dropna()
plt.plot(returns.index, returns, label='Returns', color='gray', alpha=0.5)
colors = ['red' if state == bear_state else 'green' for state in hidden_states]
plt.scatter(returns.index, returns, c=colors, s=10, alpha=0.8)
plt.title('Market Sentiment Analysis Using Markov Regime Switching Model')
plt.xlabel('Date')
plt.ylabel('Daily Returns')
plt.grid(True)
plt.show()

# Step 7: Print state probabilities
print("Transition Matrix:")
print(model.transmat_)
print("Means and Variances of Each State:")
for i, (state_means, state_covar) in enumerate(zip(model.means_, model.covars_)):
    print(f"State {i}: Mean = {state_means[0]:.6f}, Variance = {np.diag(state_covar)[0]:.6f}")

OUTPUT:
Transition Matrix:
[[0.96029722 0.03970278]
 [0.01305629 0.98694371]]
Means and Variances of Each State:
State 0: Mean = -0.001040, Variance = 0.000362
State 1: Mean = 0.000966, Variance = 0.000049
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
# Step 1: Build a reproducible synthetic transaction table for the demo
np.random.seed(42)
num_samples = 1000
# Columns: transaction amount, transaction time, customer age, account
# balance, plus a randomly drawn fraud flag (1 with probability 0.05).
data = dict(
    Transaction_Amount=np.random.uniform(10, 1000, num_samples),
    Transaction_Time=np.random.uniform(0, 24, num_samples),
    Customer_Age=np.random.uniform(18, 70, num_samples),
    Account_Balance=np.random.uniform(100, 10000, num_samples),
    Is_Fraud=np.random.choice([0, 1], size=num_samples, p=[0.95, 0.05]),
)
df = pd.DataFrame(data)

# Step 2: Pairwise feature plots, coloured by the fraud flag
sns.pairplot(df, hue='Is_Fraud', palette='coolwarm')
plt.show()

# Step 3: Separate features from the target and hold out 30% for testing
y = df['Is_Fraud']
X = df.drop(columns='Is_Fraud')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 4: Fit the Random Forest on the training split
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 5: Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Step 6: Report the confusion matrix and per-class metrics
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 7: Horizontal bar chart of the fitted feature importances
features = X.columns
feature_importances = model.feature_importances_
plt.figure(figsize=(8, 6))
plt.barh(features, feature_importances, color='skyblue')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()

# Step 8: Amount vs. balance scatter, non-fraud drawn first, fraud on top
fraud_data = df[df['Is_Fraud'] == 1]
non_fraud_data = df[df['Is_Fraud'] == 0]
plt.figure(figsize=(10, 6))
for subset, opacity, legend_label, marker_color in (
        (non_fraud_data, 0.5, 'Non-Fraud', 'blue'),
        (fraud_data, 0.7, 'Fraud', 'red'),
):
    plt.scatter(subset['Transaction_Amount'], subset['Account_Balance'],
                alpha=opacity, label=legend_label, color=marker_color)

plt.xlabel('Transaction Amount')
plt.ylabel('Account Balance')
plt.title('Fraudulent Patterns')
plt.legend()
plt.grid(True)
plt.show()

OUTPUT:
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Step 1: Fetch historical market data
symbol = 'AAPL'
start_date = '2020-01-01'
end_date = '2024-01-01'
data = yf.download(symbol, start=start_date, end=end_date)
# If the download comes back with two-level columns, keep only the first
# level so columns can be addressed by plain names such as 'Close'.
# (presumably the first level holds the price field — confirm for your
# yfinance version)
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Check if 'Adj Close' column exists, if not, use 'Close'
if 'Adj Close' in data.columns:
    close_col = 'Adj Close'
else:
    close_col = 'Close'

# Step 2: Feature engineering (all based on close_col)
data['Return'] = data[close_col].pct_change()
data['SMA_50'] = data[close_col].rolling(window=50).mean()
data['SMA_200'] = data[close_col].rolling(window=200).mean()
data['Signal'] = np.where(data['SMA_50'] > data['SMA_200'], 1, 0)
# Remove NaN values (the rolling warm-up rows)
data = data.dropna()

# Step 3: Prepare the data for machine learning
# NOTE(review): 'Signal' is computed directly from SMA_50 and SMA_200, which
# are also used as features, so the classifier is learning a rule it is
# handed; the scores below say little about predictive power.
features = ['Return', 'SMA_50', 'SMA_200']
X = data[features]
y = data['Signal']
# NOTE(review): this split is not chronological; for time-series data
# consider an ordered split so the test rows come after the training rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 4: Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 5: Predict signals and evaluate
predictions = model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, predictions))

# Step 6: Backtest the strategy
# NOTE(review): predictions here cover every row, including the rows the
# model was trained on, so this is an in-sample backtest.
data['Predicted_Signal'] = model.predict(X)
# Yesterday's predicted signal decides whether today's return is earned
data['Strategy_Return'] = data['Return'] * data['Predicted_Signal'].shift(1)

# Step 7: Cumulative returns
cumulative_market = (1 + data['Return']).cumprod()
cumulative_strategy = (1 + data['Strategy_Return']).cumprod()
plt.figure(figsize=(10, 6))
plt.plot(cumulative_market, label='Market', color='blue')
plt.plot(cumulative_strategy, label='Strategy', color='green')
plt.legend()
plt.title('Market vs Strategy Cumulative Returns')
plt.grid(True)
plt.show()
OUTPUT:

Classification Report:
precision recall f1-score support
0 0.96 0.98 0.97 55
1 0.99 0.99 0.99 188
accuracy 0.99 243
macro avg 0.98 0.99 0.98 243
weighted avg 0.99 0.99 0.99 243
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)

# Step 1: Fetch historical market data
symbol = 'AAPL'
start_date = '2020-01-01'
end_date = '2024-01-01'
data = yf.download(symbol, start=start_date, end=end_date)
# If the download comes back with two-level columns, keep only the first
# level so columns can be addressed by plain names such as 'Close'.
# (presumably the first level holds the price field — confirm for your
# yfinance version)
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Check if 'Adj Close' column exists, if not, use 'Close'
if 'Adj Close' in data.columns:
    close_col = 'Adj Close'
else:
    close_col = 'Close'

# Step 2: Feature engineering (all based on close_col)
data['Return'] = data[close_col].pct_change()
data['SMA_50'] = data[close_col].rolling(window=50).mean()
data['SMA_200'] = data[close_col].rolling(window=200).mean()
data['Signal'] = np.where(data['SMA_50'] > data['SMA_200'], 1, 0)
# Remove NaN values (the rolling warm-up rows)
data = data.dropna()

# Step 3: Prepare the data for machine learning
# NOTE(review): 'Signal' is computed directly from SMA_50 and SMA_200, which
# are also used as features, so the classifier is learning a rule it is
# handed; the metrics below say little about predictive power.
features = ['Return', 'SMA_50', 'SMA_200']
X = data[features]
y = data['Signal']
# NOTE(review): this split is not chronological; for time-series data
# consider an ordered split so the test rows come after the training rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 4: Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 5: Predict signals and evaluate
predictions = model.predict(X_test)

# Step 6: Backtest the strategy
# NOTE(review): predictions here cover every row, including the rows the
# model was trained on, so this is an in-sample backtest.
data['Predicted_Signal'] = model.predict(X)
# Yesterday's predicted signal decides whether today's return is earned
data['Strategy_Return'] = data['Return'] * data['Predicted_Signal'].shift(1)

# Step 7: Cumulative returns
cumulative_market = (1 + data['Return']).cumprod()
cumulative_strategy = (1 + data['Strategy_Return']).cumprod()
plt.figure(figsize=(10, 6))
plt.plot(cumulative_market, label='Market', color='blue')
plt.plot(cumulative_strategy, label='Strategy', color='green')
plt.legend()
plt.title('Market vs Strategy Cumulative Returns')
plt.grid(True)
plt.show()

# Step 8: Performance evaluation on the held-out test rows
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
print("Classification Report:")
print(classification_report(y_test, predictions))
print("\nPerformance Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

OUTPUT:
Classification Report:
precision recall f1-score support
0 0.96 0.98 0.97 55
1 0.99 0.99 0.99 188
accuracy 0.99 243
macro avg 0.98 0.99 0.98 243
weighted avg 0.99 0.99 0.99 243
Performance Metrics:
Accuracy: 0.99
Precision: 0.99
Recall: 0.99
F1 Score: 0.99
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Step 1: Draw a reproducible synthetic data set
np.random.seed(42)
num_samples = 1000
# (low, high) bounds for each uniformly distributed feature, in draw order
uniform_bounds = {
    'Transaction_Amount': (10, 1000),
    'Transaction_Time': (0, 24),
    'Customer_Age': (18, 70),
    'Account_Balance': (100, 10000),
}
data = {
    column: np.random.uniform(low, high, num_samples)
    for column, (low, high) in uniform_bounds.items()
}
# Fraud flag drawn last: 1 with probability 0.05, otherwise 0
data['Is_Fraud'] = np.random.choice([0, 1], size=num_samples, p=[0.95, 0.05])

# Tabulate the generated columns
df = pd.DataFrame(data)

# Step 2: Pairwise plots of every column, coloured by the fraud flag
sns.pairplot(df, hue='Is_Fraud', palette='coolwarm')
plt.show()

# Step 3: Features / target, then a 70/30 train-test split
X = df.drop(columns=['Is_Fraud'])
y = df['Is_Fraud']
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Step 4: Fit the classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 5: Label the held-out rows
y_pred = model.predict(X_test)

# Step 6: Compute the evaluation artefacts first, then print them
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)
print("\nAccuracy:")
print(f"{accuracy:.2f}")

# Step 7: Bar chart of the fitted model's feature importances
feature_importances = model.feature_importances_
features = X.columns
plt.figure(figsize=(8, 6))
plt.barh(features, feature_importances, color='skyblue')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()

# Step 8: Transaction amount against account balance for both classes
fraud_data = df[df['Is_Fraud'] == 1]
non_fraud_data = df[df['Is_Fraud'] == 0]
plt.figure(figsize=(10, 6))
legit_amount = non_fraud_data['Transaction_Amount']
legit_balance = non_fraud_data['Account_Balance']
plt.scatter(legit_amount, legit_balance, alpha=0.5, label='Non-Fraud', color='blue')
fraud_amount = fraud_data['Transaction_Amount']
fraud_balance = fraud_data['Account_Balance']
plt.scatter(fraud_amount, fraud_balance, alpha=0.7, label='Fraud', color='red')
plt.xlabel('Transaction Amount')
plt.ylabel('Account Balance')
plt.title('Fraudulent Patterns')
plt.legend()
plt.grid(True)
plt.show()

OUTPUT:

Confusion Matrix:
[[283   0]
 [ 17   0]]
Classification Report:
precision recall f1-score support
0 0.94 1.00 0.97 283
1 0.00 0.00 0.00 17
accuracy 0.94 300
macro avg 0.47 0.50 0.49 300
weighted avg 0.89 0.94 0.92 300
Accuracy: 0.94

You might also like