Vansh
Vansh
UNIVERSITY
IT-205n
PRACTICAL FILE
DATA SCIENCE
AND
VISUALIZATION
Code:
def fibonacci(n):
    """Return the first *n* Fibonacci numbers as a list.

    Args:
        n: How many terms to produce (non-negative int).

    Returns:
        List of the first n Fibonacci numbers; [] when n <= 0.
    """
    if n <= 0:
        return []
    fib_sequence = [0, 1]
    while len(fib_sequence) < n:
        fib_sequence.append(fib_sequence[-1] + fib_sequence[-2])
    # The seed list already holds 2 entries, so slice to handle n == 1
    # (the original returned [0, 1] even when only one term was asked for).
    return fib_sequence[:n]

n = 10
print(f"First {n} Fibonacci numbers: {fibonacci(n)}")
def bubble_sort(arr):
    """Sort *arr* in place in ascending order and return it.

    Bubble sort with an early-exit flag: if a full pass performs no
    swap the list is already sorted, so remaining passes are skipped.

    Args:
        arr: List of mutually comparable items (mutated in place).

    Returns:
        The same list object, sorted ascending.
    """
    n = len(arr)
    for i in range(n):
        swapped = False
        # After pass i, the last i elements are already in place.
        for j in range(n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        if not swapped:
            break
    return arr

arr = [64, 34, 25, 12, 22, 11, 90]
sorted_arr = bubble_sort(arr)
print("Sorted array:", sorted_arr)
Output:
Code:
import pandas as pd
from ucimlrepo import fetch_ucirepo

# Fetch the Rice (Cammeo and Osmancik) dataset from the UCI repository.
# The original fragment used `rice_dataset` without ever defining it;
# id=545 matches the fetch_ucirepo calls used elsewhere in this file.
rice_dataset = fetch_ucirepo(id=545)
data = rice_dataset.data.features
target = rice_dataset.data.targets
Output:
Code:
import pandas as pd
from ucimlrepo import fetch_ucirepo

# Load the Rice (Cammeo and Osmancik) dataset from the UCI repository.
rice_cammeo_and_osmancik = fetch_ucirepo(id=545)
X = rice_cammeo_and_osmancik.data.features
y = rice_cammeo_and_osmancik.data.targets

# Assemble a single frame with the label stored in a 'Target' column.
df = X.copy()
df['Target'] = y

# Impute missing feature values with each feature's median; the label
# column is excluded from the median computation, so it is never filled.
feature_medians = df.drop(columns=['Target']).median()
df = df.fillna(feature_medians)
# NOTE(review): this fragment uses `plt`, `sns`, and `dataset` without
# defining them — presumably `import matplotlib.pyplot as plt`,
# `import seaborn as sns`, and an Iris dataframe with columns
# 'sepal_length', 'sepal_width', and 'class' are set up earlier in the
# practical. TODO confirm and add the missing setup.

# Scatter plot: sepal length vs. sepal width, coloured by class.
plt.figure(figsize=(8, 6))
sns.scatterplot(x='sepal_length', y='sepal_width', hue='class', data=dataset)
plt.title("Scatter Plot of Sepal Length vs Sepal Width")
plt.show()

# Histogram of sepal length.
plt.figure(figsize=(8, 6))
dataset['sepal_length'].hist(bins=20)
plt.title("Histogram of Sepal Length")
plt.xlabel("Sepal Length")
plt.ylabel("Frequency")
plt.show()

# Box plot of sepal length per class.
plt.figure(figsize=(8, 6))
sns.boxplot(x='class', y='sepal_length', data=dataset)
plt.title("Box Plot of Sepal Length by Class")
plt.show()

# Pairwise scatter matrix of all features, coloured by class.
sns.pairplot(dataset, hue='class')
plt.show()
Output:
# Fetch the Rice (Cammeo and Osmancik) dataset and run a statistical
# analysis over a single chosen feature.
# NOTE(review): `statistical_analysis` and `fetch_ucirepo` must be
# defined/imported earlier in the practical — not visible here.
rice_cammeo_and_osmancik = fetch_ucirepo(id=545)
X = rice_cammeo_and_osmancik.data.features
y = rice_cammeo_and_osmancik.data.targets

df = pd.DataFrame(X)
df['Target'] = y

feature_to_analyze = 'Area'
stats = statistical_analysis(df, feature_to_analyze)

print(f'Statistical Analysis for {feature_to_analyze}:')
for stat in stats:
    print(f'{stat}: {stats[stat]}')
Output:
def load_data():
    """Load the Iris dataset and return its (features, labels) pair."""
    iris = load_iris()
    return iris.data, iris.target
def perform_classification_experiment():
    """Run Naive Bayes then Random Forest classification on the Iris data."""
    features, labels = load_data()
    # Same two classifiers, in the same order as before.
    for classify in (naive_bayes_classification, random_forest_classification):
        classify(features, labels)
Output:
from sklearn.model_selection import train_test_split

# Load the California Housing regression dataset.
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# The original fragment fit on X_train/y_train and predicted on X_test
# without ever creating the split; create it here (80/20, fixed seed
# for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an ordinary least-squares model and predict on the held-out set.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
Code:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
# Load and standardise the Iris features (clustering is distance-based,
# so features are scaled to zero mean / unit variance first).
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# The original fragment plotted `kmeans_labels` without ever computing
# them; fit K-means (3 clusters — Iris has 3 species) to obtain labels.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

plt.figure(figsize=(12, 5))

# Left panel: K-means cluster assignments over the first two features.
plt.subplot(1, 2, 1)
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans_labels, cmap='viridis',
            marker='o')
plt.title("K-means Clustering")
plt.xlabel(data.feature_names[0])
plt.ylabel(data.feature_names[1])

# Right panel: dendrogram from Ward-linkage hierarchical clustering.
plt.subplot(1, 2, 2)
Z = linkage(X_scaled, method='ward')
dendrogram(Z)
plt.title("Hierarchical Agglomerative Clustering (Dendrogram)")
plt.xlabel("Sample Index")
plt.ylabel("Distance")

plt.tight_layout()
plt.show()
Output:
Objective: Write a program to perform time series analysis for a given dataset.
Code:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing
# Step 1: Load the Household Power Consumption dataset from the UCI
# repository. The URL literal was split across two lines in the original
# (a copy/paste artefact), which made the fragment a syntax error;
# it is rejoined here as one string.
url = ('https://archive.ics.uci.edu/ml/machine-learning-databases/'
       '00235/household_power_consumption.zip')
# '?' marks missing readings in this dataset; Date and Time are merged
# into a single DateTime column while parsing.
# NOTE(review): `infer_datetime_format` is deprecated in pandas >= 2.0
# (the format is now inferred automatically); kept for compatibility
# with the original run.
data = pd.read_csv(url, sep=';', parse_dates={'DateTime': ['Date', 'Time']},
                   infer_datetime_format=True, na_values=['?'],
                   low_memory=False)
Objective: Write a program to perform association rule mining for a given dataset.
Code:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules