0% found this document useful (0 votes)
13 views

Da Programs

Uploaded by

ailurophileas24
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views

Da Programs

Uploaded by

ailurophileas24
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 10

1.

Implement word count / frequency programs


import string

def word_count(file_path):
    """Count how often each word occurs in a text file.

    The text is stripped of ASCII punctuation (``string.punctuation``),
    lower-cased and split on whitespace, so ``"Hello,"`` and ``"hello"``
    are counted as the same word.

    Args:
        file_path: Path of the text file to read (decoded as UTF-8).

    Returns:
        A dict mapping each word to its number of occurrences, in order of
        first appearance. An empty file yields ``{}``. If the file does not
        exist, an error message is printed and ``None`` is returned.
    """
    # Local import keeps the function self-contained; the module-level
    # imports of this file only provide ``string``.
    from collections import Counter

    # Keep the try body minimal: only opening/reading the file can raise
    # FileNotFoundError, the text processing below cannot.
    try:
        # Explicit encoding so results do not depend on the platform locale.
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None

    # Remove punctuation in a single pass, then normalise case.
    text = text.translate(str.maketrans('', '', string.punctuation)).lower()

    # Counter does the frequency counting; convert back to a plain dict so
    # callers receive the same type as before.
    return dict(Counter(text.split()))
# Example usage
file_path = 'example.txt'  # Replace this with the path to your text file
word_counts = word_count(file_path)

# word_count() returns None when the file could not be found (it has already
# printed an error message), so only print results when a dict came back.
if word_counts is not None:
    for word, count in word_counts.items():
        print(f"{word}: {count}")

OUTPUT

2. Implement a Python program that processes a weather dataset

import pandas as pd

# Load weather.csv (looked up relative to the current working directory)
# into a pandas DataFrame
weather_csv = 'weather.csv'
df = pd.read_csv(weather_csv)

# Dump the DataFrame so its contents can be inspected
print(df)

OUTPUT

3. Implement SVM / Decision tree classification techniques.

# 3 a) SVM: fit four SVM classifiers on two Iris features and plot the
# decision regions of each one in a 2x2 grid.
import matplotlib.pyplot as plt

from sklearn import datasets, svm
from sklearn.inspection import DecisionBoundaryDisplay

# import some data to play with
iris = datasets.load_iris()

# Take the first two features. We could avoid this by using a two-dim dataset
X = iris.data[:, :2]
y = iris.target

# we create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
C = 1.0  # SVM regularization parameter
models = (
    svm.SVC(kernel="linear", C=C),
    svm.LinearSVC(C=C, max_iter=10000),
    svm.SVC(kernel="rbf", gamma=0.7, C=C),
    svm.SVC(kernel="poly", degree=3, gamma="auto", C=C),
)
# Generator expression: each classifier is fitted lazily, when the plotting
# loop below pulls it out of zip(). It can therefore be iterated only once.
models = (clf.fit(X, y) for clf in models)

# title for the plots (same order as the classifiers in `models`)
titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 3) kernel",
)

# Set-up 2x2 grid for plotting.
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

X0, X1 = X[:, 0], X[:, 1]

for clf, title, ax in zip(models, titles, sub.flatten()):
    # Draw the predicted-class regions of this classifier on its own axes.
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=iris.feature_names[0],
        ylabel=iris.feature_names[1],
    )
    # Overlay the samples, coloured by their true class.
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

plt.show()

3b) Decision tree


# Import necessary libraries

from sklearn.datasets import load_iris

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score


# Load the Iris dataset
data = load_iris()
X = data.data  # Features
y = data.target  # Labels

# Split the data into training and testing sets (70% train / 30% test);
# random_state is fixed so the split is the same on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train the Decision Tree Classifier. The hyperparameters are
# fixed by hand here; no tuning/search is performed.
clf = DecisionTreeClassifier(
    criterion='entropy',  # Use 'entropy' for Information Gain
    max_depth=5,  # Limit the depth of the tree
    random_state=42,
)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Optional: Visualize the Decision Tree
from sklearn.tree import plot_tree

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
4. Visualize data using any plotting framework

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

import pandas as pd

# Load the Iris dataset
data = load_iris()

# Convert it into a pandas DataFrame for easier visualization
df = pd.DataFrame(data.data, columns=data.feature_names)

# Add the class label as the LAST column, first as the integer target and
# then replaced by its name. The heatmap and boxplot below rely on this
# column being last: they select the feature columns with iloc[:, :-1].
df['species'] = data.target
df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

# Pairplot to visualize relationships between features
sns.pairplot(df, hue='species', diag_kind='kde', corner=True)
plt.show()

# Heatmap to visualize feature correlations
plt.figure(figsize=(8, 6))
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()

# Boxplot to show feature distributions
plt.figure(figsize=(10, 6))
sns.boxplot(data=df.iloc[:, :-1], orient='h', palette='Set2')
plt.title("Boxplot of Features")
plt.show()

# Scatter plot for specific features
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    style='species',
    palette='deep',
    s=100,
)
plt.title("Scatterplot of Sepal Length vs Petal Length")
plt.show()

You might also like