0% found this document useful (0 votes)
25 views10 pages

Da Programs

Uploaded by

ailurophileas24
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
25 views10 pages

Da Programs

Uploaded by

ailurophileas24
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 10

1.

Implement word count / frequency programs


import string

def word_count(file_path):
    """Count how often each word occurs in a text file.

    Every character in ``string.punctuation`` is removed and the text is
    lower-cased before being split on whitespace, so "Hello," and "hello"
    are counted as the same word.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each word to its number of occurrences, or ``None``
        if the file does not exist (an error message is printed in that
        case).
    """
    # Keep the try body limited to the operation that can actually raise
    # FileNotFoundError.
    try:
        with open(file_path, 'r') as file:
            text = file.read()
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return None

    # Strip punctuation in a single pass, then normalise case.
    text = text.translate(str.maketrans('', '', string.punctuation)).lower()

    # Tally each whitespace-separated word.
    word_counts = {}
    for word in text.split():
        word_counts[word] = word_counts.get(word, 0) + 1

    return word_counts
# Example usage
file_path = 'example.txt'  # Replace this with the path to your text file

word_counts = word_count(file_path)

# word_count() returns None when the file is missing, so only print when a
# result dictionary was actually produced.
if word_counts is not None:
    for word, count in word_counts.items():
        print(f"{word}: {count}")

OUTPUT

2. Implement a Python program that processes a weather dataset

import pandas as pd

# Load the contents of weather.csv into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer='weather.csv')

# Show the loaded table on standard output
print(df)

output

3. Implement SVM / Decision tree classification techniques.

3 a) SVM

import matplotlib.pyplot as plt

from sklearn import datasets, svm

from sklearn.inspection import DecisionBoundaryDisplay

# Load the Iris dataset
iris = datasets.load_iris()

# Take the first two features so the decision regions can be drawn in 2D.
# We could avoid this by using a two-dim dataset
X = iris.data[:, :2]
y = iris.target

# We create instances of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
C = 1.0  # SVM regularization parameter

models = (
    svm.SVC(kernel="linear", C=C),
    svm.LinearSVC(C=C, max_iter=10000),
    svm.SVC(kernel="rbf", gamma=0.7, C=C),
    svm.SVC(kernel="poly", degree=3, gamma="auto", C=C),
)

# Lazily fit each classifier; the generator is consumed by zip() below
models = (clf.fit(X, y) for clf in models)

# Titles for the plots, in the same order as the models
titles = (
    "SVC with linear kernel",
    "LinearSVC (linear kernel)",
    "SVC with RBF kernel",
    "SVC with polynomial (degree 3) kernel",
)

# Set-up 2x2 grid for plotting.
fig, sub = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

X0, X1 = X[:, 0], X[:, 1]

# One subplot per classifier: decision regions with the samples on top
for clf, title, ax in zip(models, titles, sub.flatten()):
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict",
        cmap=plt.cm.coolwarm,
        alpha=0.8,
        ax=ax,
        xlabel=iris.feature_names[0],
        ylabel=iris.feature_names[1],
    )
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

plt.show()

3b) Decision tree


# Import necessary libraries

from sklearn.datasets import load_iris

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score


# Load the Iris dataset
data = load_iris()
X = data.data  # Features
y = data.target  # Labels

# Split the data into training and testing sets (30% held out for testing;
# random_state fixes the shuffle so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train the Decision Tree Classifier with fixed hyperparameters
clf = DecisionTreeClassifier(
    criterion='entropy',  # Use 'entropy' for Information Gain
    max_depth=5,  # Limit the depth of the tree
    random_state=42,
)
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Optional: Visualize the Decision Tree
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=data.feature_names, class_names=data.target_names, filled=True)
plt.show()
4. Visualize data using any plotting framework

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

import pandas as pd

# Load the Iris dataset
data = load_iris()

# Convert it into a pandas DataFrame for easier visualization
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = data.target
# Replace the integer class codes with readable species names
df['species'] = df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

# Pairplot to visualize relationships between features
sns.pairplot(df, hue='species', diag_kind='kde', corner=True)
plt.show()

# Heatmap to visualize feature correlations
# (iloc[:, :-1] drops the trailing 'species' column, which is not numeric)
plt.figure(figsize=(8, 6))
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation Heatmap")
plt.show()

# Boxplot to show feature distributions
plt.figure(figsize=(10, 6))
sns.boxplot(data=df.iloc[:, :-1], orient='h', palette='Set2')
plt.title("Boxplot of Features")
plt.show()

# Scatter plot for specific features
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    style='species',
    palette='deep',
    s=100,
)
plt.title("Scatterplot of Sepal Length vs Petal Length")
plt.show()

You might also like