Batch 2
Batch 2
PROGRAM:
import re
def find_all_digits_in_string(input_string):
    """Print every digit found in ``input_string`` and return them.

    Args:
        input_string: Text to scan.

    Returns:
        A list of single-character digit strings in order of appearance;
        an empty list when the text contains no digit.
    """
    pattern = r'\d'
    matches = re.findall(pattern, input_string)
    if matches:
        print(f'Digits found: {", ".join(matches)}')
    else:
        print('No digits found in the string.')
    # Returned as well as printed so callers can use the result directly.
    return matches
if __name__ == "__main__":
    # Guarded so that importing this module does not block on input().
    user_input = input('Enter a string: ')
    find_all_digits_in_string(user_input)
OUTPUT:
2. Write a Python program to find the occurrence and position of substrings within a string
def find_substring_occurrences(main_string, substring):
    """Return the start index of every occurrence of ``substring``.

    Overlapping occurrences are reported, because each search resumes one
    character after the previous hit.

    Args:
        main_string: Text to search in.
        substring: Text to look for.

    Returns:
        A list of 0-based start positions in ascending order; empty when
        ``substring`` does not occur.
    """
    occurrences = []
    start_position = 0
    while True:
        # str.find returns -1 once there is no further match.
        position = main_string.find(substring, start_position)
        if position == -1:
            break
        occurrences.append(position)
        start_position = position + 1
    return occurrences
def main():
    """Read a string and a substring, then report where the substring occurs."""
    # NOTE(review): the original prompts and messages were lost from this
    # listing; the wording below is a reconstruction.
    main_string = input("Enter the main string: ")
    substring = input("Enter the substring to find: ")
    positions = find_substring_occurrences(main_string, substring)
    if positions:
        print(f"'{substring}' occurs {len(positions)} time(s) at position(s): {positions}")
    else:
        print(f"'{substring}' was not found in the string.")


if __name__ == "__main__":
    main()
OUTPUT:
3. Write a Python program that takes a string with some words. For two consecutive words in the said string,
check whether the first word ends with a vowel and the next word begins with a vowel. If the program
meets the condition, return true, otherwise false. Only one space is allowed between the words.
PROGRAM:
import re
def check_vowel_condition(input_string):
    """Tell whether two consecutive words meet at vowels.

    A pair qualifies when the first word ends with a vowel, the next word
    begins with a vowel, and exactly one space separates them.

    Args:
        input_string: Sentence to inspect.

    Returns:
        True if at least one qualifying pair exists, otherwise False.
    """
    word_pattern = r'\b\w+\b'
    vowels = 'aeiouAEIOU'
    # Match objects keep each word's position, so the gap between two
    # neighbouring words can be checked.
    found = list(re.finditer(word_pattern, input_string))
    for first, second in zip(found, found[1:]):
        first_word = first.group()
        second_word = second.group()
        gap = input_string[first.end():second.start()]
        if gap == ' ' and first_word[-1] in vowels and second_word[0] in vowels:
            return True
    return False
if __name__ == "__main__":
    # NOTE(review): the line that read user_input was missing from this
    # program; the prompt text is a reconstruction.
    user_input = input('Enter a string: ')
    result = check_vowel_condition(user_input)
    print(result)
OUTPUT:
4. Write a Python program to find the frequency of each word from a text file using NLTK.
import re
from collections import Counter
from nltk.corpus import stopwords
def most_frequent_non_stop_words(text, num_words=10):
    """Return the most common words of ``text`` that are not stop words.

    The text is lower-cased and stripped of every character that is neither
    a word character nor whitespace before it is split on whitespace.

    Args:
        text: Text to analyse.
        num_words: How many (word, count) pairs to return.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first.
    """
    clean_text = re.sub(r'[^\w\s]', '', text.lower())
    words = clean_text.split()
    # NOTE(review): presumably needs the NLTK 'stopwords' corpus to be
    # downloaded beforehand; confirm for the target environment.
    stop_words = set(stopwords.words('english'))
    word_counts = Counter(word for word in words if word not in stop_words)
    most_common_words = word_counts.most_common(num_words)
    return most_common_words
text = """Natural language processing (NLP) is a subfield of artificial intelligence that focuses on the interaction
between computers and humans through natural language. The ultimate goal of NLP is to read, decipher, understand,
and make sense of human language in a way that is both valuable and meaningful. NLP techniques are used to analyze
text, sentiment, language structure, and more. These techniques involve various tasks such as text classification,
named entity recognition, machine translation, and text generation.
Stop words are common words that are often removed from text during NLP preprocessing. They typically
include words like "the", "and", "in", "is", "of", "on", etc."""

if __name__ == "__main__":
    # pprint was called below without ever being imported.
    from pprint import pprint

    top_words = most_frequent_non_stop_words(text, num_words=10)
    pprint(top_words)
[('language', 4),
('nlp', 4),
('text', 4),
('words', 3),
('natural', 2),
('techniques', 2),
('processing', 1),
('subfield', 1),
('artificial', 1),
('intelligence', 1)]
5. Write an NLTK program for text classification using the Naive Bayes algorithm
PROGRAM:
# Install the dependency first (shell command, not Python): pip install nltk
import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
import random

nltk.download('movie_reviews')

# One (list of words, category) pair per review file in the corpus.
documents = [
    (list(movie_reviews.words(fileid)), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]
# The list is built category by category, so shuffle it before it is split.
random.shuffle(documents)
def document_features(document, vocabulary=None):
    """Build a presence/absence feature dictionary for one document.

    Args:
        document: Iterable of the words in the document.
        vocabulary: Words to use as features. Defaults to the module-level
            ``word_features`` list.

    Returns:
        A dict mapping each vocabulary word to True if it occurs in
        ``document`` and False otherwise.
    """
    if vocabulary is None:
        vocabulary = word_features
    # A set makes each membership test O(1).
    document_words = set(document)
    return {word: (word in document_words) for word in vocabulary}
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
# Plain key order of a FreqDist is not frequency order, so ask for the
# 2000 most frequent words explicitly.
word_features = [word for word, _ in all_words.most_common(2000)]
featuresets = [(document_features(d), c) for (d, c) in documents]
# First 1500 feature sets train the classifier; the rest are held out.
train_set, test_set = featuresets[:1500], featuresets[1500:]
classifier = NaiveBayesClassifier.train(train_set)
accuracy_score = accuracy(classifier, test_set)
print(f'Accuracy: {accuracy_score:.2%}')
OUTPUT:
6. Write a Python NLTK program to get a list of common stop words in various languages
import nltk
def get_stopwords(language):
    """Return the NLTK stop words for ``language`` as a set.

    Args:
        language: Language name passed to ``stopwords.words``,
            e.g. ``'english'``.

    Returns:
        A set of stop-word strings.
    """
    # Imported here because this program's own imports only bring in nltk.
    from nltk.corpus import stopwords

    stop_words = set(stopwords.words(language))
    return stop_words
def main():
    """Print the stop words of a few languages, one language at a time."""
    # Specify the languages for which you want to get stop words
    # NOTE(review): the original language list and print statements were
    # lost from this listing; the values below are a reconstruction.
    languages = ['english', 'french', 'german', 'spanish']
    for language in languages:
        stop_words = get_stopwords(language)
        print(f"Stop words in {language}:")
        print(', '.join(sorted(stop_words)))


if __name__ == "__main__":
    main()
OUTPUT:
i, me, my, myself, we, our, ours, ourselves, you, you're, you've, ...
7. Write a Python program to generate Bigrams of words from a given list of strings
PROGRAM:
import nltk
def generate_bigrams(strings):
    """Return the word bigrams of every string in ``strings``.

    Each string is split on whitespace; bigrams never span two strings.

    Args:
        strings: Iterable of sentences.

    Returns:
        A list of ``(word, next_word)`` tuples in order of appearance.
    """
    bigram_list = []
    for text in strings:
        words = text.split()
        # Pairing the word list with itself shifted by one yields each
        # adjacent pair; a string with fewer than two words adds nothing.
        bigram_list.extend(zip(words, words[1:]))
    return bigram_list
if __name__ == "__main__":
    # NOTE(review): the sample input and the print call were missing from
    # this listing; the values below are a reconstruction.
    input_strings = ['Sum all the items in a list', 'Find the second smallest number in a list']
    result = generate_bigrams(input_strings)
    print(result)
8. Write a Python NLTK program to get the overview of the tagset, details of a specific tag in the tagset and
details on several related tagsets, using regular expression.
import nltk
import re
nltk.download('tagsets')
def get_tagset_overview(tagset):
    """Print the documentation of every tag in the named tagset.

    Args:
        tagset: Name of the tagset: ``'upenn'``, ``'brown'`` or ``'claws5'``.

    Raises:
        ValueError: If ``tagset`` is not one of the supported names.
    """
    printers = {
        'upenn': nltk.help.upenn_tagset,
        'brown': nltk.help.brown_tagset,
        'claws5': nltk.help.claws5_tagset,
    }
    if tagset not in printers:
        raise ValueError(f'Unknown tagset: {tagset!r}')
    # The nltk.help functions take a tag pattern, not a tagset name; calling
    # one without a pattern lists the whole tagset. They print their output
    # themselves, so there is nothing to print here.
    printers[tagset]()
def get_tag_details(tag):
    """Print the Penn Treebank documentation for ``tag``.

    Args:
        tag: A tag name or a regular expression matching tag names,
            e.g. ``'NN'`` or ``'NN.*'``.
    """
    # upenn_tagset prints the entry itself; printing its return value would
    # only add a stray "None" line.
    nltk.help.upenn_tagset(tag)
def get_related_tagsets(tag):
    """Print the Brown tagset documentation for ``tag``.

    Args:
        tag: A tag name or a regular expression matching tag names.
    """
    # brown_tagset prints the entry itself; printing its return value would
    # only add a stray "None" line.
    nltk.help.brown_tagset(tag)
def main():
    """Show the tagset overview, one tag's details and the related tagset entry."""
    tagset = 'upenn'
    specific_tag = 'NN'
    get_tagset_overview(tagset)
    get_tag_details(specific_tag)
    get_related_tagsets(specific_tag)


if __name__ == "__main__":
    main()
OUTPUT:
9. Write a Python program to count the occurrences of each word in a given sentence
PROGRAM:
from collections import Counter
import string
def count_word_occurrences(sentence):
    """Count how often each word occurs in ``sentence``.

    ASCII punctuation is removed and the text is lower-cased first, so
    ``"Hello,"`` and ``"hello"`` count as the same word.

    Args:
        sentence: Text to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to its count.
    """
    # One pass that deletes every character listed in string.punctuation.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    words = sentence.translate(strip_punctuation).lower().split()
    word_counts = Counter(words)
    return word_counts
if __name__ == "__main__":
    # NOTE(review): the line that read input_sentence and the loop header
    # were missing from this listing; they are reconstructed here.
    input_sentence = input("Enter a sentence: ")
    result = count_word_occurrences(input_sentence)
    for word, count in result.items():
        print(f"{word}: {count}")
OUTPUT:
10. Write a Python NLTK program to compare the similarity of two given nouns.
OUTPUT:
11. Write a function that finds the 50 most frequently occurring words of a text that are not stop words.
LAB EXP NO 5
LAB EXP 6
13. Write a python Program to implement your own word2vec(skip-gram) model in Python
LAB EXP NO 6
14. Write a program to find the odd word out among given words using Word2Vec embeddings
18. Write a program to convert text to speech in Python using win32com.client
19. Write a Program to Convert PDF File Text to Audio Speech using Python
import PyPDF2
import os
# NOTE(review): `file` is not defined in the visible code - presumably the
# PDF opened in binary mode; the opening line appears to be missing.
pdf_reader = PyPDF2.PdfReader(file)
# Accumulates the text extracted from the PDF.
text = ''
# NOTE(review): `page_num` is not defined here - presumably a loop over the
# page indices is missing; as written only one page would be read.
page = pdf_reader.pages[page_num]
text += page.extract_text()
output_path = 'output.mp3'
# NOTE(review): `tts` is never created in the visible code - the line that
# builds the text-to-speech object from `text` appears to be missing.
tts.save(output_path)
# Print information
LAB EXP 20