Gen AI Micro

The document contains multiple Python programs utilizing various libraries such as Gensim, Matplotlib, and Transformers for tasks related to word vector analysis, sentiment analysis, summarization, and information extraction. Each program demonstrates different functionalities, including exploring word relationships, visualizing word embeddings, generating sentences, and extracting structured information from text. The programs showcase practical applications of natural language processing techniques and machine learning models.


Program 1

!pip install gensim numpy

import gensim.downloader as api
import numpy as np
from numpy.linalg import norm

print("Loading pre-trained word vectors...")
word_vectors = api.load("word2vec-google-news-300")

def explore_word_relationships(word1, word2, word3):
    try:
        vec1 = word_vectors[word1]
        vec2 = word_vectors[word2]
        vec3 = word_vectors[word3]

        result_vector = vec1 - vec2 + vec3

        similar_words = word_vectors.similar_by_vector(result_vector, topn=10)
        input_words = {word1, word2, word3}
        filtered_words = [(word, similarity) for word, similarity in similar_words if word not in input_words]

        print(f"\nWord Relationship: {word1} - {word2} + {word3}")
        print("Most similar words to the result (excluding input words):")
        for word, similarity in filtered_words[:5]:
            print(f"{word}: {similarity:.4f}")

    except KeyError as e:
        print(f"Error: {e} not found in the vocabulary.")

explore_word_relationships("king", "man", "woman")


explore_word_relationships("paris", "france", "germany")
explore_word_relationships("apple", "fruit", "carrot")

def analyze_similarity(word1, word2):
    try:
        similarity = word_vectors.similarity(word1, word2)
        print(f"\nSimilarity between '{word1}' and '{word2}': {similarity:.4f}")
    except KeyError as e:
        print(f"Error: {e} not found in the vocabulary.")

analyze_similarity("cat", "dog")
analyze_similarity("computer", "keyboard")
analyze_similarity("music", "art")

def find_most_similar(word):
    try:
        similar_words = word_vectors.most_similar(word, topn=5)
        print(f"\nMost similar words to '{word}':")
        for similar_word, similarity in similar_words:
            print(f"{similar_word}: {similarity:.4f}")
    except KeyError as e:
        print(f"Error: {e} not found in the vocabulary.")

find_most_similar("happy")
find_most_similar("sad")
find_most_similar("technology")
Program 2

!pip install gensim matplotlib scikit-learn numpy


import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from gensim.downloader import load
import numpy as np # Import NumPy for array conversion
word_vectors = load('glove-wiki-gigaword-100')
tech_words = ['computer', 'internet', 'software', 'hardware', 'network', 'data',
'cloud', 'robot', 'algorithm', 'technology']

tech_words = [word for word in tech_words if word in word_vectors.key_to_index]

vectors = np.array([word_vectors[word] for word in tech_words])


tsne = TSNE(n_components=2, random_state=42, perplexity=5)  # perplexity reduced to match the small sample size
reduced_vectors = tsne.fit_transform(vectors)
plt.figure(figsize=(10, 6))
for i, word in enumerate(tech_words):
    plt.scatter(reduced_vectors[i, 0], reduced_vectors[i, 1], label=word)
    plt.text(reduced_vectors[i, 0] + 0.02, reduced_vectors[i, 1] + 0.02, word, fontsize=12)
plt.title("t-SNE Visualization of Technology Words")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.legend()
plt.show()

input_word = 'computer'
if input_word in word_vectors.key_to_index:
    similar_words = word_vectors.most_similar(input_word, topn=5)
    print(f"5 words similar to '{input_word}':")
    for word, similarity in similar_words:
        print(f"{word} (similarity: {similarity:.2f})")
else:
    print(f"'{input_word}' is not in the vocabulary.")
Program 3

!pip install gensim matplotlib


from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np

medical_corpus = [
    "Ten sentences"  # placeholder: replace with up to ten medical-domain sentences
]
processed_corpus = [sentence.lower().split() for sentence in medical_corpus]

print("Training Word2Vec model...")


model = Word2Vec(sentences=processed_corpus, vector_size=100,
window=5, min_count=1, workers=4, epochs=50)
print("Model training complete!")

words = list(model.wv.index_to_key)
embeddings = np.array([model.wv[word] for word in words])

tsne = TSNE(n_components=2, random_state=42, perplexity=5, n_iter=300)
tsne_result = tsne.fit_transform(embeddings)

plt.figure(figsize=(10, 8))
plt.scatter(tsne_result[:, 0], tsne_result[:, 1], color="blue")
for i, word in enumerate(words):
    plt.text(tsne_result[i, 0] + 0.02, tsne_result[i, 1] + 0.02, word, fontsize=12)
plt.title("Word Embeddings Visualization (Medical Domain)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)
plt.show()

def find_similar_words(input_word, top_n=5):
    try:
        similar_words = model.wv.most_similar(input_word, topn=top_n)
        print(f"Words similar to '{input_word}':")
        for word, similarity in similar_words:
            print(f"  {word} ({similarity:.2f})")
    except KeyError:
        print(f"'{input_word}' not found in vocabulary.")

find_similar_words("treatment")
find_similar_words("vaccine")
Program 4

word_embeddings = {
    "ai": ["machine learning", "deep learning", "data science"],
    "data": ["information", "dataset", "analytics"],
    "learning": ["education", "training", "knowledge"],
    "robot": ["automation", "machine", "mechanism"]
}

def find_similar_words(word):
    if word in word_embeddings:
        return word_embeddings[word]
    else:
        return []

def enrich_prompt(prompt):
    words = prompt.lower().split()
    enriched_words = []
    for word in words:
        similar_words = find_similar_words(word)
        if similar_words:
            enriched_words.append(f"{word} ({', '.join(similar_words)})")
        else:
            enriched_words.append(word)
    return " ".join(enriched_words)

original_prompt = "Explain AI and its applications in science."

enriched_prompt = enrich_prompt(original_prompt)

print("Original Prompt:")

print(original_prompt)
print("\nEnriched Prompt:")

print(enriched_prompt)
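
With the dictionary above only "ai" matches, so the run should print something close to:

# Expected output (approximate):
# Original Prompt:
# Explain AI and its applications in science.
#
# Enriched Prompt:
# explain ai (machine learning, deep learning, data science) and its applications in science.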

Program 5

import gensim.downloader as api
import random
import nltk
from nltk.tokenize import sent_tokenize

nltk.download('punkt')
print("Loading pre-trained word vectors.")
word_vectors = api.load("glove-wiki-gigaword-100")
print("Word vectors loaded successfully!")

def get_similar_words(seed_word, top_n=5):
    """Retrieve the top-N similar words for a given seed word."""
    try:
        similar_words = word_vectors.most_similar(seed_word, topn=top_n)
        return [word[0] for word in similar_words]
    except KeyError:
        print(f"'{seed_word}' not found in vocabulary. Try another word.")
        return []

def generate_sentence(seed_word, similar_words):
    """Create a meaningful sentence using the seed word and its similar words."""
    sentence_templates = [
        f"The {seed_word} was surrounded by {similar_words[0]} and {similar_words[1]}.",
        f"People often associate {seed_word} with {similar_words[2]} and {similar_words[3]}.",
        f"In the land of {seed_word}, {similar_words[4]} was a common sight.",
        f"A story about {seed_word} would be incomplete without {similar_words[1]} and {similar_words[3]}.",
    ]
    return random.choice(sentence_templates)

def generate_paragraph(seed_word):
    """Construct a creative paragraph using the seed word and similar words."""
    similar_words = get_similar_words(seed_word, top_n=5)
    if not similar_words:
        return "Could not generate a paragraph. Try another seed word."
    paragraph = [generate_sentence(seed_word, similar_words) for _ in range(4)]
    return " ".join(paragraph)

seed_word = input("Enter a seed word: ")
paragraph = generate_paragraph(seed_word)
print("\nGenerated Paragraph:\n")
print(paragraph)
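
Since the templates are chosen with random.choice, runs differ each time; an optional one-line addition makes the output repeatable:

# Optional: make the random template choice reproducible
random.seed(42)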
Program 6

!pip install transformers

from transformers import pipeline

print("🔍 Loading Sentiment Analysis Model...")

sentiment_analyzer = pipeline("sentiment-analysis")

def analyze_sentiment(text):
    """
    Analyze the sentiment of a given text input.

    Args:
        text (str): Input sentence or paragraph.

    Returns:
        dict: Sentiment label and confidence score.
    """
    result = sentiment_analyzer(text)[0]
    label = result['label']
    score = result['score']
    print(f"\n📝 Input Text: {text}")
    print(f"📊 Sentiment: {label} (Confidence: {score:.4f})\n")
    return result

customer_reviews = [
    "It was an average experience, nothing special.",
    "Absolutely fantastic quality! Highly recommended.",
    "Not great, but not the worst either."
]

print("\n📢 Customer Sentiment Analysis Results:")
for review in customer_reviews:
    analyze_sentiment(review)
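
pipeline("sentiment-analysis") downloads whatever default checkpoint the installed transformers version ships (currently a DistilBERT model fine-tuned on SST-2); to make runs reproducible, the checkpoint can be pinned explicitly, for example:

# Optional: pin the checkpoint instead of relying on the pipeline default
sentiment_analyzer = pipeline("sentiment-analysis",
                              model="distilbert-base-uncased-finetuned-sst-2-english")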

Program 7

!pip install transformers

from transformers import pipeline

print("🔍 Loading Summarization Model (BART)...")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_text(text, max_length=None, min_length=None):
    """
    Summarizes a given long text using a pre-trained BART summarization model.

    Args:
        text (str): The input passage to summarize.
        max_length (int): Maximum length of the summary (default: auto-calculated).
        min_length (int): Minimum length of the summary (default: auto-calculated).

    Returns:
        str: The summarized text.
    """
    text = " ".join(text.split())

    if not max_length:
        max_length = min(len(text) // 3, 150)
    if not min_length:
        min_length = max(30, max_length // 3)

    # Summary with the auto-calculated lengths and sampling enabled
    summary = summarizer(text, max_length=max_length, min_length=min_length,
                         do_sample=True, temperature=0.9, repetition_penalty=1.2)

    # Four decoding strategies for comparison
    summary_1 = summarizer(text, max_length=150, min_length=30, do_sample=False)
    summary_2 = summarizer(text, max_length=150, min_length=30, do_sample=True, temperature=0.9)
    summary_3 = summarizer(text, max_length=150, min_length=30, do_sample=False, num_beams=5)
    summary_4 = summarizer(text, max_length=150, min_length=30, do_sample=True, top_k=50, top_p=0.95)

    print("\nOriginal Text:")
    print(text)
    print("\nSummarized Text:")
    print("Default:", summary_1[0]['summary_text'])
    print("High randomness:", summary_2[0]['summary_text'])
    print("Conservative:", summary_3[0]['summary_text'])
    print("Diverse sampling:", summary_4[0]['summary_text'])

long_text = """
Your own sentences (up to 10)
"""
summarize_text(long_text)
Program 8

import warnings
warnings.filterwarnings("ignore")
import gdown
from langchain.llms import Cohere
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# fuzzy=True lets gdown resolve a Drive "view" share link to the underlying file id
gdown.download("https://drive.google.com/file/d/1oXMhn1dhYl6aYZWlp2ywbjFdfXfmgpl9/view?usp=sharing",
               "document.txt", quiet=False, fuzzy=True)

with open('document.txt', 'r', encoding='utf-8') as f:
    text_data = f.read()

print(text_data[:500])
text_chunk = text_data[:5000]

llm = Cohere(cohere_api_key="BroovbPDXL4Qf8BlI9GiCtCJQIpmi1N4e9vzDhUg")
question = input("Ask your question: ")

template = """
Use the following document to answer the question.
Document:
{text}
Question: {question}
Answer:
"""
prompt = PromptTemplate(input_variables=["text", "question"],
template=template)

chain = LLMChain(llm=llm, prompt=prompt)

output = chain.run(text=text_chunk, question=question)


print(output)
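
The Cohere API key is hard-coded above; a safer sketch (assuming an environment variable named COHERE_API_KEY has been set) is:

# Hypothetical alternative: read the API key from the environment
import os
llm = Cohere(cohere_api_key=os.environ["COHERE_API_KEY"])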
Program 9

import warnings
warnings.filterwarnings("ignore")
from pydantic import BaseModel
from langchain.llms import Cohere
import wikipedia
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser

class InstitutionInfo(BaseModel):
    founder: str
    founded_year: str
    branches: str
    employees: str
    summary: str

llm = Cohere(cohere_api_key="BroovbPDXL4Qf8BlI9GiCtCJQIpmi1N4e9vzDhUg")

institution = input("Enter Institution Name: ")


page_content = wikipedia.page(institution).content
page_content = page_content[:2000]

parser = PydanticOutputParser(pydantic_object=InstitutionInfo)

template = """
Extract the following details from the institution description:
- Founder - Year Founded – Branches - Number of Employees - A
brief 4-line summary
{format_instructions}

Institution Description:
{text}
"""

prompt = PromptTemplate( input_variables=["text"],


template="Extract structured information about the institution below:\n\
n{text}\n\n{format_instructions}",
partial_variables={"format_instructions":
parser.get_format_instructions()}, )

chain = LLMChain(llm=llm, prompt=prompt)


result = chain.run(text=page_content)
parsed_result = parser.parse(result)
print(parsed_result)

print("Founder:", parsed_result.founder)
print("\nFounded Year:", parsed_result.founded_year)
print("\nBranches:", parsed_result.branches)
print("\nEmployees:", parsed_result.employees)
print("\nSummary:", parsed_result.summary)
Program 10

import warnings
warnings.filterwarnings("ignore")
import time
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain_cohere import CohereEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import Cohere
from langchain.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

loader = PyPDFLoader("THE-INDIAN-PENAL-CODE-1860.pdf")
docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=300,
chunk_overlap=50)
chunks = splitter.split_documents(docs)
chunks[:5]
from langchain.embeddings import HuggingFaceEmbeddings
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding)
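
The CohereEmbeddings import at the top of this program is otherwise unused; as an alternative sketch (assuming a valid Cohere key and the embed-english-v3.0 model), the vector store could be built with Cohere embeddings instead of the HuggingFace model:

# Hypothetical alternative using the CohereEmbeddings import above:
# embedding = CohereEmbeddings(cohere_api_key="<your-key>", model="embed-english-v3.0")
# vectorstore = FAISS.from_documents(chunks, embedding)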

llm = Cohere(cohere_api_key="6ClgZVIwHaiQcMRpTfiPFgXKZuhWQ5zQhvheiyhT")
memory = ConversationBufferMemory(memory_key="chat_history",
return_messages=True)
qa_chain = ConversationalRetrievalChain.from_llm(llm=llm,
retriever=vectorstore.as_retriever(), memory=memory)
while True:
    query = input("Ask about IPC: ")
    if query.lower() in ["exit", "quit"]:
        break
    result = qa_chain.run(query)
    print()
    print(result)
    print("\n" + "-" * 100 + "\n")
