Gen AI Micro
Gen AI Micro
import numpy as np
similar_words = word_vectors.similar_by_vector(result_vector,
topn=10)
input_words = {word1, word2, word3}
filtered_words = [(word, similarity) for word, similarity in
similar_words if word not in input_words]
except KeyError as e:
print(f"Error: {e} not found in the vocabulary.")
# Exercise analyze_similarity (defined outside this excerpt) on three sample
# word pairs, in the same order as before.
for _first, _second in (("cat", "dog"), ("computer", "keyboard"), ("music", "art")):
    analyze_similarity(_first, _second)
def find_most_similar(word):
    """Print the five nearest neighbours of ``word`` with their scores.

    Queries the module-level ``word_vectors`` model with
    ``most_similar(word, topn=5)`` and prints one ``neighbour: score`` line
    per result, scores formatted to four decimals.

    Args:
        word: Vocabulary key to look up.

    Returns:
        None. The results, or the error message for an unknown word, are
        written to stdout.
    """
    # Only the lookup can raise KeyError, so keep the try body to that call.
    try:
        similar_words = word_vectors.most_similar(word, topn=5)
    except KeyError:
        # str() of a KeyError is the repr of its argument, so interpolating
        # the exception would print a quoted library message in the middle of
        # this sentence. Name the offending word directly instead.
        print(f"Error: '{word}' not found in the vocabulary.")
        return

    print(f"\nMost similar words to '{word}':")
    for similar_word, similarity in similar_words:
        print(f"{similar_word}: {similarity:.4f}")
# Print the nearest neighbours for three sample words, in this order.
for _query in ("happy", "sad", "technology"):
    find_most_similar(_query)
Program 2
# Look up the five nearest neighbours of a fixed query word. The vocabulary
# is checked first so an unknown word yields a message rather than an error.
input_word = 'computer'
if input_word not in word_vectors.key_to_index:
    print(f"'{input_word}' is not in the vocabulary.")
else:
    similar_words = word_vectors.most_similar(input_word, topn=5)
    print(f"5 words similar to '{input_word}':")
    for word, similarity in similar_words:
        print(f"{word} (similarity: {similarity:.2f})")
Program 3
# Scatter-plot a 2-D projection of every vocabulary word and label each point.
# NOTE(review): `model`, `plt` and `tsne_result` are not defined in the
# visible part of this file. `tsne_result` is indexed as a 2-D array with one
# row per entry of `words`; presumably it is a t-SNE projection of
# `embeddings` — confirm.
words = list(model.wv.index_to_key)
embeddings = np.array([model.wv[word] for word in words])

plt.figure(figsize=(10, 8))
plt.scatter(tsne_result[:, 0], tsne_result[:, 1], color="blue")
for i, word in enumerate(words):
    # Offset each label slightly so the text does not sit on top of its marker.
    plt.text(tsne_result[i, 0] + 0.02, tsne_result[i, 1] + 0.02, word,
             fontsize=12)
# A double-quoted literal cannot span physical lines, so the title is kept on
# a single line.
plt.title("Word Embeddings Visualization (Medical Domain)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)
plt.show()
# Query find_similar_words for two sample terms; return values are discarded,
# exactly as with the individual calls.
for _term in ("treatment", "vaccine"):
    find_similar_words(_term)
Program 4
word_embeddings = {
def find_similar_words(word):
    """Return the entry stored for ``word`` in ``word_embeddings``.

    Args:
        word: Key to look up in the module-level ``word_embeddings`` dict.

    Returns:
        The stored value, or ``None`` when ``word`` has no entry.
    """
    return word_embeddings.get(word)
def enrich_prompt(prompt):
    """Annotate each word of ``prompt`` with its related words, when known.

    Every whitespace-separated word is looked up with ``find_similar_words``.
    A word with a non-empty result is rendered as ``word (a, b, c)``; any
    other word is kept unchanged.

    Args:
        prompt: Text to enrich.

    Returns:
        The enriched words joined into a single space-separated string.
    """
    # NOTE(review): the tokenising step, the loop header, the fallback branch
    # and the `return` keyword are not present in the available copy of this
    # function. They are rebuilt here from the statements that are present
    # (`word`, `enriched_words`, the if/else and the trailing join). Confirm
    # the split rule and the join separator against the intended program.
    enriched_words = []
    for word in prompt.split():
        similar_words = find_similar_words(word)
        if similar_words:
            enriched_words.append(f"{word} ({', '.join(similar_words)})")
        else:
            enriched_words.append(word)
    return " ".join(enriched_words)
# Enrich the prompt, then show the original and the enriched text, each under
# its own heading.
enriched_prompt = enrich_prompt(original_prompt)
print("Original Prompt:", original_prompt, sep="\n")
print("\nEnriched Prompt:", enriched_prompt, sep="\n")
PROGRAM 5
import random

import gensim.downloader as api
import nltk
from nltk.tokenize import sent_tokenize

# Fetch the 'punkt' resource from NLTK's data repository (network access on
# first run).
nltk.download('punkt')

# Loading the vectors can take a while, so announce it before and after.
print("Loading pre-trained word vectors.")
word_vectors = api.load("glove-wiki-gigaword-100")
print("Word vectors loaded successfully!")
def generate_paragraph(seed_word):
    """Construct a creative paragraph using the seed word and similar words.

    Args:
        seed_word: Word passed to ``get_similar_words`` (requesting the top 5).

    Returns:
        A fixed fallback message when ``get_similar_words`` returns nothing.
        See the review note at the end of the body for the other case.
    """
    similar_words = get_similar_words(seed_word, top_n=5)
    if not similar_words:
        return "Could not generate a paragraph. Try another seed word."
    # NOTE(review): no paragraph is built or returned when similar words ARE
    # found, so the function falls through and returns None. The code that
    # assembles the paragraph appears to be missing — restore it from the
    # intended program.
# Build the "sentiment-analysis" pipeline once at module level;
# analyze_sentiment() calls this object for every text it is given.
# NOTE(review): `pipeline` is not imported in the visible part of this file —
# presumably `transformers.pipeline`; confirm.
sentiment_analyzer = pipeline("sentiment-analysis")
def analyze_sentiment(text):
    """Classify ``text`` with the module-level ``sentiment_analyzer``.

    Args:
        text: Input handed unchanged to ``sentiment_analyzer``.

    Returns:
        The first element of the analyzer's output. Callers can read its
        ``'label'`` and ``'score'`` entries.
    """
    # NOTE(review): earlier code pulled 'label' and 'score' into locals that
    # were never used, which suggests a reporting step is missing here. The
    # dead locals are removed; add the reporting back if it is wanted.
    return sentiment_analyzer(text)[0]
customer_reviews = [
analyze_sentiment(review)
PROGRAM 7
large-cnn")
"""
Args:
calculated).
calculated).
Returns:
"""
if not max_length:
print(text) print("\
nSummarized Text:")
print("Default:",summary_1[0]['summary_text'])
['summary_text'])
print("Conservative:",summary_3[0]['summary_text'])
['summary_text'])
# Placeholder input: replace the text between the triple quotes with the
# passage to summarise before running.
long_text = """
Your own sentences (upto 10)
"""
# Hand the passage to summarize_text.
summarize_text(long_text)
Program 8
import warnings
warnings.filterwarnings("ignore")
import gdown
from langchain.llms import Cohere
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
# Download the source document from Google Drive into ./document.txt.
# fuzzy=True lets gdown extract the file id from a ".../file/d/<id>/view"
# share link; a share link is not a direct-download URL on its own.
gdown.download(
    "https://drive.google.com/file/d/1oXMhn1dhYl6aYZWlp2ywbjFdfXfmgpl9/view?usp=sharing",
    "document.txt",
    quiet=False,
    fuzzy=True,
)
# Take the Cohere credential from the COHERE_API_KEY environment variable,
# falling back to a hidden interactive prompt, so that no live API key is
# stored in the source file.
import os
from getpass import getpass

llm = Cohere(
    cohere_api_key=os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")
)
# Ask the user which question should be answered from the document.
question = input("Ask your question: ")
# Prompt layout: {text} and {question} are placeholders that are substituted
# when the prompt is formatted.
template = """
Use the following document to answer the question.
Document:
{text}
Question: {question}
Answer:
"""
# Declare both placeholder names alongside the template text.
prompt = PromptTemplate(input_variables=["text", "question"],
template=template)
import warnings
warnings.filterwarnings("ignore")
from pydantic import BaseModel
from langchain.llms import Cohere
import wikipedia
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
# Structured result of the institution-extraction prompt. Every field is
# plain text. Documentation is kept in `#` comments on purpose: a class
# docstring would become part of the pydantic schema.
class InstitutionInfo(BaseModel):
    # Who founded the institution.
    founder: str
    # Year of founding, kept as text rather than an integer.
    founded_year: str
    # Branches of the institution.
    branches: str
    # Number of employees, kept as text.
    employees: str
    # Short free-text summary of the institution.
    summary: str
# Notebook output captured together with the code (not part of the program):
# C:\Users\DELL\AppData\Local\Temp\ipykernel_2252\702904228.py:1:
# LangChainDeprecationWarning: The class `Cohere` was deprecated in
# LangChain 0.1.14 and will be removed in 1.0. An updated version of the class
# exists in the `langchain-cohere` package and should be used instead. To use
# it, run `pip install -U langchain-cohere` and import it as
# `from langchain_cohere import Cohere`.
# Take the Cohere credential from the COHERE_API_KEY environment variable,
# falling back to a hidden interactive prompt, so that no live API key is
# stored in the source file.
import os
from getpass import getpass

llm = Cohere(
    cohere_api_key=os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")
)
# Fetch the Wikipedia article for `institution` and keep only its first
# 2000 characters.
# NOTE(review): `institution` is not assigned in the visible part of this
# file; presumably it is read from the user — confirm.
page_content = wikipedia.page(institution).content
page_content = page_content[:2000]
# Output parser bound to the InstitutionInfo model.
parser = PydanticOutputParser(pydantic_object=InstitutionInfo)
# Extraction prompt: {format_instructions} and {text} are placeholders that
# are substituted when the prompt is formatted.
template = """
Extract the following details from the institution description:
- Founder - Year Founded – Branches - Number of Employees - A
brief 4-line summary
{format_instructions}
Institution Description:
{text}
"""
# Print each extracted field under its heading. Every heading after the
# first starts with a newline, which leaves a blank line between entries.
for _heading, _field in (
    ("Founder:", "founder"),
    ("\nFounded Year:", "founded_year"),
    ("\nBranches:", "branches"),
    ("\nEmployees:", "employees"),
    ("\nSummary:", "summary"),
):
    print(_heading, getattr(parsed_result, _field))
Program 10
import warnings
warnings.filterwarnings("ignore")
import time
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain_cohere import CohereEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import Cohere
from langchain.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
# Load the PDF from the working directory.
loader = PyPDFLoader("THE-INDIAN-PENAL-CODE-1860.pdf")
docs = loader.load()
# Split the loaded documents into overlapping chunks (chunk_size=300,
# chunk_overlap=50).
splitter = RecursiveCharacterTextSplitter(chunk_size=300,
chunk_overlap=50)
chunks = splitter.split_documents(docs)
# Bare expression: shows the first five chunks when run as a notebook cell;
# it has no effect when the file is run as a script.
chunks[:5]
from langchain.embeddings import HuggingFaceEmbeddings
# Embed every chunk with the "all-MiniLM-L6-v2" model and build a FAISS
# vector store from the results.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding)
# Take the Cohere credential from the COHERE_API_KEY environment variable,
# falling back to a hidden interactive prompt, so that no live API key is
# stored in the source file.
import os
from getpass import getpass

llm = Cohere(
    cohere_api_key=os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")
)
# Conversation memory: history is kept under the "chat_history" key, with
# return_messages=True.
memory = ConversationBufferMemory(memory_key="chat_history",
return_messages=True)
# Retrieval chain wired to the LLM, a retriever over `vectorstore`, and the
# memory object above.
qa_chain = ConversationalRetrievalChain.from_llm(llm=llm,
retriever=vectorstore.as_retriever(), memory=memory)
# Interactive question/answer loop; type "exit" or "quit" to leave.
while True:
    # Strip surrounding whitespace so that "exit " still ends the session.
    query = input("Ask about IPC: ").strip()
    if not query:
        # Nothing was typed: ask again instead of sending an empty question
        # to the chain.
        continue
    if query.lower() in ("exit", "quit"):
        break
    result = qa_chain.run(query)
    print()
    print(result)
    # Visual separator between consecutive answers.
    print("\n" + "-" * 100 + "\n")