from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain import document_loaders
def load_pdf_file_langchain_unstructed(content_path):
    """Load a PDF with ``UnstructuredPDFLoader`` and echo what was loaded.

    The loader is created with ``mode="elements"``; every entry of its
    ``load()`` result is printed (``page_content`` first, then ``metadata``)
    before the result is handed back to the caller.

    Args:
        content_path: Location of the PDF, passed straight to the loader.

    Returns:
        The object returned by ``loader.load()``, unchanged.
    """
    loader = UnstructuredPDFLoader(content_path, mode="elements")
    data = loader.load()
    # NOTE(review): with mode="elements" each entry is presumably a single
    # document element rather than a whole page -- confirm against the
    # loader's documentation.
    for element in data:
        print('-------------------')
        print('content')
        print(element.page_content)
        print('metadata')
        print(element.metadata)
    return data
def load_pdf_file_pypdf(content_path):
    """Read a PDF through ``PyPDFLoader`` and dump the result to stdout.

    ``PyPDFLoader`` is imported lazily inside the function. The documents
    produced by ``load_and_split()`` are printed one after another -- a
    separator line, then the ``page_content`` and ``metadata`` of each, both
    preceded by a label line -- and finally returned.

    Args:
        content_path: Location of the PDF, passed straight to the loader.

    Returns:
        Whatever ``PyPDFLoader(content_path).load_and_split()`` returned.
    """
    from langchain_community.document_loaders import PyPDFLoader

    pages = PyPDFLoader(content_path).load_and_split()
    for doc in pages:
        print('-------------------')
        # Emit each labelled field in the fixed order: content, then metadata.
        for label, value in (('content', doc.page_content),
                             ('metadata', doc.metadata)):
            print(label)
            print(value)
    return pages
def load_pdf_file_MathPix(content_path):
from langchain_community.document_loaders import MathpixPDFLoader
loader = MathpixPDFLoader(content_path)
data = loader.load()
for page in data:
print('-------------------')
print