janus.utils.pdf_docs_reader

Attributes:
  log

Classes:
  PDFDocsReader

Module Contents

janus.utils.pdf_docs_reader.log

class janus.utils.pdf_docs_reader.PDFDocsReader(language, chunk_size=1000, chunk_overlap=100, start_page=None, end_page=None, vectorizer=TfidfVectorizer())

  Parameters:
    language (str)
    chunk_size (int)
    chunk_overlap (int)
    start_page (Optional[int])
    end_page (Optional[int])
    vectorizer (sklearn.feature_extraction.text.CountVectorizer)

  load_and_chunk_pdf()
    Return type: List[str]

  vectorize_documents()
    Return type: (sklearn.feature_extraction.text.TfidfVectorizer, any)

  search_language_reference(query, top_k=1, min_similarity=0.1)
    Searches through the vectorized PDF for the query using TF-IDF and returns a list of LangChain Documents.

    Parameters:
      query (List[str])
      top_k (int)
      min_similarity (float)

    Return type: List[langchain_core.documents.Document]