"""Scan the text of a PDF for selected register numbers.

Every line that contains one of the numbers in ``registers`` as a whole
word is written to ``pdf_output.txt`` together with a few surrounding
lines of context.
"""

import os
import re

import pypdf

# The PDF is expected to sit in the same directory as this script.
pdf_path = os.path.join(
    os.path.dirname(__file__), "Modbus储能-组串-微逆宁波德业V118-1.pdf"
)

# Registers to look for
registers = [94, 245, 320]


def extract_text_from_pdf(pdf_path):
    """Return the extracted text of every page in the PDF at *pdf_path*.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string with each page's text followed by a newline.
    """
    reader = pypdf.PdfReader(pdf_path)
    # Join once instead of growing a string with ``+=`` page by page.
    # ``or ""`` is defensive: a page that yields no text contributes only
    # its trailing newline.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)


def _write_matches(lines, registers, out, context=5):
    """Write each matching line and its surrounding context to *out*.

    Args:
        lines: The text split into individual lines.
        registers: Numbers to search for; each is matched as a whole word.
        out: Writable text stream that receives the report.
        context: How many lines before and after a match to include.
    """
    # Compile one pattern per register up front rather than per line.
    patterns = [
        (reg, re.compile(r'\b' + str(reg) + r'\b')) for reg in registers
    ]
    total = len(lines)

    for i, line in enumerate(lines):
        for reg, pattern in patterns:
            if not pattern.search(line):
                continue

            out.write(f"MATCH {reg}: {line.strip()}\n")

            # Write context, clamped to the bounds of the text and
            # skipping the matching line itself.
            start = max(0, i - context)
            end = min(total, i + context + 1)
            for j in range(start, end):
                if j != i:
                    out.write(f"  CTX: {lines[j].strip()}\n")
            out.write("-" * 20 + "\n")


def main():
    """Extract the PDF text and write the register report to disk."""
    full_text = extract_text_from_pdf(pdf_path)

    print("Searching for registers...")

    # Split text into lines for easier processing
    lines = full_text.split('\n')

    with open("pdf_output.txt", "w", encoding="utf-8") as f:
        _write_matches(lines, registers, f)

    print("Done. Check pdf_output.txt")


if __name__ == "__main__":
    main()