# File listing metadata: 40 lines, 1.1 KiB, Python
import os
|
|
import pypdf
|
|
import re
|
|
|
|
# The PDF is looked up in the directory that contains this script, so the
# result does not depend on the current working directory.
_script_dir = os.path.dirname(__file__)
pdf_path = os.path.join(_script_dir, "Modbus储能-组串-微逆宁波德业V118-1.pdf")
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the extracted text of every page of the PDF at *pdf_path*.

    The file is opened with ``pypdf.PdfReader`` and its pages are visited in
    the order ``reader.pages`` yields them. Each page's text is followed by
    a newline, so the result is the empty string when the reader reports no
    pages.
    """
    document = pypdf.PdfReader(pdf_path)
    # One newline-terminated chunk per page, glued together in a single pass.
    page_chunks = [page.extract_text() + "\n" for page in document.pages]
    return "".join(page_chunks)
|
|
|
|
# Pull the text of the whole PDF out once, up front.
full_text = extract_text_from_pdf(pdf_path)

# Register numbers to look for in the extracted text.
registers = [94, 245, 320]

print("Searching for registers...")

# Work line by line so each hit can be reported together with its neighbours.
lines = full_text.split('\n')

# NOTE(review): declared here but never populated by this script; kept so the
# module-level name still exists.
found_registers = {}

# Build one compiled pattern per register before the scan instead of
# rebuilding the pattern string for every (line, register) pair. The \b
# anchors keep 94 from matching inside longer numbers such as 194 or 940.
register_patterns = [
    (reg, re.compile(r'\b' + str(reg) + r'\b')) for reg in registers
]

# Number of neighbouring lines reported on each side of a matching line.
context_radius = 5

with open("pdf_output.txt", "w", encoding="utf-8") as f:
    for i, line in enumerate(lines):
        for reg, pattern in register_patterns:
            if not pattern.search(line):
                continue

            f.write(f"MATCH {reg}: {line.strip()}\n")

            # Context window, clamped to the bounds of the document; the
            # matching line itself was already written above, so skip it.
            start = max(0, i - context_radius)
            end = min(len(lines), i + context_radius + 1)
            for j in range(start, end):
                if j != i:
                    f.write(f" CTX: {lines[j].strip()}\n")

            # Visual separator between reported hits.
            f.write("-" * 20 + "\n")

print("Done. Check pdf_output.txt")
|