feat: Introduce PDF register extraction utility with a sample Deye Modbus manual and update README with comprehensive project details.

This commit is contained in:
Vladyslav Doloman
2025-11-22 23:28:57 +02:00
parent 0b377db6c2
commit 9f96a3e534
3 changed files with 126 additions and 4 deletions

39
docs/extract_pdf.py Normal file
View File

@@ -0,0 +1,39 @@
import os
import pypdf
import re
# Resolve the manual relative to this script's own directory so the lookup
# does not depend on the current working directory.
pdf_path = os.path.join(
    os.path.dirname(__file__),
    "Modbus储能-组串-微逆宁波德业V118-1.pdf",
)
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file; handed to ``pypdf.PdfReader``.

    Returns:
        The extracted text of each page, with a newline appended after
        every page. An empty string if the document has no pages.
    """
    reader = pypdf.PdfReader(pdf_path)
    # Build the result with a single join instead of growing a string with
    # ``+=`` per page, which can degrade to quadratic time on long manuals.
    return "".join(page.extract_text() + "\n" for page in reader.pages)
# Extract the whole manual once; everything below works on this text.
full_text = extract_text_from_pdf(pdf_path)

# Registers to look for
registers = [94, 245, 320]

# How many lines to show before and after each matching line.
CONTEXT_LINES = 5

# Compile one pattern per register up front instead of rebuilding the regex
# for every (line, register) pair. The \b anchors make the number match only
# as a whole token, so 94 does not match inside "194" or "940".
register_patterns = [
    (reg, re.compile(r"\b" + re.escape(str(reg)) + r"\b")) for reg in registers
]

print("Searching for registers...")

# Split text into lines for easier processing
lines = full_text.split("\n")

# The report is written relative to the current working directory.
with open("pdf_output.txt", "w", encoding="utf-8") as f:
    for i, line in enumerate(lines):
        for reg, pattern in register_patterns:
            if not pattern.search(line):
                continue

            f.write(f"MATCH {reg}: {line.strip()}\n")

            # Write context: the surrounding lines, clamped to the bounds of
            # the document, skipping the matching line itself.
            start = max(0, i - CONTEXT_LINES)
            end = min(len(lines), i + CONTEXT_LINES + 1)
            for j in range(start, end):
                if i != j:
                    f.write(f" CTX: {lines[j].strip()}\n")

            # Separator between consecutive match records.
            f.write("-" * 20 + "\n")

print("Done. Check pdf_output.txt")