feat: Introduce PDF register extraction utility with a sample Deye Modbus manual and update README with comprehensive project details.

2025-11-22 23:28:57 +02:00
parent 0b377db6c2
commit 9f96a3e534
3 changed files with 126 additions and 4 deletions
--- a/docs/extract_pdf.py
+++ b/docs/extract_pdf.py
@@ -0,0 +1,39 @@
+import os
+import pypdf
+import re
+
+pdf_path = os.path.join(os.path.dirname(__file__), "Modbus储能-组串-微逆宁波德业V118-1.pdf")  # sample manual, resolved next to this script
+
+def extract_text_from_pdf(pdf_path):
+    """Return the text of every page of the PDF at pdf_path, with a newline after each page."""
+    reader = pypdf.PdfReader(pdf_path)
+    # Join once instead of growing a string with `+=` per page; `or ""` keeps
+    # the result well-formed if extraction gives nothing back for a page.
+    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
+
+full_text = extract_text_from_pdf(pdf_path)
+
+# Register addresses to look up in the manual.
+registers = [94, 245, 320]
+# Compile each whole-number pattern once instead of rebuilding it per line;
+# \b keeps 94 from matching inside longer numbers such as 194 or 940.
+patterns = [(reg, re.compile(rf"\b{reg}\b")) for reg in registers]
+
+print("Searching for registers...")
+
+# Split text into lines so each match can be reported with its neighbours.
+lines = full_text.split('\n')
+
+# The report goes to pdf_output.txt in the current working directory.
+with open("pdf_output.txt", "w", encoding="utf-8") as f:
+    for i, line in enumerate(lines):
+        for reg, pattern in patterns:
+            if not pattern.search(line):
+                continue
+            f.write(f"MATCH {reg}: {line.strip()}\n")
+            # Context: up to 5 lines on each side of the match, match excluded.
+            for ctx in lines[max(0, i - 5):i] + lines[i + 1:i + 6]:
+                f.write(f"   CTX: {ctx.strip()}\n")
+            f.write("-" * 20 + "\n")
+
+print("Done. Check pdf_output.txt")