feat: Introduce PDF register extraction utility with a sample Deye Modbus manual and update README with comprehensive project details.
This commit is contained in:
BIN
docs/Modbus储能-组串-微逆宁波德业V118-1.pdf
Normal file
BIN
docs/Modbus储能-组串-微逆宁波德业V118-1.pdf
Normal file
Binary file not shown.
39
docs/extract_pdf.py
Normal file
39
docs/extract_pdf.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import os
|
||||
import pypdf
|
||||
import re
|
||||
|
||||
# Resolve the manual relative to this script's own directory so the lookup
# does not depend on the current working directory.
pdf_path = os.path.join(os.path.dirname(__file__), "Modbus储能-组串-微逆宁波德业V118-1.pdf")
|
||||
|
||||
def extract_text_from_pdf(pdf_path) -> str:
    """Return the extracted text of every page of the PDF at *pdf_path*.

    The text of each page is followed by a single newline, and pages are
    concatenated in document order.

    Args:
        pdf_path: Location of the PDF file, handed to ``pypdf.PdfReader``.

    Returns:
        One string containing the text of all pages.
    """
    reader = pypdf.PdfReader(pdf_path)
    # Assemble the result with one join instead of repeated ``+=``; string
    # concatenation in a loop can degrade to quadratic copying on long PDFs.
    return "".join(page.extract_text() + "\n" for page in reader.pages)
|
||||
|
||||
full_text = extract_text_from_pdf(pdf_path)

# Register numbers to look for in the extracted manual text.
registers = [94, 245, 320]

# Compile one whole-word pattern per register up front; the search below runs
# for every (line, register) pair, so building the pattern there is wasted work.
# The \b anchors keep e.g. 94 from matching inside 194 or 940.
register_patterns = [
    (reg, re.compile(r'\b' + str(reg) + r'\b')) for reg in registers
]

print("Searching for registers...")

# Split text into lines so each hit can be reported with its neighbours.
lines = full_text.split('\n')

# The report is written relative to the current working directory.
with open("pdf_output.txt", "w", encoding="utf-8") as f:
    for i, line in enumerate(lines):
        for reg, pattern in register_patterns:
            if not pattern.search(line):
                continue
            f.write(f"MATCH {reg}: {line.strip()}\n")
            # Context window: up to five lines before and after the hit,
            # clamped to the bounds of the document; the hit itself is skipped.
            start = max(0, i - 5)
            end = min(len(lines), i + 6)
            for j in range(start, end):
                if i != j:
                    f.write(f" CTX: {lines[j].strip()}\n")
            f.write("-" * 20 + "\n")

print("Done. Check pdf_output.txt")
|
||||
Reference in New Issue
Block a user