add convert.py
This commit is contained in:
parent
636f0aa123
commit
139e3476a0
|
|
@ -0,0 +1,118 @@
|
|||
import mammoth
|
||||
from weasyprint import HTML, CSS
|
||||
import os
|
||||
import sys
|
||||
|
||||
def convert_docx_to_pdf(input_docx, output_pdf) -> bool:
    """Convert a DOCX file to a PDF by way of an intermediate HTML document.

    Mammoth turns the DOCX into HTML, which is wrapped in a print stylesheet
    and rendered to PDF by WeasyPrint. Progress, Mammoth messages and errors
    are reported with ``print``; exceptions raised during the conversion are
    caught and reported rather than propagated.

    Args:
        input_docx: Path to the source DOCX file (absolute, or relative to
            the current working directory).
        output_pdf: Path of the PDF file to write.

    Returns:
        True if the PDF was written, False if the input file is missing or
        the conversion failed.
    """
    # Resolve both paths up front so every message shows the exact location.
    input_path = os.path.abspath(input_docx)
    output_path = os.path.abspath(output_pdf)

    # isfile() rather than exists(): a directory would pass an existence
    # check and only fail later inside open() with a confusing OS error.
    if not os.path.isfile(input_path):
        print(f"Fehler: '{input_path}' wurde nicht gefunden.")
        return False

    print(f"Lese Inhalt aus: {os.path.basename(input_path)}...")

    try:
        # 1. Convert DOCX to HTML.
        with open(input_path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)

        # Surface Mammoth's messages (e.g. unsupported Word styles).
        for message in result.messages:
            print(f"Mammoth Hinweis: {message.message}")

        print("Generiere PDF mit WeasyPrint...")

        # 2. Wrap the fragment in a full document with the print stylesheet.
        styled_html = _build_styled_html(result.value)

        # 3. Render the PDF.
        HTML(string=styled_html).write_pdf(output_path)
        print(f"Erfolg! PDF gespeichert unter: {output_path}")
        return True

    except Exception as e:
        # Top-level boundary of the conversion: report instead of crashing.
        error_text = str(e)
        print(f"Ein Fehler ist aufgetreten: {error_text}")
        # Compare case-insensitively: library loader errors usually spell
        # the names in lowercase (e.g. "cannot load library 'pango-1.0-0'").
        lowered = error_text.lower()
        if "cairo" in lowered or "pango" in lowered:
            print("\nHINWEIS: WeasyPrint benötigt externe Bibliotheken.")
            print("Windows: Installiere 'GTK for Windows Runtime'.")
            print("macOS: 'brew install pango'.")
        return False


def _build_styled_html(html_content):
    """Return a complete HTML document embedding *html_content* in its body.

    The stylesheet (A4 pages, page-number footer, heading/table/image/code
    styling) is the single place where the PDF design is controlled.
    """
    # Doubled braces are literal CSS braces inside the f-string; the only
    # interpolated field is {html_content}.
    return f"""
    <!DOCTYPE html>
    <html lang="de">
    <head>
    <meta charset="UTF-8">
    <style>
    @page {{
    size: A4;
    margin: 2.5cm;
    @bottom-right {{
    content: "Seite " counter(page) " von " counter(pages);
    font-size: 9pt;
    color: #666;
    }}
    }}
    body {{
    font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
    line-height: 1.6;
    color: #333;
    text-align: justify;
    }}
    h1, h2, h3 {{
    color: #004488;
    line-height: 1.2;
    }}
    h1 {{ border-bottom: 2px solid #004488; padding-bottom: 10px; }}
    table {{
    border-collapse: collapse;
    width: 100%;
    margin: 20px 0;
    }}
    th, td {{
    border: 1px solid #ccc;
    padding: 10px;
    text-align: left;
    }}
    th {{
    background-color: #f2f2f2;
    font-weight: bold;
    }}
    img {{
    max-width: 100%;
    height: auto;
    display: block;
    margin: 20px auto;
    }}
    code {{
    background: #f4f4f4;
    padding: 2px 4px;
    border-radius: 4px;
    }}
    </style>
    </head>
    <body>
    {html_content}
    </body>
    </html>
    """
|
||||
|
||||
if __name__ == "__main__":
    # Default paths used when no arguments are given. They are relative, so
    # they are looked up in the directory the script is started from
    # (e.g. src/dlw/ when launched from there).
    test_input = "test_dokument.docx"
    test_output = "test_ergebnis.pdf"

    # Usage: python convert.py [input.docx [output.pdf]]
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        test_input = cli_args[0]
        test_output = cli_args[1]
    elif len(cli_args) == 1:
        # A lone argument used to be silently ignored in favour of the
        # defaults. Treat it as the input file and derive the output name
        # by swapping the extension for ".pdf".
        test_input = cli_args[0]
        test_output = os.path.splitext(test_input)[0] + ".pdf"

    convert_docx_to_pdf(test_input, test_output)
|
||||
Loading…
Reference in New Issue