-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpipeline.py
More file actions
94 lines (75 loc) · 3.23 KB
/
Copy pathpipeline.py
File metadata and controls
94 lines (75 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import json
import os
from language_model import get_definition, get_relevance, normalize_entry
# The translation backend is optional: record whether translate.py could be
# imported so that translate() can return an empty result instead of failing.
try:
    from translate import translate_text as _translate_api
    _API_AVAILABLE = True
except ImportError:
    _API_AVAILABLE = False
def translate(text, target_lang="en"):
    """Translate *text* via the Claude API (translate.py).

    Requires ``pip install anthropic`` and a set ``ANTHROPIC_API_KEY``.

    Args:
        text: Text to translate. ``None``, empty and whitespace-only strings
            count as "nothing to translate" and are never sent to the backend.
        target_lang: Target language code handed to the backend
            (default ``"en"``).

    Returns:
        The backend's return value, or an empty string when there is nothing
        to translate or the backend could not be imported.
    """
    # Whitespace-only input is truthy, so it needs its own check; otherwise a
    # blank definition would trigger a pointless backend call.
    if not text or (isinstance(text, str) and not text.strip()):
        return ""
    if not _API_AVAILABLE:
        return ""
    return _translate_api(text, target_lang)
# Path separators (POSIX and Windows) and NUL are replaced by "_" in every
# file or directory name that is derived from seed data.
_PATH_UNSAFE = str.maketrans({"/": "_", "\\": "_", "\0": "_"})


def _safe_path_component(name, replace_spaces=False):
    """Return *name* as a single path component that stays inside its parent."""
    cleaned = str(name).translate(_PATH_UNSAFE)
    if replace_spaces:
        cleaned = cleaned.replace(" ", "_")
    # "", "." and ".." do not name a new entry; "." and ".." would resolve to
    # the current or parent directory.
    if cleaned in ("", ".", ".."):
        return "_"
    return cleaned


def _format_tags(tags):
    """Return the tags of a stub as one comma-separated string."""
    if not tags:
        return ""
    if isinstance(tags, str):
        # A bare string is one tag; joining it would split it into characters.
        return tags
    return ", ".join(str(tag) for tag in tags)


def _write_stub_markdown(folder, filename, title, stub):
    """Create *folder* if needed and write the Markdown file for one stub."""
    # Build the whole document first so that the file is only opened once all
    # content is available.
    lines = [
        f"# {title}\n\n",
        f"**Definition (DE):** {get_definition(stub, 'de')}\n\n",
        f"**Definition (EN):** {get_definition(stub, 'en')}\n\n",
        f"**Relevanz:** {get_relevance(stub, 'de')}\n\n",
        f"**Tags:** {_format_tags(stub.get('tags'))}\n",
    ]
    os.makedirs(folder, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as md:
        md.writelines(lines)


def main():
    """Generate one Markdown stub file per entry of ``wikistub_seed.json``.

    The seed file is read from the directory of this script. Its top-level
    ``"MetaWiki"`` object is walked as category -> subcategory -> list of stub
    dicts. For every stub the fields returned by ``normalize_entry`` are
    merged in, a missing English definition is filled by translating the
    German one, and ``output/<category>/<subcategory>/<title>.md`` is written
    next to the script. Path separators in category, subcategory and title are
    replaced by ``_`` so every stub maps to exactly one file below ``output``.

    Progress and errors are printed. A missing or unreadable seed file, or a
    missing root key, ends the run early; a stub that cannot be written is
    reported and skipped. The function always returns ``None``.
    """
    print("Starte WikiStub-Seed Pipeline...")

    base_path = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(base_path, "wikistub_seed.json")

    # Load the JSON. Opening directly (instead of checking existence first)
    # avoids a race between the check and the open.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"FEHLER: {json_path} nicht gefunden.")
        return
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"FEHLER beim Lesen von {json_path}: {e}")
        return

    # Processing
    root_key = "MetaWiki"
    if not isinstance(data, dict) or root_key not in data:
        print(f"FEHLER: Root-Key '{root_key}' fehlt in JSON.")
        return

    count_processed = 0
    for category, subcats in data[root_key].items():
        print(f"Verarbeite Kategorie: {category}")
        for subcat, stubs in subcats.items():
            # NOTE: 'output' is used as the base directory so that nothing
            # existing is overwritten; this may later be changed to '.'.
            folder = os.path.join(
                base_path,
                "output",
                _safe_path_component(category),
                _safe_path_component(subcat),
            )
            for stub in stubs:
                # `or` also covers an explicit null/empty title in the JSON.
                title = stub.get("title") or "Unbenannt"
                print(f" - Generiere Stub: {title}")

                # Merge the fields returned by normalize_entry into the stub,
                # then fill a missing English definition from the German one.
                stub.update(normalize_entry(stub))
                if not get_definition(stub, "en"):
                    translated = translate(get_definition(stub, "de"))
                    stub["definition_en"] = translated
                    stub.setdefault("definitions", {})["en"] = translated

                safe_title = _safe_path_component(title, replace_spaces=True)
                filename = os.path.join(folder, f"{safe_title}.md")
                try:
                    _write_stub_markdown(folder, filename, title, stub)
                except OSError as e:
                    # Covers directory creation as well as the write itself.
                    print(f" FEHLER beim Schreiben von {filename}: {e}")
                else:
                    count_processed += 1

    print(f"\nFertig! {count_processed} Stubs generiert.")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()