Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 3096fa3

Browse files
committed
feat: ja alias generation
1 parent c5c9c01 commit 3096fa3

4 files changed

Lines changed: 263 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
.envrc
44
.dir-locals.el
55
result
6+
__pycache__

CONFIGURATION.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,25 @@ And an `alias` key.
5353
]
5454
```
5555

56+
### Alias generation
57+
58+
Aliases for libraries with mostly Japanese albums/tracks may be
59+
generated with the included Python script.
60+
61+
```bash
62+
# reqs: [python-mpd2, fugashi[unidic-lite], pykakasi]
63+
python3 -m pip install -r scripts/ja/requirements.txt
64+
python3 scripts/ja/generate.py
65+
```
66+
67+
- Be aware that unidic-lite takes ~250 MB.
68+
- Romanization style defaults to `kunrei`; `--style
69+
hepburn` is also supported, but is probably suboptimal for searching
70+
if you know Japanese.
71+
- Use `--overwrite` to prefer newly generated aliases over existing
72+
entries.
73+
74+
PRs are welcome for alias generation scripts for other languages.
5675
## Keybindings
5776

5877
### Keybinding sets

scripts/ja/generate.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
#!/usr/bin/env python3
2+
"""Generate inori aliases.json entries from an MPD library.
3+
4+
Requires:
5+
- python-mpd2
6+
- fugashi
7+
- unidic-lite (or unidic)
8+
- pykakasi
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import argparse
14+
import json
15+
import os
16+
import re
17+
import sys
18+
import unicodedata
19+
from collections.abc import Mapping
20+
from pathlib import Path
21+
from typing import Any, Literal, TypeGuard
22+
23+
from fugashi import Tagger
24+
from mpd import MPDClient
25+
from pykakasi import kakasi
26+
27+
# Matches a single character in U+3040-U+30FF (hiragana and katakana),
# U+3400-U+4DBF (CJK Extension A) or U+4E00-U+9FFF (CJK Unified Ideographs).
JP_RE = re.compile(r"[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff]")
28+
# Matches a run of one or more whitespace characters.
SPACE_RE = re.compile(r"\s+")
29+
# NOTE: despite the name, this matches runs of any character that is not a
# lowercase ASCII letter, an ASCII digit or a space (so punctuation and
# uppercase letters match too).
NON_ASCII_RE = re.compile(r"[^a-z0-9 ]+")
30+
31+
# Maps a song's file path to its alias string.
PathAliases = dict[str, str]
32+
# Maps an album name to its alias string.
AlbumAliases = dict[str, str]
33+
# Values accepted by --style; also used as the key that selects the romanized
# form from each item returned by the kakasi converter.
RomanizationStyle = Literal["hepburn", "kunrei", "passport"]
34+
35+
36+
def contains_japanese(text: str) -> bool:
    """Return True when *text* has at least one character matched by ``JP_RE``."""
    # A match object is always truthy, so bool() is the same as "is not None".
    return bool(JP_RE.search(text))
38+
39+
40+
def normalize_ascii(text: str) -> str:
    """Reduce *text* to lowercase ASCII letters, digits and single spaces.

    The text is NFKC-normalized and lowercased, every run of other characters
    becomes a space, and the result is collapsed to single spaces with no
    leading or trailing space.
    """
    folded = unicodedata.normalize("NFKC", text).lower()
    only_allowed = NON_ASCII_RE.sub(" ", folded)
    # After the substitution the only whitespace left is the plain space, so
    # split/join both collapses repeated spaces and trims the ends.
    return " ".join(only_allowed.split())
44+
45+
46+
def token_reading(token: Any) -> str:
    """Return the best available reading string for a tagger token.

    The token's ``feature`` object is probed for ``kana``, ``pron``,
    ``pronBase`` and ``lemma`` in that order; the first value that is a string
    other than ``"*"`` is returned. If none qualifies (or the token has no
    ``feature``), the token's ``surface`` is returned instead.
    """
    features = getattr(token, "feature", None)
    if features is None:
        return token.surface

    for field in ("kana", "pron", "pronBase", "lemma"):
        candidate = getattr(features, field, None)
        # "*" marks an unavailable field, so it is skipped like a missing one.
        if isinstance(candidate, str) and candidate != "*":
            return candidate
    return token.surface
53+
54+
55+
def romanize_japanese(
    text: str, tagger: Tagger, kks: Any, style: RomanizationStyle
) -> str:
    """Romanize *text* and return it in normalized ASCII form.

    Args:
        text: The string to romanize.
        tagger: Callable tokenizer; each token it yields is passed to
            ``token_reading``.
        kks: Converter whose ``convert`` method yields items indexable by
            *style*.
        style: Which romanization to pick from each converted item.

    Returns:
        The space-joined romanized pieces after ``normalize_ascii``.
    """
    kana_words = []
    for morpheme in tagger(text):
        kana_words.append(token_reading(morpheme))

    # Fall back to the raw text when the tagger produced no tokens at all.
    source = text if not kana_words else " ".join(kana_words)

    romaji_parts = [chunk[style] for chunk in kks.convert(source)]
    return normalize_ascii(" ".join(romaji_parts))
62+
63+
64+
def is_obj_mapping(value: object) -> TypeGuard[Mapping[str, object]]:
65+
return isinstance(value, Mapping) and all(
66+
isinstance(k, str) for k in value.keys()
67+
)
68+
69+
70+
def first_str(value: object) -> str | None:
    """Extract a string from a value that may be a string or a list.

    Returns *value* itself when it is a string, the first element when it is
    a non-empty list whose first element is a string, and ``None`` otherwise.
    """
    if isinstance(value, str):
        return value
    if not isinstance(value, list) or not value:
        return None
    head = value[0]
    return head if isinstance(head, str) else None
76+
77+
78+
def parse_existing_entries(path: Path) -> tuple[PathAliases, AlbumAliases]:
    """Load an existing alias file and split it into path and album aliases.

    Args:
        path: Location of the JSON alias file. A missing file is treated as
            having no entries.

    Returns:
        A ``(path_aliases, album_aliases)`` pair. When a key occurs more than
        once in the file, the last occurrence wins.

    Raises:
        ValueError: If the document is not a JSON array, an entry is not an
            object with string keys, an entry lacks a string ``alias``, or an
            entry does not have exactly one of ``path`` / ``album``.
    """
    by_path: PathAliases = {}
    by_album: AlbumAliases = {}
    if not path.exists():
        return by_path, by_album

    document = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(document, list):
        raise ValueError(f"{path} must be a JSON array")

    for index, item in enumerate(document):
        if not is_obj_mapping(item):
            raise ValueError(f"{path} entry {index} must be an object")

        alias_value = first_str(item.get("alias"))
        if alias_value is None:
            raise ValueError(f"{path} entry {index} must include string key 'alias'")

        path_key = first_str(item.get("path"))
        album_key = first_str(item.get("album"))
        # Both present or both absent is invalid: an entry targets one thing.
        if (path_key is None) == (album_key is None):
            raise ValueError(
                f"{path} entry {index} must include exactly one of 'path' or 'album'"
            )
        if path_key is not None:
            by_path[path_key] = alias_value
        elif album_key is not None:
            by_album[album_key] = alias_value

    return by_path, by_album
103+
104+
105+
def get_tag(song: Mapping[str, object], key: str) -> str | None:
    """Look up a tag in *song*, tolerating different key capitalizations.

    The key is tried as given, then lowercased, uppercased and title-cased;
    the first spelling that yields a string (via ``first_str``) wins.
    Returns ``None`` when no spelling produces a string.
    """
    spellings = (key, key.lower(), key.upper(), key.title())
    # Lazy generator: later spellings are only looked up if earlier ones miss.
    hits = (first_str(song.get(name)) for name in spellings)
    return next((hit for hit in hits if hit is not None), None)
112+
113+
114+
def merge_aliases(
    old_path: PathAliases,
    old_album: AlbumAliases,
    new_path: PathAliases,
    new_album: AlbumAliases,
    overwrite: bool,
) -> tuple[PathAliases, AlbumAliases]:
    """Combine existing and newly generated aliases into fresh dictionaries.

    Args:
        old_path: Path aliases already on disk.
        old_album: Album aliases already on disk.
        new_path: Newly generated path aliases.
        new_album: Newly generated album aliases.
        overwrite: When true, new aliases win on conflicting keys; otherwise
            the existing aliases win.

    Returns:
        A ``(path_aliases, album_aliases)`` pair; the inputs are not modified.
    """
    # In a dict display the later unpacking wins on duplicate keys, so the
    # preferred side goes last.
    if overwrite:
        return {**old_path, **new_path}, {**old_album, **new_album}
    return {**new_path, **old_path}, {**new_album, **old_album}
132+
133+
134+
def serialize_entries(
    path_aliases: PathAliases, album_aliases: AlbumAliases
) -> list[dict[str, str]]:
    """Flatten both alias maps into the list-of-objects layout written to disk.

    Path entries come first, then album entries; each group is sorted by key.
    """
    path_rows = [
        {"path": key, "alias": path_aliases[key]} for key in sorted(path_aliases)
    ]
    album_rows = [
        {"album": key, "alias": album_aliases[key]} for key in sorted(album_aliases)
    ]
    return path_rows + album_rows
143+
144+
145+
def default_alias_file() -> Path:
    """Return the default alias file location.

    Uses ``$XDG_CONFIG_HOME/inori/aliases.json`` when the variable is set to
    a non-empty value, otherwise ``~/.config/inori/aliases.json``.
    """
    config_root = os.environ.get("XDG_CONFIG_HOME")
    base = Path(config_root) if config_root else Path.home() / ".config"
    return base / "inori" / "aliases.json"
150+
151+
152+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the alias generator.

    Connection defaults are read from the ``MPD_HOST``, ``MPD_PORT`` and
    ``MPD_PASSWORD`` environment variables; the output path defaults to
    ``default_alias_file()``.

    Raises:
        ValueError: If ``MPD_PORT`` is set to something that is not an integer.
    """
    env = os.environ
    host_default = env.get("MPD_HOST", "localhost")
    port_default = int(env.get("MPD_PORT", "6600"))
    password_default = env.get("MPD_PASSWORD")
    output_default = default_alias_file()

    parser = argparse.ArgumentParser(
        description="Generate inori aliases.json from MPD metadata."
    )
    parser.add_argument("--host", default=host_default)
    parser.add_argument("--port", type=int, default=port_default)
    parser.add_argument("--password", default=password_default)
    parser.add_argument("--output", type=Path, default=output_default)
    parser.add_argument(
        "--style",
        choices=("hepburn", "kunrei", "passport"),
        default="kunrei",
        help="Romanization style (default: kunrei).",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Prefer newly generated aliases over existing aliases on conflicts.",
    )
    return parser
172+
173+
174+
def main() -> int:
    """Generate aliases from the MPD library and merge them into the alias file.

    Parses the command line, fetches every song with ``listallinfo``,
    romanizes titles and album names that contain Japanese characters, merges
    the result with the entries already in the output file, and rewrites that
    file as JSON.

    Returns:
        ``0`` on success. Failures propagate as exceptions to the caller.
    """
    args = build_parser().parse_args()
    style: RomanizationStyle = args.style

    tagger = Tagger()
    kks = kakasi()

    client = MPDClient()
    client.timeout = 20
    client.idletimeout = None
    client.connect(args.host, args.port)
    try:
        if args.password:
            client.password(args.password)
        songs = client.listallinfo()
        client.close()
    finally:
        # Release the socket even when authentication or the listing fails.
        client.disconnect()

    new_path: PathAliases = {}
    new_album: AlbumAliases = {}
    for song in songs:
        if not is_obj_mapping(song):
            continue
        path = get_tag(song, "file")
        if not path:
            continue
        title = get_tag(song, "Title")
        album = get_tag(song, "Album")

        if title and contains_japanese(title):
            alias = romanize_japanese(title, tagger, kks, style)
            if alias:
                new_path[path] = alias

        # Check membership first so an album shared by many tracks is
        # romanized once instead of once per track; the first alias still wins.
        if album and album not in new_album and contains_japanese(album):
            alias = romanize_japanese(album, tagger, kks, style)
            if alias:
                new_album[album] = alias

    old_path, old_album = parse_existing_entries(args.output)
    merged_path, merged_album = merge_aliases(
        old_path, old_album, new_path, new_album, args.overwrite
    )

    args.output.parent.mkdir(parents=True, exist_ok=True)
    entries = serialize_entries(merged_path, merged_album)
    args.output.write_text(
        json.dumps(entries, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )

    print(
        "generated:"
        f" titles={len(new_path)} albums={len(new_album)} |"
        f" merged totals: paths={len(merged_path)} albums={len(merged_album)}"
    )
    return 0
230+
231+
232+
# Script entry point: exit with main()'s return code, 130 on Ctrl-C, or 1
# after printing any other error to stderr.
if __name__ == "__main__":
    try:
        exit_code = main()
    except KeyboardInterrupt:
        sys.exit(130)
    except Exception as e:
        print(f"error: {e}", file=sys.stderr)
        sys.exit(1)
    # SystemExit is not an Exception subclass, so exiting outside the try is
    # equivalent to raising it inside.
    sys.exit(exit_code)

scripts/ja/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
fugashi>=1.3.2
2+
unidic-lite>=1.0.8
3+
pykakasi>=2.3.0
4+
python-mpd2>=3.1.1

0 commit comments

Comments
 (0)