summary | refs | log | tree | commit | diff
path: root/ar/.local
diff options
context:
space:
mode:
author TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> 2026-07-01 17:20:12 +0900
committer TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> 2026-07-01 17:20:12 +0900
commit e89331bca279131def3ed40e72bff6bb1dc290aa (patch)
tree 4ebf1d8b5b8a86c1d6fd9ad93b6e0b6f3ad58b34 /ar/.local
parent c3d23d3a4c77cb221aeb789ef92ecfee7fb45cbf (diff)
fix(qndl-artist): group bilingual (Hangul+Latin, no-paren) folders via script-split tokens
Diffstat (limited to 'ar/.local')
-rwxr-xr-x ar/.local/bin/qndl-artist 15
-rw-r--r-- ar/.local/bin/tests/test-qndl-artist.sh 13
2 files changed, 26 insertions, 2 deletions
diff --git a/ar/.local/bin/qndl-artist b/ar/.local/bin/qndl-artist
index 56dff45..ad68cbd 100755
--- a/ar/.local/bin/qndl-artist
+++ b/ar/.local/bin/qndl-artist
@@ -101,15 +101,26 @@ _group_awk() {
function find(a){ while(parent[a]!=a){ parent[a]=parent[parent[a]]; a=parent[a] } return a }
function union(a,b, ra,rb){ ra=find(a); rb=find(b); if(ra!=rb) parent[rb]=ra }
function addtok(idx,chunk, k){ k=norm(chunk); if(k=="") return; if(k in owner) union(owner[k],idx); else owner[k]=idx }
+ # For a chunk mixing Hangul + Latin without parens (e.g. "김나영 Kim na young"),
+ # also emit the Hangul-only and Latin-only pieces as tokens so it groups with
+ # its paren/split siblings ("Kim Na Young(김나영)", "김나영"). Length guards drop
+ # tiny fragments (e.g. "sg") that would over-merge unrelated artists.
+ function addchunk(idx,chunk, h,l){
+ addtok(idx, chunk)
+ if (chunk ~ /[가-힣]/ && chunk ~ /[A-Za-z]/) {
+ h=chunk; gsub(/[^가-힣]/,"",h); if (length(h) >= 2) addtok(idx, h)
+ l=tolower(chunk); gsub(/[^a-z0-9]/,"",l); if (length(l) >= 3) addtok(idx, l)
+ }
+ }
function caserank(s, u,l){ u=(s ~ /[A-Z]/); l=(s ~ /[a-z]/); return (u&&l)?2:1 }
{
name[NR]=$1; cnt[NR]=$2+0; parent[NR]=NR
s=$1; gsub(/（/,"(",s); gsub(/）/,")",s); rest=s
while (match(rest,/\([^)]*\)/)) {
- addtok(NR, substr(rest,RSTART+1,RLENGTH-2))
+ addchunk(NR, substr(rest,RSTART+1,RLENGTH-2))
rest=substr(rest,1,RSTART-1) " " substr(rest,RSTART+RLENGTH)
}
- addtok(NR, rest)
+ addchunk(NR, rest)
}
END{
for(i=1;i<=NR;i++){ r=find(i); members[r]=members[r] i " " }
diff --git a/ar/.local/bin/tests/test-qndl-artist.sh b/ar/.local/bin/tests/test-qndl-artist.sh
index 870d08b..d06081d 100644
--- a/ar/.local/bin/tests/test-qndl-artist.sh
+++ b/ar/.local/bin/tests/test-qndl-artist.sh
@@ -103,4 +103,17 @@ eq "merge --apply: variant 폴더 완전히 사라짐" "no" "$([ -d "$XDG_MUSIC
eq "merge --apply: cover.jpg가 표준 폴더로 이동" "yes" "$([ -f "$XDG_MUSIC_DIR/Zeddy/A/cover.jpg" ] && echo yes || echo no)"
eq "merge --apply: 잔여 이동 후 idempotent" "No case/paren duplicate groups found." "$("$BIN" merge)"
+# --- 병기(한글+영문, 괄호 없음) 폴더가 분리형 형제와 그룹핑됨 (addchunk) ---
+BTMP="$TMP/bimix"; export XDG_MUSIC_DIR="$BTMP/Music"; export QNDL_ALIASES="$BTMP/aliases.tsv"
+mkdir -p "$XDG_MUSIC_DIR"; : > "$QNDL_ALIASES"
+mkmp3 "$XDG_MUSIC_DIR/Kim Na Young/A/a.mp3"; mkmp3 "$XDG_MUSIC_DIR/Kim Na Young/A/b.mp3"
+mkmp3 "$XDG_MUSIC_DIR/김나영 Kim na young/A/c.mp3" # 병기, 괄호 없음 → 예전엔 미매칭
+mkmp3 "$XDG_MUSIC_DIR/Kim Na Young(김나영)/A/d.mp3" # 괄호 분리형
+BIDRY="$("$BIN" merge)"
+eq "bimix: 병기 폴더가 그룹에 포함" "yes" "$(printf '%s' "$BIDRY" | grep -q '김나영 Kim na young' && echo yes || echo no)"
+eq "bimix: 표준명은 영문 전용" "yes" "$(printf '%s' "$BIDRY" | grep -q '→ Kim Na Young (' && echo yes || echo no)"
+# 짧은 라틴 조각(sg 등)으로 무관 아티스트가 오합쳐지지 않아야 함
+mkmp3 "$XDG_MUSIC_DIR/SG 워너비/A/e.mp3"; mkmp3 "$XDG_MUSIC_DIR/Someguy/A/f.mp3"
+eq "bimix: 짧은 라틴조각 오합침 없음" "no" "$("$BIN" merge | grep -q 'Someguy' && echo yes || echo no)"
+
exit $FAIL