summaryrefslogtreecommitdiff
path: root/ar/.local/bin/mtag
blob: 5107b4b32720a282d96b9a9d29f8fa4d202d1e7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/bin/sh

# mtag — rebuild MP3 ID3 tags from each file's path/name, then file the song
# under <music>/<artist>/<album>/<title>.mp3 (qndl's layout).
#
# ncmpcpp/MPD show the *embedded* ID3 tags, not the filename — so renaming a
# file or folder doesn't change what you see. mtag reads the tags back out of
# the on-disk layout (so a rename "sticks") and then moves the file to match.
#
# Resolution rules (is_known = non-empty and not "Unknown Artist"/"Unknown Album"):
#   title  = filename without extension
#   artist = grandparent folder, if known
#   album  = parent folder, if known
#   With a single folder level (artist/title.mp3) that folder is the artist
#   and there is no album folder.
#   When the artist is NOT known from a folder, an "Artist - Title" filename is
#   parsed (split on the FIRST " - ") to recover the artist and title.
#   When no real album is known, the album falls back to the title (single style).
#   If the artist still can't be determined, its TAG is left untouched, and the
#   move target falls back to the existing tag, then "Unknown Artist".

set -u

# Print $1 with every trailing '/' removed; a lone "/" is kept as-is.
trim_slashes() {
  set -- "$1"
  while :; do
    case "$1" in
    ?*/) set -- "${1%/}" ;;
    *) break ;;
    esac
  done
  printf '%s' "$1"
}

# Music root. Trailing slashes are stripped because the rest of the script
# matches and builds paths as "$MUSIC"/…; with XDG_MUSIC_DIR="/x/Music/" those
# prefix matches would fail and artist/album would be read from the wrong depth.
MUSIC="$(trim_slashes "${XDG_MUSIC_DIR:-$HOME/Music}")"
# Playlist that is regenerated after moves (only if it already exists).
PLAYLIST="${XDG_CONFIG_HOME:-$HOME/.config}/mpd/playlists/entire.m3u"
# qndl's "<youtube-id>\t<label>" file that is kept in sync with the new tags.
TITLES="${XDG_DOTFILES_DIR:-$HOME/.dotfiles}/global/Music/.music_titles.txt"
# 1 = preview only (--dry-run / -n), 0 = apply changes.
DRY=0

usage() {
  # Print the help text on stdout (callers redirect to stderr when reporting
  # a bad option). $MUSIC is expanded so the help shows the effective root.
  printf '%s\n' \
    "Usage: mtag [--dry-run|-n] [DIR]" \
    "" \
    "By default mtag REWRITES tags AND MOVES each *.mp3 into" \
    "<music>/<artist>/<album>/<title>.mp3, prunes emptied folders, regenerates the" \
    "'entire.m3u' playlist (if present), syncs qndl's '.music_titles.txt' restore/" \
    "delete labels (if present), and refreshes MPD." \
    "" \
    "  (no args)      apply everything (tags + move + playlist + mpc update)" \
    "  --dry-run, -n  preview only — show planned tag and move changes, touch nothing" \
    "  DIR            scan DIR instead of the whole music dir ($MUSIC)" \
    "  -h, --help     this help" \
    "" \
    "Tags are written as ID3v2.3 (UTF-16) so Korean/Japanese text is safe, and" \
    "ffmpeg -c copy preserves all other embedded data (e.g. the source URL that" \
    "dmenudelmusic relies on). Collisions during a move are skipped, never clobbered."
}

# --- argument parsing -------------------------------------------------------
# Flags may appear anywhere; a non-flag word names the directory to scan (if
# several are given the last one wins). With no directory, the whole music
# root is scanned.
SCAN=""
for opt do
  case "$opt" in
  -h | --help)
    usage
    exit 0
    ;;
  -n | --dry-run)
    DRY=1
    ;;
  -*)
    printf 'mtag: unknown option: %s\n\n' "$opt" >&2
    usage >&2
    exit 2
    ;;
  *)
    SCAN="$opt"
    ;;
  esac
done
SCAN="${SCAN:-$MUSIC}"

# --- prerequisites ----------------------------------------------------------
# Both ffmpeg (tag writing) and ffprobe (tag reading) must be on PATH, and the
# scan root must be an existing directory; otherwise bail out with status 1.
for tool in ffmpeg ffprobe; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'mtag: %s not found (required)\n' "$tool" >&2
    exit 1
  fi
done
if [ ! -d "$SCAN" ]; then
  printf 'mtag: not a directory: %s\n' "$SCAN" >&2
  exit 1
fi

# --- helpers ----------------------------------------------------------------
is_known() {
  # is_known <name>
  # Succeeds (0) when <name> is a usable artist/album value; fails (1) when it
  # is empty or one of the placeholder names.
  if [ -z "$1" ]; then
    return 1
  fi
  if [ "$1" = "Unknown Artist" ] || [ "$1" = "Unknown Album" ]; then
    return 1
  fi
  return 0
}

# Make a string safe as a single path component: '/' is the only byte Linux
# forbids in a name, so replace just that (keeps spaces, brackets, CJK as-is).
# Three values contain no '/' yet are still not usable as a component — the
# empty string (collapses the path), "." and ".." (would point at the current
# or parent directory, letting a tag of ".." steer a move outside the music
# root) — so they are mapped to underscores instead.
#   san <string>  -> prints the sanitised component on stdout
san() {
  case "$1" in
  "" | ".") printf '_' ;;
  "..") printf '__' ;;
  *) printf '%s' "$1" | tr '/' '_' ;;
  esac
}

# Display helper: print a path with the leading "$MUSIC/" removed when it lies
# under the music root; any other path is printed unchanged.
reldisp() {
  # Prefix removal is a no-op when the prefix is absent, so no explicit
  # "is it under $MUSIC?" test is needed.
  printf '%s' "${1#"$MUSIC"/}"
}

get_tag() {
  # get_tag <file> <tag-name>
  # Ask ffprobe for one format-level tag and print only the first line of its
  # value (bare value: no section wrappers, no key). ffprobe's stderr is
  # discarded, so a missing tag or unreadable file just yields empty output.
  ffprobe -v error -show_entries "format_tags=$2" \
    -of default=nw=1:nk=1 "$1" 2>/dev/null |
    sed -n '1p'
}

get_vid() {
  # get_vid <file>
  # Scan the file's printable strings for an embedded "watch?v=<id>" URL and
  # print the <id> from the first matching line (same method as
  # dmenudelmusic); prints nothing when no such URL is present.
  strings "$1" 2>/dev/null |
    sed -n 's/.*watch?v=\([A-Za-z0-9_-]*\).*/\1/p' |
    head -n 1
}

# --- main loop --------------------------------------------------------------
# For every *.mp3 under $SCAN: derive title/artist/album from the path, show
# the plan, and (unless --dry-run) record the qndl label, rewrite the tags and
# move the file to <music>/<artist>/<album>/<title>.mp3.
#
# Temp files:
#   $_list      the file list, captured up front and fed to the loop on stdin
#   $_titlemap  "<youtube-id>\t<artist> - <title>" lines for the label sync
#   $_tagtmp    ffmpeg's in-flight output (empty when none); it ends in .mp3,
#               so it must not be left behind or a later run would scan it
_list="$(mktemp)" || exit 1
_titlemap="$(mktemp)" || {
  rm -f "$_list"
  exit 1
}
_tagtmp=""
trap 'rm -f "$_list" "$_titlemap"; [ -z "$_tagtmp" ] || rm -f "$_tagtmp"' EXIT
# A signal trap that merely returns lets the script carry on with the next
# file after Ctrl-C. Exit explicitly instead; that also runs the EXIT cleanup.
trap 'exit 130' INT
trap 'exit 143' TERM
find "$SCAN" -type f -name '*.mp3' | sort >"$_list"

# Counters for the summary: files seen, tag rewrites, moves.
_total=0
_changed=0
_moved=0

while IFS= read -r f; do
  [ -n "$f" ] || continue
  _total=$((_total + 1))

  # Path relative to the music root, so artist/album come from the right depth
  # regardless of where SCAN points.
  case "$f" in
  "$MUSIC"/*) rel="${f#"$MUSIC"/}" ;;
  *) rel="$f" ;;
  esac

  fname="$(basename "$rel")"
  base="${fname%.*}" # title candidate (drop extension)

  # Determine artist/album folders by depth under the music root.
  dir="$(dirname "$rel")"
  artistdir=""
  albumdir=""
  if [ "$dir" != "." ]; then
    albumdir="$(basename "$dir")" # immediate parent
    pdir="$(dirname "$dir")"
    [ "$pdir" != "." ] && artistdir="$(basename "$pdir")"
    # Two-level case (artist/title.mp3): the single folder is the ARTIST,
    # not the album.
    if [ -z "$artistdir" ]; then
      artistdir="$albumdir"
      albumdir=""
    fi
  fi

  title="$base"
  artist=""
  album=""
  is_known "$artistdir" && artist="$artistdir"
  is_known "$albumdir" && album="$albumdir"

  # Recover artist from an "Artist - Title" filename only when we couldn't get
  # one from a folder — avoids mangling titles that legitimately contain " - ".
  if [ -z "$artist" ]; then
    case "$base" in
    *" - "*)
      artist="${base%%" - "*}"
      title="${base#*" - "}"
      ;;
    esac
  fi

  # Single style: no real album -> album is the title.
  [ -n "$album" ] || album="$title"

  old_title="$(get_tag "$f" title)"
  old_artist="$(get_tag "$f" artist)"
  old_album="$(get_tag "$f" album)"

  # What would actually change? (artist only counts when we have a value)
  diff=0
  [ "$old_title" != "$title" ] && diff=1
  [ "$old_album" != "$album" ] && diff=1
  [ -n "$artist" ] && [ "$old_artist" != "$artist" ] && diff=1
  [ "$diff" -eq 1 ] && _changed=$((_changed + 1))

  # Move target: <music>/<artist>/<album>/<title>.mp3. The path needs a
  # concrete artist even when we won't write the tag, so fall back to the
  # existing tag, then "Unknown Artist".
  path_artist="$artist"
  [ -n "$path_artist" ] || path_artist="$old_artist"
  [ -n "$path_artist" ] || path_artist="Unknown Artist"
  target="$MUSIC/$(san "$path_artist")/$(san "$album")/$(san "$title").mp3"
  needmove=0
  [ "$target" != "$f" ] && needmove=1
  [ "$needmove" -eq 1 ] && _moved=$((_moved + 1))

  # Display
  mark="  "
  { [ "$diff" -eq 1 ] || [ "$needmove" -eq 1 ]; } && mark="* "
  printf '%s%s\n' "$mark" "$rel"
  printf '    title : %s\n' "$title"
  if [ -n "$artist" ]; then
    printf '    artist: %s\n' "$artist"
  else
    printf '    artist: (tag left as-is: %s)\n' "${old_artist:-<empty>}"
  fi
  printf '    album : %s\n' "$album"
  [ "$needmove" -eq 1 ] && printf '    move  : -> %s\n' "$(reldisp "$target")"

  [ "$DRY" -eq 1 ] && continue

  # Record the restore/delete label for qndl's .music_titles.txt, keyed by the
  # embedded YouTube ID, so 'qndl -r'/'-d' show the corrected "artist - title"
  # instead of the stale download-time label. Done before any move (the file is
  # still at "$f"). Files without an embedded ID are skipped.
  vid="$(get_vid "$f")"
  [ -n "$vid" ] && printf '%s\t%s - %s\n' "$vid" "$path_artist" "$title" >>"$_titlemap"

  # Write tags (in place) first, so the moved file carries the new tags.
  # -map 0 + -c copy keep the audio stream and ALL other metadata (cover art,
  # the embedded source URL, etc.) untouched — only the listed tags change.
  if [ "$diff" -eq 1 ]; then
    _tagtmp="${f%.mp3}.mtag.$$.mp3"
    # -nostdin: by default ffmpeg reads interactive commands from stdin, which
    # inside this loop is the file list — it would eat upcoming paths.
    set -- ffmpeg -nostdin -v error -y -i "$f" -map 0 -c copy -id3v2_version 3 \
      -metadata "title=$title" -metadata "album=$album"
    [ -n "$artist" ] && set -- "$@" -metadata "artist=$artist"
    set -- "$@" "$_tagtmp"
    if "$@" && [ -s "$_tagtmp" ]; then
      mv -f "$_tagtmp" "$f" || {
        rm -f "$_tagtmp"
        printf '    !! could not replace original, left unchanged\n' >&2
        _changed=$((_changed - 1))
      }
    else
      rm -f "$_tagtmp"
      printf '    !! ffmpeg failed, left unchanged\n' >&2
      # Keep the summary honest, mirroring how failed moves are un-counted.
      _changed=$((_changed - 1))
    fi
    _tagtmp=""
  fi

  # Then relocate, skipping collisions and pruning emptied source folders.
  if [ "$needmove" -eq 1 ]; then
    if [ -e "$target" ]; then
      printf '    !! target exists, not moved: %s\n' "$(reldisp "$target")" >&2
      _moved=$((_moved - 1))
    else
      mkdir -p "$(dirname "$target")"
      if mv -n "$f" "$target"; then
        # Walk upward removing now-empty folders; rmdir fails (and stops the
        # walk) at the first folder that still has content.
        d="$(dirname "$f")"
        while [ "$d" != "$MUSIC" ] && [ "$d" != "/" ] && [ "$d" != "." ]; do
          rmdir "$d" 2>/dev/null || break
          d="$(dirname "$d")"
        done
      else
        printf '    !! move failed, left in place\n' >&2
        _moved=$((_moved - 1))
      fi
    fi
  fi
done <"$_list"

# --- finish -----------------------------------------------------------------

# Print every *.mp3 under $MUSIC as a sorted, music-relative path.
# find runs from inside $MUSIC, so the root never has to be stripped with a
# regex built from the path itself (which misfires when the path contains
# characters such as '[', '*' or '|').
list_relative_mp3s() {
  (CDPATH= cd -- "$MUSIC" && find . -name '*.mp3') | sed 's|^\./||' | sort
}

printf '\n'
if [ "$DRY" -eq 1 ]; then
  printf 'mtag: dry-run — %d tag change(s), %d move(s), of %d file(s).\n' \
    "$_changed" "$_moved" "$_total"
  printf 'mtag: (apply would also sync entire.m3u and qndl .music_titles.txt labels.)\n'
  printf 'mtag: run without --dry-run to apply.\n'
  exit 0
fi

printf 'mtag: updated %d tag(s), moved %d file(s), of %d total.\n' \
  "$_changed" "$_moved" "$_total"

# Keep qndl's playlist consistent: it lists music-relative *.mp3 paths, which
# moves invalidate. Regenerate it, but only if it already exists (don't create
# the artifact on setups that don't use it) and the music root is reachable
# (otherwise the playlist would be truncated to nothing).
if [ "$_moved" -gt 0 ] && [ -f "$PLAYLIST" ] && [ -d "$MUSIC" ]; then
  list_relative_mp3s >"$PLAYLIST"
  printf 'mtag: regenerated %s\n' "$(reldisp "$PLAYLIST")"
fi

# Keep qndl's restore/delete labels (.music_titles.txt) in sync with the new
# tags. Upsert by YouTube ID: update existing lines, append IDs not yet listed.
# Only rewrite the file when its content actually changes (avoids dotfiles noise).
#
# awk reads $_titlemap first (FNR==NR) into map[id] = label, remembering the
# first-seen order of ids; it then streams $TITLES, replacing the label of any
# listed id, and finally appends the ids that were never seen in $TITLES.
if [ -s "$_titlemap" ] && [ -f "$TITLES" ]; then
  _tnew="$(mktemp)"
  if awk '
    FNR==NR {
      t = index($0, "\t"); if (t == 0) next
      id = substr($0, 1, t - 1); lab = substr($0, t + 1)
      map[id] = lab; if (!(id in known)) { order[++n] = id; known[id] = 1 }
      next
    }
    {
      t = index($0, "\t"); lid = (t ? substr($0, 1, t - 1) : $0)
      if (lid in map) { print lid "\t" map[lid]; seen[lid] = 1 }
      else print $0
    }
    END { for (i = 1; i <= n; i++) { id = order[i]; if (!(id in seen)) print id "\t" map[id] } }
  ' "$_titlemap" "$TITLES" >"$_tnew"; then
    if cmp -s "$_tnew" "$TITLES"; then
      rm -f "$_tnew"
    else
      mv "$_tnew" "$TITLES"
      printf 'mtag: synced labels in %s\n' "$(reldisp "$TITLES")"
    fi
  else
    # Never install partial output over the real label file.
    rm -f "$_tnew"
    printf 'mtag: label sync failed, %s left unchanged\n' "$(reldisp "$TITLES")" >&2
  fi
fi

# Ask MPD to rescan so the new tags/paths show up; silently best-effort when
# the update itself fails (the script's exit status then reflects mpc's).
if command -v mpc >/dev/null 2>&1; then
  printf 'mtag: running mpc update...\n'
  mpc update --wait >/dev/null 2>&1 && printf 'mtag: MPD database refreshed.\n'
else
  printf 'mtag: mpc not found — skipped database refresh.\n'
fi