diff options
Diffstat (limited to 'mac/.config/mpv/scripts/subtitle-search.lua')
| -rw-r--r-- | mac/.config/mpv/scripts/subtitle-search.lua | 682 |
1 files changed, 682 insertions, 0 deletions
diff --git a/mac/.config/mpv/scripts/subtitle-search.lua b/mac/.config/mpv/scripts/subtitle-search.lua new file mode 100644 index 0000000..53a2ddc --- /dev/null +++ b/mac/.config/mpv/scripts/subtitle-search.lua @@ -0,0 +1,682 @@ +--[[ +Based on sub-search script by kelciour (https://github.com/kelciour/mpv-scripts/blob/master/sub-search.lua) + +Differences from the original script: + +- Searches in a subtitle file active as a primary subtitle instead of attempting to find subtitle files matching video name +- Outputs all search results in OSD list instead of jumping between them with a hotkey (the closest subtitle is selected by default) +- Supports searching unicode text (subtitles should be encoded as utf8, please re-encode your subtitles if you get no results searching for unicode text) +- Embedded console replaced with more recent variant from mpv sources (to support unicode input) +- Takes into account current `sub-delay` value +- Can search in embedded subtitles (requires ffmpeg to be installed to extract subtitles from video files) +- Can search subtitles for youtube videos (requires ffmpeg to be installed to fetch remote subtitles) +- Supports `.srt`, `.vtt` and `.sub` (microdvd) subtitle formats +- Can use special phrase "*" to show all subtitle lines +- Use `ctrl+shift+f` shortcut to show all subtitle lines simultaneously and dynamically highlight the current line +- Press `Ctrl+Shift+Enter` in result list to adjust `sub-delay` so that selected subtitle line is displayed at the current position + +Requires `script-modules/utf8` repository, `script-modules/scroll-list.lua`, `script-modules/sha1.lua`, `script-modules/utf8_data.lua` and `script-modules/input-console.lua` to work. + +You can clone `script-modules/utf8` repository with the following command (assuming you are in mpv config directory): `git clone git@github.com:Stepets/utf8.lua.git script-modules/utf8` + +Usage: + Press Ctrl + F, print something and press Enter. +Example: + 'You are playing Empire Strikes Back and press Ctrl+F, type "I am you father" + Enter + and voilá, the scene pops up.' +--]] + + +package.path = package.path .. ";" .. mp.command_native({ "expand-path", "~~/script-modules/?.lua" }) + +local mp = require("mp") +local utils = require("mp.utils") +local msg = require("mp.msg") +local input_console = require("input-console") +local result_list = require("scroll-list") +local utf8 = require("utf8/init") +local utf8_data = require("utf8_data") +local sha1 = require("sha1") + +utf8.config = { + conversion = { + uc_lc = utf8_data.utf8_uc_lc, + lc_uc = utf8_data.utf8_lc_uc + }, +} + +utf8:init() + +table.insert(result_list.keybinds, { + "ENTER", "jump_to_result", function() + local selected_index = result_list.selected + if selected_index == nil then + return + end + + local selected = result_list.list[selected_index] + mp.commandv("seek", selected.time, "absolute+exact") + end, {} +}) +table.insert(result_list.keybinds, { + "Ctrl+Shift+ENTER", "sync_to_result", function() + local selected_index = result_list.selected + if selected_index == nil then + return + end + + local selected = result_list.list[selected_index] + local old_delay = mp.get_property_native("sub-delay") + local delay = -(selected.original_time - mp.get_property_native("time-pos")) + mp.set_property_native("sub-delay", delay) + end, {} +}) + +function sub_time_to_seconds(time, sep) + if time:match("%d%d:%d%d" .. sep .. "%d%d%d") then + time = "00:" .. time + end + + local major, minor = time:match("(%d%d:%d%d:%d%d)" .. sep .. "(%d%d%d)") + local hours, mins, secs = major:match("(%d%d):(%d%d):(%d%d)") + return hours * 3600 + mins * 60 + secs + minor / 1000 +end + +local subs_cache = {} + +function open_file(path) + local f, err = io.open(path, "r") + if f and err == nil then + return f + end + + return nil +end + +function is_supported_network_protocol(url) + local protocols = { "http", "https" } + + for _, protocol in pairs(protocols) do + if url:sub(1, #protocol + 3) == protocol .. "://" then + return true + end + end + + return false +end + +function get_sub_filename_async(track_name, on_done) + local active_track = mp.get_property_native("current-tracks/" .. track_name) + if active_track == nil then + on_done(nil) + return + end + + local is_external = active_track.external + local external_filename = active_track["external-filename"] + + -- youtube subtitles specified with edl format + if is_external and external_filename and external_filename:sub(1, 6) == "edl://" then + download_subtitle_async(external_filename:match("https://.*"), on_done) + return + end + + if is_external and external_filename and is_supported_network_protocol(external_filename) then + download_subtitle_async(external_filename, on_done) + return + end + + if is_external and external_filename then + on_done(external_filename) + return + end + + if is_external == false then + extract_subtitle_track_async(active_track, on_done) + return + end + + on_done(nil) +end + +function get_path_to_extract_sub(uniq_sub_id) + local sub_filename = sha1.hex(uniq_sub_id) + return utils.join_path(get_temp_dir(), "mpv-subtitle-search-extracted-" .. sub_filename .. ".srt") +end + +function download_subtitle_async(url, on_done) + local sub_path = get_path_to_extract_sub(mp.get_property_native("path") .. "#" .. url) + + if subs_cache[sub_path] then + on_done(sub_path) + return + end + + local extract_overlay = mp.create_osd_overlay("ass-events") + extract_overlay.data = "{\\a3\\fs20}Fetching remote subtitles, wait..." + extract_overlay:update() + + mp.command_native_async({ + name = "subprocess", + capture_stdout = true, + args = { "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-i", url, "-vn", "-an", "-c:s", "srt", sub_path } + }, function(ok) + if not ok then + extract_overlay.data = "{\\a3\\fs20\\c&HFF&}Extraction failed" + extract_overlay:update() + + mp.add_timeout(2, function() + extract_overlay:remove() + end) + + on_done(nil) + else + extract_overlay:remove() + + on_done(sub_path) + end + end) +end + +function extract_subtitle_track_async(track, on_done) + if track.external then + on_done(nil) + return + end + + local video_file = mp.get_property_native("path") + local working_dir = mp.get_property_native("working-directory") + local full_path = utils.join_path(working_dir, video_file) + + local track_index = track["ff-index"] + local sub_path = get_path_to_extract_sub(full_path .. "#" .. track_index) + + -- check if file already exists + if open_file(sub_path) then + msg.info("Reusing extracted subtitle track from " .. sub_path) + + on_done(sub_path) + return + end + + msg.info("Extracting embedded subtitle track to " .. sub_path) + + local extract_overlay = mp.create_osd_overlay("ass-events") + extract_overlay.data = "{\\a3\\fs20}Extracting embedded subtitles, wait..." + extract_overlay:update() + + mp.command_native_async({ + name = "subprocess", + capture_stdout = true, + args = { "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-i", full_path, "-map", "0:" .. track_index, "-vn", "-an", "-c:s", "srt", sub_path } + }, function(ok) + if not ok then + extract_overlay.data = "{\\a3\\fs20\\c&HFF&}Extraction failed" + extract_overlay:update() + + mp.add_timeout(2, function() + extract_overlay:remove() + end) + + on_done(nil) + else + extract_overlay:remove() + + on_done(sub_path) + end + end) +end + +function get_temp_dir() + local temp_dir = os.getenv("TMPDIR") + if temp_dir == nil then + temp_dir = os.getenv("TEMP") + end + + if temp_dir == nil then + temp_dir = os.getenv("TMP") + end + + if temp_dir == nil then + temp_dir = "/tmp" + end + + return temp_dir +end + +function get_lines(input) + local lines = {} + + local tail = 1 + for head = 1, #input do + local ch = input:sub(head, head) + if ch == "\n" then + table.insert(lines, input:sub(tail, head - 1)) + tail = head + 1 + elseif head == #input then + table.insert(lines, input:sub(tail, head)) + end + end + + return lines +end + +function trim(s) + return s:gsub("^%s*(.-)%s*$", "%1") +end + +function parse_vtt_sub(data) + local result = {} + local state = "header" + + local cur_line = {} + for _, line in ipairs(get_lines(data)) do + line = trim(line) + if state == "header" then + if line == "" then + state = "body" + end + elseif state == "body" then + if line == "" then + state = "header" + elseif line:match("^NOTE") or line:match("^STYLE") then + state = "comment" + else + local time_text = line:match("^(%d%d:%d%d:%d%d%.%d%d%d)") or line:match("^(%d%d:%d%d%.%d%d%d)") + if time_text then + cur_line.time = sub_time_to_seconds(time_text, ".") + state = "waiting_text" + else + state = "body" + end + end + elseif state == "comment" then + if #line == 0 then + state = "body" + end + elseif state == "waiting_text" then + if #line == 0 or line == nil then + if cur_line.text ~= nil then + table.insert(result, cur_line) + end + + cur_line = {} + state = "body" + else + line = remove_tags(line) + if cur_line.text then + cur_line.text = cur_line.text .. "\n" .. line + else + cur_line.text = line + end + end + end + end + + return result +end + +function remove_tags(text) + function remove_tag(tag_to_remove) + return string.gsub(text, "</?" .. tag_to_remove .. ">", "") + end + + text = remove_tag("b") + text = remove_tag("i") + text = remove_tag("u") + text = remove_tag("ruby") + text = remove_tag("rt") + + -- remove class tag + text = remove_tag("c") + text = string.gsub(text, "<c.[^>]*>", "") + + -- remove voice tag + text = remove_tag("v") + text = string.gsub(text, "<v [^>]*>", "") + + -- remove karaoke karaoke tags + text = string.gsub(text, "</?%d%d:%d%d.%d%d%d>", "") + text = string.gsub(text, "</?%d%d:%d%d:%d%d.%d%d%d>", "") + + -- remove font tag + text = string.gsub(text, '<font color="#?[%d%a]+">', "") + text = string.gsub(text, '</font>', "") + + return text +end + + +-- detects only most common encodings +function get_encoding_from_bom(data) + -- utf8 + local bom = data:sub(1, 3) + if bom == "\xEF\xBB\xBF" then + return "utf-8" + end + + -- utf16 + bom = data:sub(1, 2) + if bom == "\xFF\xFE" or bom == "\xFE\xFF" then + return "utf-16" + end + + -- utf32 + bom = data:sub(1, 4) + if bom == "\xFF\xFE\x00\x00" or bom == "\x00\x00\xFE\xFF" then + return "utf-32" + end + + return nil +end + +function is_microdvd_sub(data) + return data:match("{%d+}{%d+}") +end + +function parse_microdvd_sub(data) + local result = {} + local lines = get_lines(data) + + -- if the first line contains only number, it's a subtitle fps + local subtitle_fps = tonumber(lines[1]) + if subtitle_fps == nil or subtitle_fps == 0 then + subtitle_fps = mp.get_property_native("container-fps") + if subtitle_fps == nil or subtitle_fps == 0 then + subtitle_fps = 24 + end + end + + msg.info("Using " .. subtitle_fps .. "fps for microdvd subtitle") + + for _, line in ipairs(lines) do + local time_text = line:match("^{(%d+)}{(%d+)}") + if time_text then + local start_frame = tonumber(time_text:match("^(%d+)")) + + local text = line:match("^{%d+}{%d+}(.*)") + text = text:gsub("|", " ") + if text then + table.insert(result, { + time = frame_to_secs(start_frame, subtitle_fps), + text = text + }) + end + end + end + + return result +end + +function frame_to_secs(frame, subtitle_fps) + return frame / subtitle_fps +end + +function parse_sub(data) + bom_encoding = get_encoding_from_bom(data) + if bom_encoding ~= nil then + if bom_encoding == "utf-8" then + data = data:sub(3) + else + local error_overlay = mp.create_osd_overlay("ass-events") + error_overlay.data = "{\\a3\\fs20\\c&HFF&}Unsupported subtitle encoding: " .. bom_encoding .. ", please re-encode subtitle file to utf-8 to search" + error_overlay:update() + + msg.error("Unsupported subtitle encoding: " .. bom_encoding .. ", please re-encode subtitle file to utf-8 to search") + + mp.add_timeout(10, function() + error_overlay:remove() + end) + + return {} + end + end + + data = string.gsub(data, "\r\n", "\n") + + if data:sub(1, 6) == "WEBVTT" then + return parse_vtt_sub(data) + end + + if is_microdvd_sub(data) then + return parse_microdvd_sub(data) + end + + local result = {} + local state = "waiting_index" + local cur_line = {} + for _, line in ipairs(get_lines(data)) do + line = trim(line) + if state == "waiting_index" then + if cur_line.text then + table.insert(result, cur_line) + cur_line = {} + end + + if line:match("^%d+$") then + state = "waiting_time" + end + elseif state == "waiting_time" then + local time_text = line:match("^(%d%d:%d%d:%d%d,%d%d%d) ") + if time_text then + cur_line.time = sub_time_to_seconds(time_text, ",") + state = "waiting_text" + else + state = "waiting_index" + end + elseif state == "waiting_text" then + line = remove_tags(line) + if #line == 0 then + if cur_line.text then + table.insert(result, cur_line) + end + cur_line = {} + state = "waiting_index" + elseif cur_line.text then + cur_line.text = cur_line.text .. " " .. line + else + cur_line.text = line + end + end + end + + if cur_line.text then + table.insert(result, cur_line) + end + + return result +end + +function load_sub(path, prefix) + if not path then + return nil + end + + local cached = subs_cache[path] + if cached then + return cached + end + + local f = open_file(path) + if not f then + return nil + end + + local data = f:read("*all") + f:close() + + local sub = { + prefix = prefix, + lines = parse_sub(data) + } + subs_cache[path] = sub + return sub +end + +function make_nocase_pattern(s) + local result = "" + for _, code in utf8.codes(s) do + local c = utf8.char(code) + result = result .. string.format("[%s%s]", utf8.lower(c), utf8.upper(c)) + end + return result +end + +-- highlight found text with colored text in ass syntax +function highlight_match(text, match_text, style_reset) + local match_start, match_end = utf8.find(utf8.lower(text), utf8.lower(match_text)) + if match_start == nil then + return text + end + + local before = result_list.ass_escape(utf8.sub(text, 1, match_start - 1)) + local match = result_list.ass_escape(utf8.sub(text, match_start, match_end)) + local after = result_list.ass_escape(utf8.sub(text, match_end + 1)) + + if style_reset == "" then + style_reset = "{\\c&HFFFFFF&}" + end + + return before .. "{\\c&HFF00&}" .. match .. style_reset .. after +end + +function adjust_sub_time(time) + local delay = mp.get_property_native("sub-delay") + if delay == nil then + return time + end + return time + delay +end + +function divmod (a, b) + return math.floor(a / b), a % b +end + +function format_time(time) + decimals = 3 + sep = "." + local s = time + local h, s = divmod(s, 60 * 60) + local m, s = divmod(s, 60) + + local second_format = string.format("%%0%d.%df", 2 + (decimals > 0 and decimals + 1 or 0), decimals) + + return string.format("%02d" .. sep .. "%02d" .. sep .. second_format, h, m, s) +end + +function get_subs_to_search_in_async(on_done) + local result = {} + + get_sub_filename_async("sub", function(primary_filename) + local sub = load_sub(primary_filename, "P") + if sub then + table.insert(result, sub) + end + + get_sub_filename_async("sub2", function(secondary_filename) + sub = load_sub(secondary_filename, "S") + if sub then + table.insert(result, sub) + end + + on_done(result) + end) + end) +end + +function update_search_results_async(query, live) + get_subs_to_search_in_async(function(subs) + if #subs == 0 then + mp.osd_message("External subtitles not found") + return + end + + result_list.list = { + { + sub = nil, + time = mp.get_property_native("time-pos"), + ass = "Original position" + } + } + result_list.selected = 1 + result_list.live = live + + local closest_lower_index = 1 + local closest_lower_time = nil + local cur_time = mp.get_property_native("time-pos") + + local pat = "(" .. make_nocase_pattern(query) .. ")" + for _, sub in ipairs(subs) do + for _, sub_line in ipairs(sub.lines) do + if query == "*" or utf8.match(sub_line.text, pat) then + local sub_time = adjust_sub_time(sub_line.time) + + table.insert(result_list.list, { + sub = sub, + original_time = sub_line.time, + time = sub_time + 0.01, -- to ensure that the subtitle is visible + formatter = function(style_reset) + local sub_text = result_list.ass_escape(format_time(sub_time) .. ": ") .. + highlight_match(sub_line.text, query, style_reset) + + if #subs > 1 then + sub_text = "[" .. sub.prefix .. "] " .. sub_text + end + + return sub_text + end + }) + + if sub_time <= cur_time and (closest_lower_time == nil or closest_lower_time < sub_time) then + closest_lower_time = sub_time + closest_lower_index = #result_list.list + end + end + end + end + + result_list.selected = closest_lower_index + result_list.header = "Search results for \"" .. query .. "\"\\N ------------------------------------" + result_list.header = result_list.header .. "\\NENTER to jump to subtitle, Ctrl+Shift+Enter to adjust subtitle timing to selected line" + + result_list:update() + result_list:open() + end) +end + +mp.register_script_message('start-search', function() + if input_console.is_repl_active() then + input_console.set_active(false) + else + input_console.set_enter_handler(function(query) + update_search_results_async(query, false) + end) + input_console.set_active(true) + end +end) + +mp.register_script_message('show-all-lines', function() + update_search_results_async("*", true) +end) + +local function get_current_subtitle_index(list, pos) + local closest_lower_index = 1 + local closest_lower_time = nil + for i, item in ipairs(list) do + if item.time <= pos and (closest_lower_time == nil or closest_lower_time < item.time) then + closest_lower_time = item.time + closest_lower_index = i + end + end + return closest_lower_index +end + +mp.observe_property("time-pos", "native", function(_, pos) + if not result_list.hidden and result_list.live and pos ~= nil then + local index = get_current_subtitle_index(result_list.list, pos) + if index > 1 then + result_list.selected = index + result_list:update() + end + end +end) |
