diff options
Diffstat (limited to 'mac/.config/mpv/script-modules/utf8')
40 files changed, 3641 insertions, 0 deletions
diff --git a/mac/.config/mpv/script-modules/utf8/LICENSE b/mac/.config/mpv/script-modules/utf8/LICENSE new file mode 100644 index 0000000..fd3b301 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Stepets + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/mac/.config/mpv/script-modules/utf8/README.md b/mac/.config/mpv/script-modules/utf8/README.md new file mode 100644 index 0000000..0c31574 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/README.md @@ -0,0 +1,93 @@ +# utf8.lua +pure-lua 5.3 regex library for Lua 5.3, Lua 5.1, LuaJIT + +This library provides a simple way to add UTF-8 support to your application. 
+ +#### Example: +```Lua +local utf8 = require('.utf8'):init() +for k,v in pairs(utf8) do + string[k] = v +end + +local str = "пыщпыщ ололоо я водитель нло" +print(str:find("(.л.+)н")) +-- 8 26 ололоо я водитель + +print(str:gsub("ло+", "보라")) +-- пыщпыщ о보라보라 я водитель н보라 3 + +print(str:match("^п[лопыщ ]*я")) +-- пыщпыщ ололоо я +``` + +#### Usage: + +This library can be used as a drop-in replacement for the vanilla string library. It exports all vanilla functions under the `raw` sub-object. + +```Lua +local utf8 = require('.utf8'):init() +local str = "пыщпыщ ололоо я водитель нло" +utf8.gsub(str, "ло+", "보라") +-- пыщпыщ о보라보라 я водитель н보라 3 +utf8.raw.gsub(str, "ло+", "보라") +-- пыщпыщ о보라보라о я водитель н보라 3 +``` + +It also provides all functions from the Lua 5.3 UTF-8 [module](https://www.lua.org/manual/5.3/manual.html#6.5) except `utf8.len (s [, i [, j]])`. If you need to validate your strings, use `utf8.validate(str, byte_pos)` or iterate over them with `utf8.validator`. + +Please note that the library assumes regexes are valid UTF-8 strings; if you need to manipulate individual bytes, use the vanilla functions under `utf8.raw`. + + +#### Installation: + +Download the repository into your project folder (no rockspecs yet). + +The examples assume the library is placed in a subfolder named `utf8`, not `utf8.lua`. + +As of Lua 5.3, the built-in `utf8` module takes precedence over a user-provided one. In this case you can specify the full module path (`.utf8`). + +#### Configuration: + +The library is highly modular. You can provide your own implementation for almost any function used. The library already has several back-ends: +- [Runtime character class processing](charclass/runtime/init.lua) using hardcoded codepoint ranges or using native functions through `ffi`. +- [Basic functions](primitives/init.lua) for working with UTF-8 characters have specializations for `ffi`-enabled runtimes and for Tarantool. 
+ +Probably the most interesting [customizations](init.lua) are `utf8.config.loadstring` and `utf8.config.cache` if you want to precompile your regexes. + +```Lua +local utf8 = require('.utf8') +utf8.config = { + cache = my_smart_cache, +} +utf8:init() +``` + +For the `lower` and `upper` functions to work in environments where `ffi` cannot be used, you can specify substitution tables ([data example](https://github.com/artemshein/luv/blob/master/utf8data.lua)). + +```Lua +local utf8 = require('.utf8') +utf8.config = { + conversion = { + uc_lc = utf8_uc_lc, + lc_uc = utf8_lc_uc + }, +} +utf8:init() +``` +Customization is done before initialization. If you want, you can change the configuration after `init`; it might work for everything but modules, all of which would have to be reloaded. + +#### [Documentation:](test/test.lua) + +#### Issue reporting: + +Please provide an example script that causes the error, together with a description of your environment and the debug output. Debug output can be obtained like this: +```Lua +local utf8 = require('.utf8') +utf8.config = { + debug = utf8:require("util").debug +} +utf8:init() +-- your code +``` +The default logger is [`io.write`](https://www.lua.org/manual/5.3/manual.html#pdf-io.write); it can be changed by specifying `logger = my_logger` in the configuration. diff --git a/mac/.config/mpv/script-modules/utf8/begins/compiletime/parser.lua b/mac/.config/mpv/script-modules/utf8/begins/compiletime/parser.lua new file mode 100644 index 0000000..c54c0df --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/begins/compiletime/parser.lua @@ -0,0 +1,17 @@ +return function(utf8) + +utf8.config.begins = utf8.config.begins or { + utf8:require "begins.compiletime.vanilla" +} + +function utf8.regex.compiletime.begins.parse(regex, c, bs, ctx) + for _, m in ipairs(utf8.config.begins) do + local functions, move = m.parse(regex, c, bs, ctx) + utf8.debug("begins", _, c, bs, move, functions) + if functions then + return functions, move + end + end +end + +end diff --git 
a/mac/.config/mpv/script-modules/utf8/begins/compiletime/vanilla.lua b/mac/.config/mpv/script-modules/utf8/begins/compiletime/vanilla.lua new file mode 100644 index 0000000..bcafa17 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/begins/compiletime/vanilla.lua @@ -0,0 +1,60 @@ +return function(utf8) + +local matchers = { + sliding = function() + return [[ + add(function(ctx) -- sliding + while ctx.pos <= ctx.len do + local clone = ctx:clone() + -- debug('starting from', clone, "start_pos", clone.pos) + clone.result.start = clone.pos + clone:next_function() + clone:get_function()(clone) + + ctx:next_char() + end + ctx:terminate() + end) +]] + end, + fromstart = function(ctx) + return [[ + add(function(ctx) -- fromstart + if ctx.byte_pos > ctx.len then + return + end + ctx.result.start = ctx.pos + ctx:next_function() + ctx:get_function()(ctx) + ctx:terminate() + end) +]] + end, +} + +local function default() + return matchers.sliding() +end + +local function parse(regex, c, bs, ctx) + if bs ~= 1 then return end + + local functions + local skip = 0 + + if c == '^' then + functions = matchers.fromstart() + skip = 1 + else + functions = matchers.sliding() + end + + return functions, skip +end + +return { + parse = parse, + default = default, +} + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua new file mode 100644 index 0000000..9d9c603 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua @@ -0,0 +1,128 @@ +return function(utf8) + +local byte = utf8.byte +local unpack = utf8.config.unpack + +local builder = {} +local mt = {__index = builder} + +utf8.regex.compiletime.charclass.builder = builder + +function builder.new() + return setmetatable({}, mt) +end + +function builder:invert() + self.inverted = true + return self +end + +function builder:internal() -- is it enclosed in [] + self.internal = true + return self 
+end + +function builder:with_codes(...) + local codes = {...} + self.codes = self.codes or {} + + for _, v in ipairs(codes) do + table.insert(self.codes, type(v) == "number" and v or byte(v)) + end + + table.sort(self.codes) + return self +end + +function builder:with_ranges(...) + local ranges = {...} + self.ranges = self.ranges or {} + + for _, v in ipairs(ranges) do + table.insert(self.ranges, v) + end + + return self +end + +function builder:with_classes(...) + local classes = {...} + self.classes = self.classes or {} + + for _, v in ipairs(classes) do + table.insert(self.classes, v) + end + + return self +end + +function builder:without_classes(...) + local not_classes = {...} + self.not_classes = self.not_classes or {} + + for _, v in ipairs(not_classes) do + table.insert(self.not_classes, v) + end + + return self +end + +function builder:include(b) + if not b.inverted then + if b.codes then + self:with_codes(unpack(b.codes)) + end + if b.ranges then + self:with_ranges(unpack(b.ranges)) + end + if b.classes then + self:with_classes(unpack(b.classes)) + end + if b.not_classes then + self:without_classes(unpack(b.not_classes)) + end + else + self.includes = self.includes or {} + self.includes[#self.includes + 1] = b + end + return self +end + +function builder:build() + if self.codes and #self.codes == 1 and not self.inverted and not self.ranges and not self.classes and not self.not_classes and not self.includes then + return "{test = function(self, cc) return cc == " .. self.codes[1] .. " end}" + else + local codes_list = table.concat(self.codes or {}, ', ') + local ranges_list = '' + for i, r in ipairs(self.ranges or {}) do ranges_list = ranges_list .. (i > 1 and ', {' or '{') .. tostring(r[1]) .. ', ' .. tostring(r[2]) .. '}' end + local classes_list = '' + if self.classes then classes_list = "'" .. table.concat(self.classes, "', '") .. "'" end + local not_classes_list = '' + if self.not_classes then not_classes_list = "'" .. 
table.concat(self.not_classes, "', '") .. "'" end + + local subs_list = '' + for i, r in ipairs(self.includes or {}) do subs_list = subs_list .. (i > 1 and ', ' or '') .. r:build() .. '' end + + local src = [[cl.new():with_codes( + ]] .. codes_list .. [[ + ):with_ranges( + ]] .. ranges_list .. [[ + ):with_classes( + ]] .. classes_list .. [[ + ):without_classes( + ]] .. not_classes_list .. [[ + ):with_subs( + ]] .. subs_list .. [[ + )]] + + if self.inverted then + src = src .. ':invert()' + end + + return src + end +end + +return builder + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua new file mode 100644 index 0000000..4f1d4a9 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua @@ -0,0 +1,21 @@ +return function(utf8) + +utf8.config.compiletime_charclasses = utf8.config.compiletime_charclasses or { + utf8:require "charclass.compiletime.vanilla", + utf8:require "charclass.compiletime.range", + utf8:require "charclass.compiletime.stub", +} + +function utf8.regex.compiletime.charclass.parse(regex, c, bs, ctx) + utf8.debug("parse charclass():", regex, c, bs, regex[bs]) + for _, p in ipairs(utf8.config.compiletime_charclasses) do + local charclass, nbs = p(regex, c, bs, ctx) + if charclass then + ctx.prev_class = charclass:build() + utf8.debug("cc", ctx.prev_class, _, c, bs, nbs) + return charclass, nbs + end + end +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua new file mode 100644 index 0000000..2996234 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua @@ -0,0 +1,44 @@ +return function(utf8) + +local cl = utf8.regex.compiletime.charclass.builder + +local next = utf8.util.next + +return function(str, c, bs, ctx) + if not ctx.internal then return end + + local nbs = bs + + 
local r1, r2 + + local c, nbs = c, bs + if c == '%' then + c, nbs = next(str, nbs) + r1 = c + else + r1 = c + end + + utf8.debug("range r1", r1, nbs) + + c, nbs = next(str, nbs) + if c ~= '-' then return end + + c, nbs = next(str, nbs) + if c == '%' then + c, nbs = next(str, nbs) + r2 = c + elseif c ~= '' and c ~= ']' then + r2 = c + end + + utf8.debug("range r2", r2, nbs) + + if r1 and r2 then + return cl.new():with_ranges{utf8.byte(r1), utf8.byte(r2)}, utf8.next(str, nbs) - bs + else + return + end +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua new file mode 100644 index 0000000..395d05c --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua @@ -0,0 +1,9 @@ +return function(utf8) + +local cl = utf8.regex.compiletime.charclass.builder + +return function(str, c, bs, ctx) + return cl.new():with_codes(c), utf8.next(str, bs) - bs +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua new file mode 100644 index 0000000..8e7f0b3 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua @@ -0,0 +1,131 @@ +return function(utf8) + +local cl = utf8:require "charclass.compiletime.builder" + +local next = utf8.util.next + +local token = 1 + +local function parse(str, c, bs, ctx) + local tttt = token + token = token + 1 + + local class + local nbs = bs + utf8.debug("cc_parse", tttt, str, c, nbs, next(str, nbs)) + + if c == '%' then + c, nbs = next(str, bs) + if c == '' then + error("malformed pattern (ends with '%')") + end + local _c = utf8.raw.lower(c) + local matched + if _c == 'a' then + matched = ('alpha') + elseif _c == 'c' then + matched = ('cntrl') + elseif _c == 'd' then + matched = ('digit') + elseif _c == 'g' then + matched = ('graph') + elseif _c == 'l' then + matched = ('lower') 
+ elseif _c == 'p' then + matched = ('punct') + elseif _c == 's' then + matched = ('space') + elseif _c == 'u' then + matched = ('upper') + elseif _c == 'w' then + matched = ('alnum') + elseif _c == 'x' then + matched = ('xdigit') + end + + if matched then + if _c ~= c then + class = cl.new():without_classes(matched) + else + class = cl.new():with_classes(matched) + end + elseif _c == 'z' then + class = cl.new():with_codes(0) + if _c ~= c then + class = class:invert() + end + else + class = cl.new():with_codes(c) + end + elseif c == '[' and not ctx.internal then + local old_internal = ctx.internal + ctx.internal = true + class = cl.new() + local firstletter = true + while true do + local prev_nbs = nbs + c, nbs = next(str, nbs) + utf8.debug("next", tttt, c, nbs) + if c == '^' and firstletter then + class:invert() + local nc, nnbs = next(str, nbs) + if nc == ']' then + class:with_codes(nc) + nbs = nnbs + end + elseif c == ']' then + if firstletter then + class:with_codes(c) + else + utf8.debug('] on pos', tttt, nbs) + break + end + elseif c == '' then + error "malformed pattern (missing ']')" + else + local sub_class, skip = utf8.regex.compiletime.charclass.parse(str, c, nbs, ctx) + nbs = prev_nbs + skip + utf8.debug("include", tttt, bs, prev_nbs, nbs, skip) + class:include(sub_class) + end + firstletter = false + end + ctx.internal = old_internal + elseif c == '.' then + if not ctx.internal then + class = cl.new():invert() + else + class = cl.new():with_codes(c) + end + end + + return class, utf8.next(str, nbs) - bs +end + +return parse + +end + +--[[ + x: (where x is not one of the magic characters ^$()%.[]*+-?) represents the character x itself. + .: (a dot) represents all characters. + %a: represents all letters. + %c: represents all control characters. + %d: represents all digits. + %g: represents all printable characters except space. + %l: represents all lowercase letters. + %p: represents all punctuation characters. + %s: represents all space characters. 
+ %u: represents all uppercase letters. + %w: represents all alphanumeric characters. + %x: represents all hexadecimal digits. + %x: (where x is any non-alphanumeric character) represents the character x. This is the standard way to escape the magic characters. Any non-alphanumeric character (including all punctuation characters, even the non-magical) can be preceded by a '%' when used to represent itself in a pattern. + [set]: represents the class which is the union of all characters in set. A range of characters can be specified by separating the end characters of the range, in ascending order, with a '-'. All classes %x described above can also be used as components in set. All other characters in set represent themselves. For example, [%w_] (or [_%w]) represents all alphanumeric characters plus the underscore, [0-7] represents the octal digits, and [0-7%l%-] represents the octal digits plus the lowercase letters plus the '-' character. + + You can put a closing square bracket in a set by positioning it as the first character in the set. You can put a hyphen in a set by positioning it as the first or the last character in the set. (You can also use an escape for both cases.) + + The interaction between ranges and classes is not defined. Therefore, patterns like [%a-z] or [a-%%] have no meaning. + [^set]: represents the complement of set, where set is interpreted as above. + +For all classes represented by single letters (%a, %c, etc.), the corresponding uppercase letter represents the complement of the class. For instance, %S represents all non-space characters. 
+]] diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua new file mode 100644 index 0000000..33d7713 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua @@ -0,0 +1,184 @@ +return function(utf8) + +local class = {} +local mt = {__index = class} + +local utf8gensub = utf8.gensub + +function class.new() + return setmetatable({}, mt) +end + +function class:invert() + self.inverted = true + return self +end + +function class:with_codes(...) + local codes = {...} + self.codes = self.codes or {} + + for _, v in ipairs(codes) do + table.insert(self.codes, v) + end + + table.sort(self.codes) + return self +end + +function class:with_ranges(...) + local ranges = {...} + self.ranges = self.ranges or {} + + for _, v in ipairs(ranges) do + table.insert(self.ranges, v) + end + + return self +end + +function class:with_classes(...) + local classes = {...} + self.classes = self.classes or {} + + for _, v in ipairs(classes) do + table.insert(self.classes, v) + end + + return self +end + +function class:without_classes(...) + local not_classes = {...} + self.not_classes = self.not_classes or {} + + for _, v in ipairs(not_classes) do + table.insert(self.not_classes, v) + end + + return self +end + +function class:with_subs(...) 
+ local subs = {...} + self.subs = self.subs or {} + + for _, v in ipairs(subs) do + table.insert(self.subs, v) + end + + return self +end + +function class:in_codes(item) + if not self.codes or #self.codes == 0 then return nil end + + local head, tail = 1, #self.codes + local mid = math.floor((head + tail)/2) + while (tail - head) > 1 do + if self.codes[mid] > item then + tail = mid + else + head = mid + end + mid = math.floor((head + tail)/2) + end + if self.codes[head] == item then + return true, head + elseif self.codes[tail] == item then + return true, tail + else + return false + end +end + +function class:in_ranges(char_code) + if not self.ranges or #self.ranges == 0 then return nil end + + for _,r in ipairs(self.ranges) do + if r[1] <= char_code and char_code <= r[2] then + return true + end + end + return false +end + +function class:in_classes(char_code) + if not self.classes or #self.classes == 0 then return nil end + + for _, class in ipairs(self.classes) do + if self:is(class, char_code) then + return true + end + end + return false +end + +function class:in_not_classes(char_code) + if not self.not_classes or #self.not_classes == 0 then return nil end + + for _, class in ipairs(self.not_classes) do + if self:is(class, char_code) then + return true + end + end + return false +end + +function class:is(class, char_code) + error("not implemented") +end + +function class:in_subs(char_code) + if not self.subs or #self.subs == 0 then return nil end + + for _, c in ipairs(self.subs) do + if not c:test(char_code) then + return false + end + end + return true +end + +function class:test(char_code) + local result = self:do_test(char_code) + -- utf8.debug('class:test', result, "'" .. (char_code and utf8.char(char_code) or 'nil') .. 
"'", char_code) + return result +end + +function class:do_test(char_code) + if not char_code then return false end + local in_not_classes = self:in_not_classes(char_code) + if in_not_classes then + return not not self.inverted + end + local in_codes = self:in_codes(char_code) + if in_codes then + return not self.inverted + end + local in_ranges = self:in_ranges(char_code) + if in_ranges then + return not self.inverted + end + local in_classes = self:in_classes(char_code) + if in_classes then + return not self.inverted + end + local in_subs = self:in_subs(char_code) + if in_subs then + return not self.inverted + end + if (in_codes == nil) + and (in_ranges == nil) + and (in_classes == nil) + and (in_subs == nil) + and (in_not_classes == false) then + return not self.inverted + else + return not not self.inverted + end +end + +return class + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua new file mode 100644 index 0000000..1faddc1 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua @@ -0,0 +1,41 @@ +return function(utf8) + +local base = utf8:require "charclass.runtime.base" + +local dummy = setmetatable({}, {__index = base}) +local mt = {__index = dummy} + +function dummy.new() + return setmetatable({}, mt) +end + +function dummy:with_classes(...) 
+ local classes = {...} + for _, c in ipairs(classes) do + if c == 'alpha' then self:with_ranges({65, 90}, {97, 122}) + elseif c == 'cntrl' then self:with_ranges({0, 31}):with_codes(127) + elseif c == 'digit' then self:with_ranges({48, 57}) + elseif c == 'graph' then self:with_ranges({1, 8}, {14, 31}, {33, 132}, {134, 159}, {161, 5759}, {5761, 8191}, {8203, 8231}, {8234, 8238}, {8240, 8286}, {8288, 12287}) + elseif c == 'lower' then self:with_ranges({97, 122}) + elseif c == 'punct' then self:with_ranges({33, 47}, {58, 64}, {91, 96}, {123, 126}) + elseif c == 'space' then self:with_ranges({9, 13}):with_codes(32, 133, 160, 5760):with_ranges({8192, 8202}):with_codes(8232, 8233, 8239, 8287, 12288) + elseif c == 'upper' then self:with_ranges({65, 90}) + elseif c == 'alnum' then self:with_ranges({48, 57}, {65, 90}, {97, 122}) + elseif c == 'xdigit' then self:with_ranges({48, 57}, {65, 70}, {97, 102}) + end + end + return self +end + +function dummy:without_classes(...) + local classes = {...} + if #classes > 0 then + return self:with_subs(dummy.new():with_classes(...):invert()) + else + return self + end +end + +return dummy + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua new file mode 100644 index 0000000..e71d037 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua @@ -0,0 +1,22 @@ +return function(utf8) + +local provided = utf8.config.runtime_charclasses + +if provided then + if type(provided) == "table" then + return provided + elseif type(provided) == "function" then + return provided(utf8) + else + return utf8:require(provided) + end +end + +local ffi = pcall(require, "ffi") +if not ffi then + return utf8:require "charclass.runtime.dummy" +else + return utf8:require "charclass.runtime.native" +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua 
b/mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua new file mode 100644 index 0000000..f7b7890 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua @@ -0,0 +1,47 @@ +return function(utf8) + +os.setlocale(utf8.config.locale, "ctype") + +local ffi = require("ffi") +ffi.cdef[[ + int iswalnum(int c); + int iswalpha(int c); + int iswascii(int c); + int iswblank(int c); + int iswcntrl(int c); + int iswdigit(int c); + int iswgraph(int c); + int iswlower(int c); + int iswprint(int c); + int iswpunct(int c); + int iswspace(int c); + int iswupper(int c); + int iswxdigit(int c); +]] + +local base = utf8:require "charclass.runtime.base" + +local native = setmetatable({}, {__index = base}) +local mt = {__index = native} + +function native.new() + return setmetatable({}, mt) +end + +function native:is(class, char_code) + if class == 'alpha' then return ffi.C.iswalpha(char_code) ~= 0 + elseif class == 'cntrl' then return ffi.C.iswcntrl(char_code) ~= 0 + elseif class == 'digit' then return ffi.C.iswdigit(char_code) ~= 0 + elseif class == 'graph' then return ffi.C.iswgraph(char_code) ~= 0 + elseif class == 'lower' then return ffi.C.iswlower(char_code) ~= 0 + elseif class == 'punct' then return ffi.C.iswpunct(char_code) ~= 0 + elseif class == 'space' then return ffi.C.iswspace(char_code) ~= 0 + elseif class == 'upper' then return ffi.C.iswupper(char_code) ~= 0 + elseif class == 'alnum' then return ffi.C.iswalnum(char_code) ~= 0 + elseif class == 'xdigit' then return ffi.C.iswxdigit(char_code) ~= 0 + end +end + +return native + +end diff --git a/mac/.config/mpv/script-modules/utf8/context/compiletime.lua b/mac/.config/mpv/script-modules/utf8/context/compiletime.lua new file mode 100644 index 0000000..621204d --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/context/compiletime.lua @@ -0,0 +1,18 @@ +return function(utf8) + +local begins = utf8.config.begins +local ends = utf8.config.ends + +return { + new = function() + return { + 
prev_class = nil, + begins = begins[1].default(), + ends = ends[1].default(), + funcs = {}, + internal = false, -- hack for ranges, flags if parser is in [] + } + end +} + +end diff --git a/mac/.config/mpv/script-modules/utf8/context/runtime.lua b/mac/.config/mpv/script-modules/utf8/context/runtime.lua new file mode 100644 index 0000000..6fb024c --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/context/runtime.lua @@ -0,0 +1,112 @@ +return function(utf8) + +local utf8unicode = utf8.unicode +local utf8sub = utf8.sub +local sub = utf8.raw.sub +local byte = utf8.raw.byte +local utf8len = utf8.len +local utf8next = utf8.next +local rawgsub = utf8.raw.gsub +local utf8offset = utf8.offset +local utf8char = utf8.char + +local util = utf8.util + +local ctx = {} +local mt = { + __index = ctx, + __tostring = function(self) + return rawgsub([[str: '${str}', char: ${pos} '${char}', func: ${func_pos}]], "${(.-)}", { + str = self.str, + pos = self.pos, + char = self:get_char(), + func_pos = self.func_pos, + }) + end +} + +function ctx.new(obj) + obj = obj or {} + local res = setmetatable({ + pos = obj.pos or 1, + byte_pos = obj.pos or 1, + str = assert(obj.str, "str is required"), + len = obj.len, + rawlen = obj.rawlen, + bytes = obj.bytes, + offsets = obj.offsets, + starts = obj.starts or nil, + functions = obj.functions or {}, + func_pos = obj.func_pos or 1, + ends = obj.ends or nil, + result = obj.result and util.copy(obj.result) or {}, + captures = obj.captures and util.copy(obj.captures, true) or {active = {}}, + modified = false, + }, mt) + if not res.bytes then + local str = res.str + local l = #str + local bytes = utf8.config.int32array(l) + local offsets = utf8.config.int32array(l) + local c, bs, i = nil, 1, 1 + while bs <= l do + bytes[i] = utf8unicode(str, bs, bs) + offsets[i] = bs + bs = utf8.next(str, bs) + i = i + 1 + end + res.bytes = bytes + res.offsets = offsets + res.byte_pos = res.pos + res.len = i + res.rawlen = l + end + + return res +end + +function 
ctx:clone() + return self:new() +end + +function ctx:next_char() + self.pos = self.pos + 1 + self.byte_pos = self.pos +end + +function ctx:prev_char() + self.pos = self.pos - 1 + self.byte_pos = self.pos +end + +function ctx:get_char() + if self.len <= self.pos then return "" end + return utf8char(self.bytes[self.pos]) +end + +function ctx:get_charcode() + if self.len <= self.pos then return nil end + return self.bytes[self.pos] +end + +function ctx:next_function() + self.func_pos = self.func_pos + 1 +end + +function ctx:get_function() + return self.functions[self.func_pos] +end + +function ctx:done() + utf8.debug('done', self) + coroutine.yield(self, self.result, self.captures) +end + +function ctx:terminate() + utf8.debug('terminate', self) + coroutine.yield(nil) +end + +return ctx + +end diff --git a/mac/.config/mpv/script-modules/utf8/ends/compiletime/parser.lua b/mac/.config/mpv/script-modules/utf8/ends/compiletime/parser.lua new file mode 100644 index 0000000..f966e94 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/ends/compiletime/parser.lua @@ -0,0 +1,17 @@ +return function(utf8) + +utf8.config.ends = utf8.config.ends or { + utf8:require "ends.compiletime.vanilla" +} + +function utf8.regex.compiletime.ends.parse(regex, c, bs, ctx) + for _, m in ipairs(utf8.config.ends) do + local functions, move = m.parse(regex, c, bs, ctx) + utf8.debug("ends", _, c, bs, move, functions) + if functions then + return functions, move + end + end +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/ends/compiletime/vanilla.lua b/mac/.config/mpv/script-modules/utf8/ends/compiletime/vanilla.lua new file mode 100644 index 0000000..5fe7eb3 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/ends/compiletime/vanilla.lua @@ -0,0 +1,46 @@ +return function(utf8) + +local matchers = { + any = function() + return [[ + add(function(ctx) -- any + ctx.result.finish = ctx.pos - 1 + ctx:done() + end) +]] + end, + toend = function(ctx) + return [[ + add(function(ctx) -- 
toend + ctx.result.finish = ctx.pos - 1 + ctx.modified = true + if ctx.pos == utf8len(ctx.str) + 1 then ctx:done() end + end) +]] + end, +} + +local len = utf8.raw.len + +local function default() + return matchers.any() +end + +local function parse(regex, c, bs, ctx) + local functions + local skip = 0 + + if bs == len(regex) and c == '$' then + functions = matchers.toend() + skip = 1 + end + + return functions, skip +end + +return { + parse = parse, + default = default, +} + +end diff --git a/mac/.config/mpv/script-modules/utf8/functions/lua53.lua b/mac/.config/mpv/script-modules/utf8/functions/lua53.lua new file mode 100644 index 0000000..26e6f23 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/functions/lua53.lua @@ -0,0 +1,152 @@ +return function(utf8) + +local utf8sub = utf8.sub +local utf8gensub = utf8.gensub +local unpack = utf8.config.unpack +local generate_matcher_function = utf8:require 'regex_parser' + +local +function get_matcher_function(regex, plain) + local res + if utf8.config.cache then + res = utf8.config.cache[plain and "plain" or "regex"][regex] + end + if res then + return res + end + res = generate_matcher_function(regex, plain) + if utf8.config.cache then + utf8.config.cache[plain and "plain" or "regex"][regex] = res + end + return res +end + +local function utf8find(str, regex, init, plain) + local func = get_matcher_function(regex, plain) + init = ((init or 1) < 0) and (utf8.len(str) + init + 1) or init + local ctx, result, captures = func(str, init, utf8) + if not ctx then return nil end + + utf8.debug('ctx:', ctx) + utf8.debug('result:', result) + utf8.debug('captures:', captures) + + return result.start, result.finish, unpack(captures) +end + +local function utf8match(str, regex, init) + local func = get_matcher_function(regex, false) + init = ((init or 1) < 0) and (utf8.len(str) + init + 1) or init + local ctx, result, captures = func(str, init, utf8) + if not ctx then return nil end + + utf8.debug('ctx:', ctx) + 
utf8.debug('result:', result) + utf8.debug('captures:', captures) + + if #captures > 0 then return unpack(captures) end + + return utf8sub(str, result.start, result.finish) +end + +local function utf8gmatch(str, regex) + regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex + local func = get_matcher_function(regex, false) + local ctx, result, captures + local continue_pos = 1 + + return function() + ctx, result, captures = func(str, continue_pos, utf8) + + if not ctx then return nil end + + utf8.debug('ctx:', ctx) + utf8.debug('result:', result) + utf8.debug('captures:', captures) + + continue_pos = math.max(result.finish + 1, result.start + 1) + if #captures > 0 then + return unpack(captures) + else + return utf8sub(str, result.start, result.finish) + end + end +end + +local function replace(repl, args) + local ret = '' + if type(repl) == 'string' then + local ignore = false + local num + for _, c in utf8gensub(repl) do + if not ignore then + if c == '%' then + ignore = true + else + ret = ret .. c + end + else + num = tonumber(c) + if num then + ret = ret .. assert(args[num], "invalid capture index %" .. c) + else + ret = ret .. 
c + end + ignore = false + end + end + elseif type(repl) == 'table' then + ret = repl[args[1]] or args[0] + elseif type(repl) == 'function' then + ret = repl(unpack(args, 1)) or args[0] + end + return ret +end + +local function utf8gsub(str, regex, repl, limit) + limit = limit or -1 + local subbed = '' + local prev_sub_finish = 1 + + local func = get_matcher_function(regex, false) + local ctx, result, captures + local continue_pos = 1 + + local n = 0 + while limit ~= n do + ctx, result, captures = func(str, continue_pos, utf8) + if not ctx then break end + + utf8.debug('ctx:', ctx) + utf8.debug('result:', result) + utf8.debug('result:', utf8sub(str, result.start, result.finish)) + utf8.debug('captures:', captures) + + continue_pos = math.max(result.finish + 1, result.start + 1) + local args + if #captures > 0 then + args = {[0] = utf8sub(str, result.start, result.finish), unpack(captures)} + else + args = {[0] = utf8sub(str, result.start, result.finish)} + args[1] = args[0] + end + + subbed = subbed .. utf8sub(str, prev_sub_finish, result.start - 1) + subbed = subbed .. replace(repl, args) + prev_sub_finish = result.finish + 1 + n = n + 1 + + end + + return subbed .. utf8sub(str, prev_sub_finish), n +end + +-- attaching high-level functions +utf8.find = utf8find +utf8.match = utf8match +utf8.gmatch = utf8gmatch +utf8.gsub = utf8gsub + +return utf8 + +end diff --git a/mac/.config/mpv/script-modules/utf8/init.lua b/mac/.config/mpv/script-modules/utf8/init.lua new file mode 100644 index 0000000..d2f72a4 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/init.lua @@ -0,0 +1,71 @@ +local module_path = ... +module_path = module_path:match("^(.-)init$") or (module_path .. 
'.') + +local ffi_enabled, ffi = pcall(require, 'ffi') + +local utf8 = { + config = {}, + default = { + debug = nil, + logger = io.write, + loadstring = (loadstring or load), + unpack = (unpack or table.unpack), + cache = { + regex = setmetatable({},{ + __mode = 'kv' + }), + plain = setmetatable({},{ + __mode = 'kv' + }), + }, + locale = nil, + int32array = function(size) + if ffi_enabled then + return ffi.new("uint32_t[?]", size + 1) + else + return {} + end + end, + conversion = { + uc_lc = nil, + lc_uc = nil + } + }, + regex = { + compiletime = { + charclass = {}, + begins = {}, + ends = {}, + modifier = {}, + } + }, + util = {}, +} + +function utf8:require(name) + local full_module_path = module_path .. name + if package.loaded[full_module_path] then + return package.loaded[full_module_path] + end + + local mod = require(full_module_path) + if type(mod) == 'function' then + mod = mod(self) + package.loaded[full_module_path] = mod + end + return mod +end + +function utf8:init() + for k, v in pairs(self.default) do + self.config[k] = self.config[k] or v + end + + self:require "util" + self:require "primitives.init" + self:require "functions.lua53" + + return self +end + +return utf8 diff --git a/mac/.config/mpv/script-modules/utf8/modifier/compiletime/frontier.lua b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/frontier.lua new file mode 100644 index 0000000..cf0f4ab --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/frontier.lua @@ -0,0 +1,50 @@ +return function(utf8) + +local matchers = { + frontier = function(class, name) + local class_name = 'class' .. name + return [[ + local ]] .. class_name .. [[ = ]] .. class .. [[ + + add(function(ctx) -- frontier + ctx:prev_char() + local prev_charcode = ctx:get_charcode() or 0 + ctx:next_char() + local charcode = ctx:get_charcode() or 0 + -- debug("frontier pos", ctx.pos, "prev_charcode", prev_charcode, "charcode", charcode) + if ]] .. class_name .. 
[[:test(prev_charcode) then return end + if ]] .. class_name .. [[:test(charcode) then + ctx:next_function() + return ctx:get_function()(ctx) + end + end) +]] + end, + simple = utf8:require("modifier.compiletime.simple").simple, +} + +local function parse(regex, c, bs, ctx) + local functions, nbs, class + + if c == '%' then + if utf8.raw.sub(regex, bs + 1, bs + 1) ~= 'f' then return end + if utf8.raw.sub(regex, bs + 2, bs + 2) ~= '[' then error("missing '[' after '%f' in pattern") end + + functions = {} + if ctx.prev_class then + table.insert(functions, matchers.simple(ctx.prev_class, tostring(bs))) + ctx.prev_class = nil + end + class, nbs = utf8.regex.compiletime.charclass.parse(regex, '[', bs + 2, ctx) + nbs = nbs + 2 + table.insert(functions, matchers.frontier(class:build(), tostring(bs))) + end + + return functions, nbs +end + +return { + parse = parse, +} + +end diff --git a/mac/.config/mpv/script-modules/utf8/modifier/compiletime/parser.lua b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/parser.lua new file mode 100644 index 0000000..9149f71 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/parser.lua @@ -0,0 +1,20 @@ +return function(utf8) + +utf8.config.modifier = utf8.config.modifier or { + utf8:require "modifier.compiletime.vanilla", + utf8:require "modifier.compiletime.frontier", + utf8:require "modifier.compiletime.stub", +} + +function utf8.regex.compiletime.modifier.parse(regex, c, bs, ctx) + for _, m in ipairs(utf8.config.modifier) do + local functions, move = m.parse(regex, c, bs, ctx) + utf8.debug("mod", _, c, bs, move, functions and utf8.config.unpack(functions)) + if functions then + ctx.prev_class = nil + return functions, move + end + end +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/modifier/compiletime/simple.lua b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/simple.lua new file mode 100644 index 0000000..1a28b85 --- /dev/null +++ 
b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/simple.lua @@ -0,0 +1,23 @@ +return function(utf8) + +local matchers = { + simple = function(class, name) + local class_name = 'class' .. name + return [[ + local ]] .. class_name .. [[ = ]] .. class .. [[ + + add(function(ctx) -- simple + -- debug(ctx, 'simple', ']] .. class_name .. [[') + if ]] .. class_name .. [[:test(ctx:get_charcode()) then + ctx:next_char() + ctx:next_function() + return ctx:get_function()(ctx) + end + end) +]] + end, +} + +return matchers + +end diff --git a/mac/.config/mpv/script-modules/utf8/modifier/compiletime/stub.lua b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/stub.lua new file mode 100644 index 0000000..e1289a6 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/stub.lua @@ -0,0 +1,28 @@ +return function(utf8) + +local matchers = utf8:require("modifier.compiletime.simple") + +local function parse(regex, c, bs, ctx) + local functions + + if ctx.prev_class then + functions = { matchers.simple(ctx.prev_class, tostring(bs)) } + ctx.prev_class = nil + end + + return functions, 0 +end + +local function check(ctx) + if ctx.prev_class then + table.insert(ctx.funcs, matchers.simple(ctx.prev_class, tostring(ctx.pos))) + ctx.prev_class = nil + end +end + +return { + parse = parse, + check = check, +} + +end diff --git a/mac/.config/mpv/script-modules/utf8/modifier/compiletime/vanilla.lua b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/vanilla.lua new file mode 100644 index 0000000..96e79d2 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/modifier/compiletime/vanilla.lua @@ -0,0 +1,270 @@ +return function(utf8) + +local utf8unicode = utf8.byte +local sub = utf8.raw.sub + +local matchers = { + star = function(class, name) + local class_name = 'class' .. name + return [[ + local ]] .. class_name .. [[ = ]] .. class .. [[ + + add(function(ctx) -- star + -- debug(ctx, 'star', ']] .. class_name .. 
[[') + local clone = ctx:clone() + while ]] .. class_name .. [[:test(clone:get_charcode()) do + clone:next_char() + end + local pos = clone.pos + while pos >= ctx.pos do + clone.pos = pos + clone.func_pos = ctx.func_pos + clone:next_function() + clone:get_function()(clone) + if clone.modified then + clone = ctx:clone() + end + pos = pos - 1 + end + end) +]] + end, + minus = function(class, name) + local class_name = 'class' .. name + return [[ + local ]] .. class_name .. [[ = ]] .. class .. [[ + + add(function(ctx) -- minus + -- debug(ctx, 'minus', ']] .. class_name .. [[') + + local clone = ctx:clone() + local pos + repeat + pos = clone.pos + clone:next_function() + clone:get_function()(clone) + if clone.modified then + clone = ctx:clone() + clone.pos = pos + else + clone.pos = pos + clone.func_pos = ctx.func_pos + end + local match = ]] .. class_name .. [[:test(clone:get_charcode()) + clone:next_char() + until not match + end) +]] + end, + question = function(class, name) + local class_name = 'class' .. name + return [[ + local ]] .. class_name .. [[ = ]] .. class .. [[ + + add(function(ctx) -- question + -- debug(ctx, 'question', ']] .. class_name .. [[') + local saved = ctx:clone() + if ]] .. class_name .. [[:test(ctx:get_charcode()) then + ctx:next_char() + ctx:next_function() + ctx:get_function()(ctx) + end + ctx = saved + ctx:next_function() + return ctx:get_function()(ctx) + end) +]] + end, + capture_start = function(number) + return [[ + add(function(ctx) + ctx.modified = true + -- debug(ctx, 'capture_start', ']] .. tostring(number) .. [[') + table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start = ctx.pos }) + ctx:next_function() + return ctx:get_function()(ctx) + end) +]] + end, + capture_finish = function(number) + return [[ + add(function(ctx) + ctx.modified = true + -- debug(ctx, 'capture_finish', ']] .. tostring(number) .. 
[[') + local cap = table.remove(ctx.captures.active) + cap.finish = ctx.pos + local b, e = ctx.offsets[cap.start], ctx.offsets[cap.finish] + if cap.start < 1 then + b = 1 + elseif cap.start >= ctx.len then + b = ctx.rawlen + 1 + end + if cap.finish < 1 then + e = 1 + elseif cap.finish >= ctx.len then + e = ctx.rawlen + 1 + end + ctx.captures[cap.id] = rawsub(ctx.str, b, e - 1) + -- debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id]) + ctx:next_function() + return ctx:get_function()(ctx) + end) +]] + end, + capture_position = function(number) + return [[ + add(function(ctx) + ctx.modified = true + -- debug(ctx, 'capture_position', ']] .. tostring(number) .. [[') + ctx.captures[ ]] .. tostring(number) .. [[ ] = ctx.pos + ctx:next_function() + return ctx:get_function()(ctx) + end) +]] + end, + capture = function(number) + return [[ + add(function(ctx) + -- debug(ctx, 'capture', ']] .. tostring(number) .. [[') + local cap = ctx.captures[ ]] .. tostring(number) .. [[ ] + local len = utf8len(cap) + local check = utf8sub(ctx.str, ctx.pos, ctx.pos + len - 1) + -- debug("capture check:", cap, check) + if cap == check then + ctx.pos = ctx.pos + len + ctx:next_function() + return ctx:get_function()(ctx) + end + end) +]] + end, + balancer = function(pair, name) + local class_name = 'class' .. name + return [[ + + add(function(ctx) -- balancer + local d, b = ]] .. tostring(utf8unicode(pair[1])) .. [[, ]] .. tostring(utf8unicode(pair[2])) .. 
[[ + if ctx:get_charcode() ~= d then return end + local balance = 0 + repeat + local c = ctx:get_charcode() + if c == nil then return end + + if c == d then + balance = balance + 1 + elseif c == b then + balance = balance - 1 + end + -- debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode()) + ctx:next_char() + until balance == 0 or (balance == 2 and d == b) + ctx:next_function() + return ctx:get_function()(ctx) + end) +]] + end, + simple = utf8:require("modifier.compiletime.simple").simple, +} + +local next = utf8.util.next + +local function parse(regex, c, bs, ctx) + local functions, nbs = nil, bs + if c == '%' then + c, nbs = next(regex, bs) + utf8.debug("next", c, bs) + if c == '' then + error("malformed pattern (ends with '%')") + end + if utf8.raw.find('123456789', c, 1, true) then + functions = { matchers.capture(tonumber(c)) } + nbs = utf8.next(regex, nbs) + elseif c == 'b' then + local d, b + d, nbs = next(regex, nbs) + b, nbs = next(regex, nbs) + assert(d ~= '' and b ~= '', "unbalanced pattern") + functions = { matchers.balancer({d, b}, tostring(bs)) } + nbs = utf8.next(regex, nbs) + end + + if functions and ctx.prev_class then + table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs))) + end + elseif c == '*' and ctx.prev_class then + functions = { + matchers.star( + ctx.prev_class, + tostring(bs) + ) + } + nbs = bs + 1 + elseif c == '+' and ctx.prev_class then + functions = { + matchers.simple( + ctx.prev_class, + tostring(bs) + ), + matchers.star( + ctx.prev_class, + tostring(bs) + ) + } + nbs = bs + 1 + elseif c == '-' and ctx.prev_class then + functions = { + matchers.minus( + ctx.prev_class, + tostring(bs) + ) + } + nbs = bs + 1 + elseif c == '?' 
and ctx.prev_class then + functions = { + matchers.question( + ctx.prev_class, + tostring(bs) + ) + } + nbs = bs + 1 + elseif c == '(' then + ctx.capture = ctx.capture or {balance = 0, id = 0} + ctx.capture.id = ctx.capture.id + 1 + local nc = next(regex, nbs) + if nc == ')' then + functions = {matchers.capture_position(ctx.capture.id)} + nbs = bs + 2 + else + ctx.capture.balance = ctx.capture.balance + 1 + functions = {matchers.capture_start(ctx.capture.id)} + nbs = bs + 1 + end + if ctx.prev_class then + table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs))) + end + elseif c == ')' then + ctx.capture = ctx.capture or {balance = 0, id = 0} + functions = { matchers.capture_finish(ctx.capture.id) } + + ctx.capture.balance = ctx.capture.balance - 1 + assert(ctx.capture.balance >= 0, 'invalid capture: "(" missing') + + if ctx.prev_class then + table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs))) + end + nbs = bs + 1 + end + + return functions, nbs - bs +end + +local function check(ctx) + if ctx.capture then assert(ctx.capture.balance == 0, 'invalid capture: ")" missing') end +end + +return { + parse = parse, + check = check, +} + +end diff --git a/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua b/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua new file mode 100644 index 0000000..a4665f5 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua @@ -0,0 +1,555 @@ +-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $ +-- +-- Provides UTF-8 aware string functions implemented in pure lua: +-- * utf8len(s) +-- * utf8sub(s, i, j) +-- * utf8reverse(s) +-- * utf8char(unicode) +-- * utf8unicode(s, i, j) +-- * utf8gensub(s, sub_len) +-- * utf8find(str, regex, init, plain) +-- * utf8match(str, regex, init) +-- * utf8gmatch(str, regex, all) +-- * utf8gsub(str, regex, repl, limit) +-- +-- All functions behave as their non UTF-8 aware counterparts with the exception +-- that UTF-8 characters are used instead 
of bytes for all units. + +--[[ +Copyright (c) 2006-2007, Kyle Smith +All rights reserved. + +Contributors: + Alimov Stepan + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+--]] + +-- ABNF from RFC 3629 +-- +-- UTF8-octets = *( UTF8-char ) +-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 +-- UTF8-1 = %x00-7F +-- UTF8-2 = %xC2-DF UTF8-tail +-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / +-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) +-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / +-- %xF4 %x80-8F 2( UTF8-tail ) +-- UTF8-tail = %x80-BF +-- +return function(utf8) + +local byte = string.byte +local char = string.char +local dump = string.dump +local find = string.find +local format = string.format +local len = string.len +local lower = string.lower +local rep = string.rep +local sub = string.sub +local upper = string.upper + +local utf8charpattern = '[%z\1-\127\194-\244][\128-\191]*' + +local function utf8symbollen(byte) + return not byte and 0 or (byte < 0x80 and 1) or (byte >= 0xF0 and 4) or (byte >= 0xE0 and 3) or (byte >= 0xC0 and 2) or 1 +end + +local head_table = utf8.config.int32array(256) +for i = 0, 255 do + head_table[i] = utf8symbollen(i) +end +head_table[256] = 0 + +local function utf8charbytes(str, bs) + return head_table[byte(str, bs) or 256] +end + +local function utf8next(str, bs) + return bs + utf8charbytes(str, bs) +end + +-- returns the number of characters in a UTF-8 string +local function utf8len (str) + local bs = 1 + local bytes = len(str) + local length = 0 + + while bs <= bytes do + length = length + 1 + bs = utf8next(str, bs) + end + + return length +end + +-- functions identically to string.sub except that i and j are UTF-8 characters +-- instead of bytes +local function utf8sub (s, i, j) + -- argument defaults + j = j or -1 + + local bs = 1 + local bytes = len(s) + local length = 0 + + local l = (i >= 0 and j >= 0) or utf8len(s) + i = (i >= 0) and i or l + i + 1 + j = (j >= 0) and j or l + j + 1 + + if i > j then + return "" + end + + local start, finish = 1, bytes + + while bs <= bytes do + length = length + 1 + + if length == i then + start = bs + end + + bs = 
utf8next(s, bs) + + if length == j then + finish = bs - 1 + break + end + end + + if i > length then start = bytes + 1 end + if j < 1 then finish = 0 end + + return sub(s, start, finish) +end + +-- http://en.wikipedia.org/wiki/Utf8 +-- http://developer.coronalabs.com/code/utf-8-conversion-utility +local function utf8char(...) + local codes = {...} + local result = {} + + for _, unicode in ipairs(codes) do + + if unicode <= 0x7F then + result[#result + 1] = unicode + elseif unicode <= 0x7FF then + local b0 = 0xC0 + math.floor(unicode / 0x40); + local b1 = 0x80 + (unicode % 0x40); + result[#result + 1] = b0 + result[#result + 1] = b1 + elseif unicode <= 0xFFFF then + local b0 = 0xE0 + math.floor(unicode / 0x1000); + local b1 = 0x80 + (math.floor(unicode / 0x40) % 0x40); + local b2 = 0x80 + (unicode % 0x40); + result[#result + 1] = b0 + result[#result + 1] = b1 + result[#result + 1] = b2 + elseif unicode <= 0x10FFFF then + local code = unicode + local b3= 0x80 + (code % 0x40); + code = math.floor(code / 0x40) + local b2= 0x80 + (code % 0x40); + code = math.floor(code / 0x40) + local b1= 0x80 + (code % 0x40); + code = math.floor(code / 0x40) + local b0= 0xF0 + code; + + result[#result + 1] = b0 + result[#result + 1] = b1 + result[#result + 1] = b2 + result[#result + 1] = b3 + else + error 'Unicode cannot be greater than U+10FFFF!' 
+ end + + end + + return char(utf8.config.unpack(result)) +end + + +local shift_6 = 2^6 +local shift_12 = 2^12 +local shift_18 = 2^18 + +local utf8unicode +utf8unicode = function(str, ibs, jbs) + if ibs > jbs then return end + + local ch,bytes + + bytes = utf8charbytes(str, ibs) + if bytes == 0 then return end + + local unicode + + if bytes == 1 then unicode = byte(str, ibs, ibs) end + if bytes == 2 then + local byte0,byte1 = byte(str, ibs, ibs + 1) + if byte0 and byte1 then + local code0,code1 = byte0-0xC0,byte1-0x80 + unicode = code0*shift_6 + code1 + else + unicode = byte0 + end + end + if bytes == 3 then + local byte0,byte1,byte2 = byte(str, ibs, ibs + 2) + if byte0 and byte1 and byte2 then + local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80 + unicode = code0*shift_12 + code1*shift_6 + code2 + else + unicode = byte0 + end + end + if bytes == 4 then + local byte0,byte1,byte2,byte3 = byte(str, ibs, ibs + 3) + if byte0 and byte1 and byte2 and byte3 then + local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80 + unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3 + else + unicode = byte0 + end + end + + if ibs == jbs then + return unicode + else + return unicode,utf8unicode(str, ibs+bytes, jbs) + end +end + +local function utf8byte(str, i, j) + if #str == 0 then return end + + local ibs, jbs + + if i or j then + i = i or 1 + j = j or i + + local str_len = utf8len(str) + i = i < 0 and str_len + i + 1 or i + j = j < 0 and str_len + j + 1 or j + j = j > str_len and str_len or j + + if i > j then return end + + for p = 1, i - 1 do + ibs = utf8next(str, ibs or 1) + end + + if i == j then + jbs = ibs + else + for p = 1, j - 1 do + jbs = utf8next(str, jbs or 1) + end + end + + if not ibs or not jbs then + return nil + end + else + ibs, jbs = 1, 1 + end + + return utf8unicode(str, ibs, jbs) +end + +local function utf8gensub(str, sub_len) + sub_len = sub_len or 1 + local max_len = #str + return function(skip_ptr, bs) + bs = (bs 
and bs or 1) + (skip_ptr and (skip_ptr[1] or 0) or 0) + + local nbs = bs + if bs > max_len then return nil end + for i = 1, sub_len do + nbs = utf8next(str, nbs) + end + + return nbs, sub(str, bs, nbs - 1), bs + end +end + +local function utf8reverse (s) + local result = '' + for _, w in utf8gensub(s) do result = w .. result end + return result +end + +local function utf8validator(str, bs) + bs = bs or 1 + + if type(str) ~= "string" then + error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(str).. ")") + end + if type(bs) ~= "number" then + error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(bs).. ")") + end + + local c = byte(str, bs) + if not c then return end + + -- determine bytes needed for character, based on RFC 3629 + + -- UTF8-1 + if c >= 0 and c <= 127 then + return bs + 1 + elseif c >= 128 and c <= 193 then + return bs + 1, bs, 1, c + -- UTF8-2 + elseif c >= 194 and c <= 223 then + local c2 = byte(str, bs + 1) + if not c2 or c2 < 128 or c2 > 191 then + return bs + 2, bs, 2, c2 + end + + return bs + 2 + -- UTF8-3 + elseif c >= 224 and c <= 239 then + local c2 = byte(str, bs + 1) + + if not c2 then + return bs + 2, bs, 2, c2 + end + + -- validate byte 2 + if c == 224 and (c2 < 160 or c2 > 191) then + return bs + 2, bs, 2, c2 + elseif c == 237 and (c2 < 128 or c2 > 159) then + return bs + 2, bs, 2, c2 + elseif c2 < 128 or c2 > 191 then + return bs + 2, bs, 2, c2 + end + + local c3 = byte(str, bs + 2) + if not c3 or c3 < 128 or c3 > 191 then + return bs + 3, bs, 3, c3 + end + + return bs + 3 + -- UTF8-4 + elseif c >= 240 and c <= 244 then + local c2 = byte(str, bs + 1) + + if not c2 then + return bs + 2, bs, 2, c2 + end + + -- validate byte 2 + if c == 240 and (c2 < 144 or c2 > 191) then + return bs + 2, bs, 2, c2 + elseif c == 244 and (c2 < 128 or c2 > 143) then + return bs + 2, bs, 2, c2 + elseif c2 < 128 or c2 > 191 then + return bs + 2, bs, 2, c2 + end + + local c3 = byte(str, bs + 2) + if not c3 or c3 < 128 or 
c3 > 191 then + return bs + 3, bs, 3, c3 + end + + local c4 = byte(str, bs + 3) + if not c4 or c4 < 128 or c4 > 191 then + return bs + 4, bs, 4, c4 + end + + return bs + 4 + else -- c > 244 + return bs + 1, bs, 1, c + end +end + +local function utf8validate(str, byte_pos) + local result = {} + for nbs, bs, part, code in utf8validator, str, byte_pos do + if bs then + result[#result + 1] = { pos = bs, part = part, code = code } + end + end + return #result == 0, result +end + +local function utf8codes(str) + local max_len = #str + local bs = 1 + return function(skip_ptr) + if bs > max_len then return nil end + local pbs = bs + bs = utf8next(str, pbs) + + return pbs, utf8unicode(str, pbs, pbs), pbs + end +end + + +--[[-- +differs from Lua 5.3 utf8.offset in accepting any byte positions (not only head byte) for all n values + +h - head, c - continuation, t - tail +hhhccthccthccthcthhh + ^ start byte pos +searching current character head by moving backwards +hhhccthccthccthcthhh + ^ head + +n == 0: current position +n > 0: n jumps forward +n < 0: n more scans backwards +--]]-- +local function utf8offset(str, n, bs) + local l = #str + if not bs then + if n < 0 then + bs = l + 1 + else + bs = 1 + end + end + if bs <= 0 or bs > l + 1 then + error("bad argument #3 to 'offset' (position out of range)") + end + + if n == 0 then + if bs == l + 1 then + return bs + end + while true do + local b = byte(str, bs) + if (0 < b and b < 127) + or (194 < b and b < 244) then + return bs + end + bs = bs - 1 + if bs < 1 then + return + end + end + elseif n < 0 then + bs = bs - 1 + repeat + if bs < 1 then + return + end + + local b = byte(str, bs) + if (0 < b and b < 127) + or (194 < b and b < 244) then + n = n + 1 + end + bs = bs - 1 + until n == 0 + return bs + 1 + else + while true do + if bs > l then + return + end + + local b = byte(str, bs) + if (0 < b and b < 127) + or (194 < b and b < 244) then + n = n - 1 + for i = 1, n do + if bs > l then + return + end + bs = utf8next(str, bs) 
+ end + return bs + end + bs = bs - 1 + end + end + +end + +local function utf8replace (s, mapping) + if type(s) ~= "string" then + error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")") + end + if type(mapping) ~= "table" then + error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")") + end + local result = utf8.raw.gsub( s, utf8charpattern, mapping ) + return result +end + +local function utf8upper (s) + return utf8replace(s, utf8.config.conversion.lc_uc) +end + +if utf8.config.conversion.lc_uc then + upper = utf8upper +end + +local function utf8lower (s) + return utf8replace(s, utf8.config.conversion.uc_lc) +end + +if utf8.config.conversion.uc_lc then + lower = utf8lower +end + +utf8.len = utf8len +utf8.sub = utf8sub +utf8.reverse = utf8reverse +utf8.char = utf8char +utf8.unicode = utf8unicode +utf8.byte = utf8byte +utf8.next = utf8next +utf8.gensub = utf8gensub +utf8.validator = utf8validator +utf8.validate = utf8validate +utf8.dump = dump +utf8.format = format +utf8.lower = lower +utf8.upper = upper +utf8.rep = rep +utf8.raw = {} +for k,v in pairs(string) do + utf8.raw[k] = v +end + +utf8.charpattern = utf8charpattern +utf8.offset = utf8offset +if _VERSION == 'Lua 5.3' then + local utf8_53 = require "utf8" + utf8.codes = utf8_53.codes + utf8.codepoint = utf8_53.codepoint + utf8.len53 = utf8_53.len +else + utf8.codes = utf8codes + utf8.codepoint = utf8unicode +end + +return utf8 + +end diff --git a/mac/.config/mpv/script-modules/utf8/primitives/init.lua b/mac/.config/mpv/script-modules/utf8/primitives/init.lua new file mode 100644 index 0000000..df28ef3 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/primitives/init.lua @@ -0,0 +1,23 @@ +return function(utf8) + +local provided = utf8.config.primitives + +if provided then + if type(provided) == "table" then + return provided + elseif type(provided) == "function" then + return provided(utf8) + else + return utf8:require(provided) + end +end + +if 
pcall(require, "tarantool") then + return utf8:require "primitives.tarantool" +elseif pcall(require, "ffi") then + return utf8:require "primitives.native" +else + return utf8:require "primitives.dummy" +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/primitives/native.lua b/mac/.config/mpv/script-modules/utf8/primitives/native.lua new file mode 100644 index 0000000..c9aca54 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/primitives/native.lua @@ -0,0 +1,57 @@ +return function(utf8) + +local ffi = require("ffi") +if ffi.os == "Windows" then + os.setlocale(utf8.config.locale or "english_us.65001", "ctype") + ffi.cdef[[ + short towupper(short c); + short towlower(short c); + ]] +else + os.setlocale(utf8.config.locale or "C.UTF-8", "ctype") + ffi.cdef[[ + int towupper(int c); + int towlower(int c); + ]] +end + +utf8:require "primitives.dummy" + +if not utf8.config.conversion.uc_lc then + function utf8.lower(str) + local bs = 1 + local nbs + local bytes = utf8.raw.len(str) + local res = {} + + while bs <= bytes do + nbs = utf8.next(str, bs) + local cp = utf8.unicode(str, bs, nbs) + res[#res + 1] = ffi.C.towlower(cp) + bs = nbs + end + + return utf8.char(utf8.config.unpack(res)) + end +end + +if not utf8.config.conversion.lc_uc then + function utf8.upper(str) + local bs = 1 + local nbs + local bytes = utf8.raw.len(str) + local res = {} + + while bs <= bytes do + nbs = utf8.next(str, bs) + local cp = utf8.unicode(str, bs, nbs) + res[#res + 1] = ffi.C.towupper(cp) + bs = nbs + end + + return utf8.char(utf8.config.unpack(res)) + end +end + +return utf8 +end diff --git a/mac/.config/mpv/script-modules/utf8/primitives/tarantool.lua b/mac/.config/mpv/script-modules/utf8/primitives/tarantool.lua new file mode 100644 index 0000000..c38acf6 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/primitives/tarantool.lua @@ -0,0 +1,13 @@ +return function(utf8) + +utf8:require "primitives.dummy" + +local tnt_utf8 = utf8.config.tarantool_utf8 or require("utf8") + 
+utf8.lower = tnt_utf8.lower +utf8.upper = tnt_utf8.upper +utf8.len = tnt_utf8.len +utf8.char = tnt_utf8.char + +return utf8 +end diff --git a/mac/.config/mpv/script-modules/utf8/regex_parser.lua b/mac/.config/mpv/script-modules/utf8/regex_parser.lua new file mode 100644 index 0000000..3190f1b --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/regex_parser.lua @@ -0,0 +1,80 @@ +return function(utf8) + +utf8:require "modifier.compiletime.parser" +utf8:require "charclass.compiletime.parser" +utf8:require "begins.compiletime.parser" +utf8:require "ends.compiletime.parser" + +local gensub = utf8.gensub +local sub = utf8.sub + +local parser_context = utf8:require "context.compiletime" + +return function(regex, plain) + utf8.debug("regex", regex) + local ctx = parser_context:new() + + local skip = {0} + for nbs, c, bs in gensub(regex, 0), skip do + repeat -- continue + skip[1] = 0 + + c = utf8.raw.sub(regex, bs, utf8.next(regex, bs) - 1) + + local functions, move = utf8.regex.compiletime.begins.parse(regex, c, bs, ctx) + if functions then + ctx.begins = functions + skip[1] = move + end + if skip[1] ~= 0 then break end + + local functions, move = utf8.regex.compiletime.ends.parse(regex, c, bs, ctx) + if functions then + ctx.ends = functions + skip[1] = move + end + if skip[1] ~= 0 then break end + + local functions, move = utf8.regex.compiletime.modifier.parse(regex, c, bs, ctx) + if functions then + for _, f in ipairs(functions) do + ctx.funcs[#ctx.funcs + 1] = f + end + skip[1] = move + end + if skip[1] ~= 0 then break end + + local charclass, move = utf8.regex.compiletime.charclass.parse(regex, c, bs, ctx) + if charclass then skip[1] = move end + until true -- continue + end + + for _, m in ipairs(utf8.config.modifier) do + if m.check then m.check(ctx) end + end + + local src = [[ + return function(str, init, utf8) + local ctx = utf8:require("context.runtime").new({str = str, pos = init or 1}) + local cl = utf8:require("charclass.runtime.init") + local utf8sub = 
utf8.sub + local rawsub = utf8.raw.sub + local utf8len = utf8.len + local utf8next = utf8.next + local debug = utf8.debug + local function add(fun) + ctx.functions[#ctx.functions + 1] = fun + end + ]] .. ctx.begins + for _, v in ipairs(ctx.funcs) do src = src .. v end + src = src .. ctx.ends .. [[ + return coroutine.wrap(ctx:get_function())(ctx) + end + ]] + + utf8.debug(regex, src) + + return assert(utf8.config.loadstring(src, (plain and "plain " or "") .. regex))() +end + +end diff --git a/mac/.config/mpv/script-modules/utf8/test.sh b/mac/.config/mpv/script-modules/utf8/test.sh new file mode 100755 index 0000000..b8d2d63 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -xe + +lua53=$(which lua5.3 || which true) +lua51=$(which lua5.1 || which true) +luajit=$(which luajit || which true) + +for test in \ + test/charclass_compiletime.lua \ + test/charclass_runtime.lua \ + test/context_runtime.lua \ + test/test.lua \ + test/test_compat.lua \ + test/test_pm.lua \ + test/test_utf8data.lua +do + $lua53 $test + $lua51 $test + $luajit $test +done + +echo "tests passed" diff --git a/mac/.config/mpv/script-modules/utf8/test/charclass_compiletime.lua b/mac/.config/mpv/script-modules/utf8/test/charclass_compiletime.lua new file mode 100644 index 0000000..05d762d --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/charclass_compiletime.lua @@ -0,0 +1,165 @@ +local utf8 = require "init" +utf8.config = { + debug = nil, +-- debug = utf8:require("util").debug, +} +utf8:init() + +local ctx = utf8:require("context.compiletime"):new() + +local equals = require 'test.util'.equals +local assert = require 'test.util'.assert +local assert_equals = require 'test.util'.assert_equals +local parse = utf8.regex.compiletime.charclass.parse + +assert_equals({parse("aabb", "a", 1, ctx)}, {{codes = {utf8.byte("a")}}, 1}) +assert_equals({parse("aabb", "a", 2, ctx)}, {{codes = {utf8.byte("a")}}, 1}) +assert_equals({parse("aabb", "b", 3, 
ctx)}, {{codes = {utf8.byte("b")}}, 1}) +assert_equals({parse("aabb", "b", 4, ctx)}, {{codes = {utf8.byte("b")}}, 1}) + +assert_equals({parse("aa%ab", "%", 3, ctx)}, {{classes = {'alpha'}}, 2}) +assert_equals({parse("aac%Ab", "%", 4, ctx)}, {{not_classes = {'alpha'}}, 2}) +assert_equals({parse("aa.b", ".", 3, ctx)}, {{inverted = true}, 1}) + +assert_equals({parse("aa[c]b", "[", 3, ctx)}, { + {codes = {utf8.byte("c")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[c]") +}) + +assert_equals({parse("aa[%A]b", "[", 3, ctx)}, { + {codes = nil, ranges = nil, classes = nil, not_classes = {'alpha'}}, + utf8.raw.len("[%A]") +}) + +assert_equals({parse("[^%p%d%s%c]+", "[", 1, ctx)}, { + {codes = nil, ranges = nil, classes = {'punct', 'digit', 'space', 'cntrl'}, not_classes = nil, inverted = true}, + utf8.raw.len("[^%p%d%s%c]") +}) + +assert_equals({parse("aa[[c]]b", "[", 3, ctx)}, { + {codes = {utf8.byte("["), utf8.byte("c")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[[c]") +}) + +assert_equals({parse("aa[%a[c]]b", "[", 3, ctx)}, { + {codes = {utf8.byte("["), utf8.byte("c")}, ranges = nil, classes = {'alpha'}, not_classes = nil}, + utf8.raw.len("[%a[c]") +}) + +assert_equals({parse("aac-db", "c", 3, ctx)}, { + {codes = {utf8.byte("c")}}, + utf8.raw.len("c") +}) + +assert_equals({parse("aa[c-d]b", "[", 3, ctx)}, { + {codes = nil, ranges = {{utf8.byte("c"),utf8.byte("d")}}, classes = nil, not_classes = nil}, + utf8.raw.len("[c-d]") +}) +assert_equals(ctx.internal, false) + +assert_equals({parse("aa[c-]]b", "[", 3, ctx)}, { + {codes = {utf8.byte("-"), utf8.byte("c")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[c-]") +}) +assert_equals(ctx.internal, false) + +assert_equals({parse("aad-", "d", 3, ctx)}, { + {codes = {utf8.byte("d")}}, + utf8.raw.len("d") +}) +assert_equals(ctx.internal, false) + +ctx.internal = false +assert_equals({parse(".", ".", 1, ctx)}, { + {inverted = true}, + utf8.raw.len(".") +}) + 
+assert_equals({parse("[.]", "[", 1, ctx)}, { + {codes = {utf8.byte(".")}}, + utf8.raw.len("[.]") +}) + +assert_equals({parse("%?", "%", 1, ctx)}, { + {codes = {utf8.byte("?")}}, + utf8.raw.len("%?") +}) + +assert_equals({parse("[]]", "[", 1, ctx)}, { + {codes = {utf8.byte("]")}}, + utf8.raw.len("[]]") +}) + +assert_equals({parse("[^]]", "[", 1, ctx)}, { + {codes = {utf8.byte("]")}, inverted = true}, + utf8.raw.len("[^]]") +}) + +--[[-- +multibyte chars +--]]-- + +assert_equals({parse("ббюю", "б", #"" + 1, ctx)}, {{codes = {utf8.byte("б")}}, utf8.raw.len("б")}) +assert_equals({parse("ббюю", "б", #"б" + 1, ctx)}, {{codes = {utf8.byte("б")}}, utf8.raw.len("б")}) +assert_equals({parse("ббюю", "ю", #"бб" + 1, ctx)}, {{codes = {utf8.byte("ю")}}, utf8.raw.len("ю")}) +assert_equals({parse("ббюю", "ю", #"ббю" + 1, ctx)}, {{codes = {utf8.byte("ю")}}, utf8.raw.len("ю")}) + +assert_equals({parse("бб%aю", "%", #"бб" + 1, ctx)}, {{classes = {'alpha'}}, 2}) +assert_equals({parse("ббц%Aю", "%", #"ббц" + 1, ctx)}, {{not_classes = {'alpha'}}, 2}) +assert_equals({parse("бб.ю", ".", #"бб" + 1, ctx)}, {{inverted = true}, 1}) + +assert_equals({parse("бб[ц]ю", "[", #"бб" + 1, ctx)}, { + {codes = {utf8.byte("ц")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[ц]") +}) + +assert_equals({parse("бб[%A]ю", "[", #"бб" + 1, ctx)}, { + {codes = nil, ranges = nil, classes = nil, not_classes = {'alpha'}}, + utf8.raw.len("[%A]") +}) + +assert_equals({parse("бб[[ц]]ю", "[", #"бб" + 1, ctx)}, { + {codes = {utf8.byte("["), utf8.byte("ц")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[[ц]") +}) + +assert_equals({parse("бб[%a[ц]]ю", "[", #"бб" + 1, ctx)}, { + {codes = {utf8.byte("["), utf8.byte("ц")}, ranges = nil, classes = {'alpha'}, not_classes = nil}, + utf8.raw.len("[%a[ц]") +}) + +ctx.internal = true +assert_equals({parse("ббц-ыю", "ц", #"бб" + 1, ctx)}, { + {ranges = {{utf8.byte("ц"),utf8.byte("ы")}}}, + utf8.raw.len("ц-ы") +}) + +ctx.internal = false 
+assert_equals({parse("бб[ц-ы]ю", "[", #"бб" + 1, ctx)}, { + {codes = nil, ranges = {{utf8.byte("ц"),utf8.byte("ы")}}, classes = nil, not_classes = nil}, + utf8.raw.len("[ц-ы]") +}) + +assert_equals({parse("бб[ц-]]ю", "[", #"бб" + 1, ctx)}, { + {codes = {utf8.byte("-"), utf8.byte("ц")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[ц-]") +}) + +assert_equals({parse("ббы-", "ы", #"бб" + 1, ctx)}, { + {codes = {utf8.byte("ы")}}, + utf8.raw.len("ы") +}) + +ctx.internal = true +assert_equals({parse("ббы-цю", "ы", #"бб" + 1, ctx)}, { + {ranges = {{utf8.byte("ы"),utf8.byte("ц")}}}, + utf8.raw.len("ы-ц") +}) + +ctx.internal = false +assert_equals({parse("бб[ы]ю", "[", #"бб" + 1, ctx)}, { + {codes = {utf8.byte("ы")}, ranges = nil, classes = nil, not_classes = nil}, + utf8.raw.len("[ы]") +}) + +print "OK" diff --git a/mac/.config/mpv/script-modules/utf8/test/charclass_runtime.lua b/mac/.config/mpv/script-modules/utf8/test/charclass_runtime.lua new file mode 100644 index 0000000..616af14 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/charclass_runtime.lua @@ -0,0 +1,116 @@ +local utf8 = require("init") +utf8.config = { + debug = nil, --utf8:require("util").debug +} +utf8:init() + +local cl = utf8:require("charclass.runtime.init") + +local equals = require('test.util').equals +local assert = require('test.util').assert +local assert_equals = require('test.util').assert_equals + +assert_equals(true, cl.new() + :with_codes(utf8.byte' ') + :invert() + :in_codes(utf8.byte' ')) + +assert_equals(false, cl.new() + :with_codes(utf8.byte' ') + :invert() + :test(utf8.byte' ')) + +assert_equals(false, cl.new() + :with_codes() + :with_ranges() + :with_classes('space') + :without_classes() + :with_subs() + :invert() + :test(utf8.byte(' '))) + +assert_equals(true, cl.new() + :with_codes() + :with_ranges() + :with_classes() + :without_classes('space') + :with_subs() + :invert() + :test(utf8.byte(' '))) + +assert_equals(false, cl.new() + :with_codes() + 
:with_ranges() + :with_classes() + :without_classes() + :with_subs(cl.new():with_classes('space')) + :invert() + :test(utf8.byte(' '))) + +assert_equals(true, cl.new() + :with_codes() + :with_ranges() + :with_classes() + :without_classes() + :with_subs(cl.new():with_classes('space'):invert()) + :invert() + :test(utf8.byte(' '))) + +assert_equals(true, cl.new() + :with_codes() + :with_ranges() + :with_classes('punct', 'digit', 'space', 'cntrl') + :without_classes() + :with_subs() + :invert() + :test(utf8.byte'П') +) + +assert_equals(true, cl.new() + :with_codes() + :with_ranges() + :with_classes('punct', 'digit', 'space', 'cntrl') + :without_classes() + :with_subs() + :invert() + :test(utf8.byte'и') +) + +assert_equals(true, cl.new() + :with_codes() + :with_ranges() + :with_classes() + :without_classes('space') + :with_subs() + :test(utf8.byte'f') +) + +assert_equals(false, cl.new() + :with_codes() + :with_ranges() + :with_classes() + :without_classes('space') + :with_subs() + :test(utf8.byte'\n') +) + +assert_equals(false, cl.new() + :with_codes() + :with_ranges() + :with_classes('lower') + :without_classes() + :with_subs() + :invert() + :test(nil) +) + +assert_equals(false, cl.new() + :with_codes() + :with_ranges() + :with_classes('lower') + :without_classes() + :with_subs() + :test(nil) +) + +print "OK" diff --git a/mac/.config/mpv/script-modules/utf8/test/context_runtime.lua b/mac/.config/mpv/script-modules/utf8/test/context_runtime.lua new file mode 100644 index 0000000..9a177bf --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/context_runtime.lua @@ -0,0 +1,82 @@ +local utf8 = require("init"):init() + +local context = utf8:require('context.runtime') + +local equals = require('test.util').equals +local assert = require('test.util').assert +local assert_equals = require('test.util').assert_equals + +local ctx_en +local ctx_ru +local function setup() + ctx_en = context.new({str = 'asdf'}) + ctx_ru = context.new({str = 'фыва'}) +end + +local 
test_get_char = (function() + setup() + + assert_equals('a', ctx_en:get_char()) + assert_equals('ф', ctx_ru:get_char()) +end)() + +local test_get_charcode = (function() + setup() + + assert_equals(utf8.byte'a', ctx_en:get_charcode()) + assert_equals(utf8.byte'ф', ctx_ru:get_charcode()) +end)() + +local test_next_char = (function() + setup() + + assert_equals(1, ctx_en.pos) + assert_equals(1, ctx_ru.pos) + + ctx_ru:next_char() + ctx_en:next_char() + + assert_equals(2, ctx_en.pos) + assert_equals(2, ctx_ru.pos) + + assert_equals('s', ctx_en:get_char()) + assert_equals('ы', ctx_ru:get_char()) + assert_equals(utf8.byte's', ctx_en:get_charcode()) + assert_equals(utf8.byte'ы', ctx_ru:get_charcode()) +end)() + +local test_clone = (function() + setup() + + local clone = ctx_en:clone() + + assert(getmetatable(clone) == getmetatable(ctx_en)) + assert_equals(clone, ctx_en) + + ctx_en:next_char() + + assert_equals('a', clone:get_char()) + assert_equals('s', ctx_en:get_char()) + +end)() + +local test_last_char = (function() + ctx_en = context.new({str = 'asdf', pos = 4}) + ctx_ru = context.new({str = 'фыва', pos = 4}) + + assert_equals('f', ctx_en:get_char()) + assert_equals('а', ctx_ru:get_char()) + + ctx_ru:next_char() + ctx_en:next_char() + + assert_equals(5, ctx_en.pos) + assert_equals(5, ctx_ru.pos) + + assert_equals("", ctx_en:get_char()) + assert_equals("", ctx_ru:get_char()) + assert_equals(nil, ctx_en:get_charcode()) + assert_equals(nil, ctx_ru:get_charcode()) +end)() + +print('OK') diff --git a/mac/.config/mpv/script-modules/utf8/test/strict.lua b/mac/.config/mpv/script-modules/utf8/test/strict.lua new file mode 100644 index 0000000..7324644 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/strict.lua @@ -0,0 +1,42 @@ +--[[-- +strict.lua from http://metalua.luaforge.net/src/lib/strict.lua.html +--]]-- + +-- +-- strict.lua +-- checks uses of undeclared global variables +-- All global variables must be 'declared' through a regular assignment +-- (even 
assigning nil will do) in a main chunk before being used +-- anywhere or assigned to inside a function. +-- + +local mt = getmetatable(_G) +if mt == nil then + mt = {} + setmetatable(_G, mt) +end + +__STRICT = true +mt.__declared = {} + +mt.__newindex = function (t, n, v) + if __STRICT and not mt.__declared[n] then + local w = debug.getinfo(2, "S").what + if w ~= "main" and w ~= "C" then + error("assign to undeclared variable '"..n.."'", 2) + end + mt.__declared[n] = true + end + rawset(t, n, v) +end + +mt.__index = function (t, n) + if not mt.__declared[n] and debug.getinfo(2, "S").what ~= "C" then + error("variable '"..n.."' is not declared", 2) + end + return rawget(t, n) +end + +function global(...) + for _, v in ipairs{...} do mt.__declared[v] = true end +end diff --git a/mac/.config/mpv/script-modules/utf8/test/test.lua b/mac/.config/mpv/script-modules/utf8/test/test.lua new file mode 100644 index 0000000..8653b5d --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/test.lua @@ -0,0 +1,205 @@ +local utf8 = require('init') +utf8.config = { + debug = nil, +-- debug = utf8:require("util").debug, +} +utf8:init() + +for k,v in pairs(utf8) do + string[k] = v +end + +local LUA_51, LUA_53 = false, false +if "\xe4" == "xe4" then -- lua5.1 + LUA_51 = true +else -- luajit lua5.3 + LUA_53 = true +end + +local FFI_ENABLED = false +if pcall(require, "ffi") then + FFI_ENABLED = true +end + +local res = {} + +local equals = require 'test.util'.equals +local assert = require 'test.util'.assert +local assert_equals = require 'test.util'.assert_equals + +if FFI_ENABLED then + assert_equals(("АБВ"):lower(), "абв") + assert_equals(("абв"):upper(), "АБВ") +end + +res = {} +for _, w in ("123456789"):gensub(2), {1} do res[#res + 1] = w end +assert_equals({"23", "56", "89"}, res) + +assert_equals(0, ("фыва"):next(0)) +assert_equals(100, ("фыва"):next(100)) +assert_equals(#"ф" + 1, ("фыва"):next(1)) +assert_equals("ыва", utf8.raw.sub("фыва", ("фыва"):next(1))) + +res = {} +for 
p, c in ("абвгд"):codes() do res[#res + 1] = {p, c} end +assert_equals({ + {1, utf8.byte'а'}, + {#'а' + 1, utf8.byte'б'}, + {#'аб' + 1, utf8.byte'в'}, + {#'абв' + 1, utf8.byte'г'}, + {#'абвг' + 1, utf8.byte'д'}, +}, res) + +assert_equals(1, utf8.offset('abcde', 0)) + +assert_equals(1, utf8.offset('abcde', 1)) +assert_equals(5, utf8.offset('abcde', 5)) +assert_equals(6, utf8.offset('abcde', 6)) +assert_equals(nil, utf8.offset('abcde', 7)) + +assert_equals(5, utf8.offset('abcde', -1)) +assert_equals(1, utf8.offset('abcde', -5)) +assert_equals(nil, utf8.offset('abcde', -6)) + +assert_equals(1, utf8.offset('abcde', 0, 1)) +assert_equals(3, utf8.offset('abcde', 0, 3)) +assert_equals(6, utf8.offset('abcde', 0, 6)) + +assert_equals(3, utf8.offset('abcde', 1, 3)) +assert_equals(5, utf8.offset('abcde', 3, 3)) +assert_equals(6, utf8.offset('abcde', 4, 3)) +assert_equals(nil, utf8.offset('abcde', 5, 3)) + +assert_equals(2, utf8.offset('abcde', -1, 3)) +assert_equals(1, utf8.offset('abcde', -2, 3)) +assert_equals(5, utf8.offset('abcde', -1, 6)) +assert_equals(nil, utf8.offset('abcde', -3, 3)) + +assert_equals(1, utf8.offset('абвгд', 0)) + +assert_equals(1, utf8.offset('абвгд', 1)) +assert_equals(#'абвг' + 1, utf8.offset('абвгд', 5)) +assert_equals(#'абвгд' + 1, utf8.offset('абвгд', 6)) +assert_equals(nil, utf8.offset('абвгд', 7)) + +assert_equals(#'абвг' + 1, utf8.offset('абвгд', -1)) +assert_equals(1, utf8.offset('абвгд', -5)) +assert_equals(nil, utf8.offset('абвгд', -6)) + +assert_equals(1, utf8.offset('абвгд', 0, 1)) +assert_equals(1, utf8.offset('абвгд', 0, 2)) +assert_equals(#'аб' + 1, utf8.offset('абвгд', 0, #'аб' + 1)) +assert_equals(#'аб' + 1, utf8.offset('абвгд', 0, #'аб' + 2)) +assert_equals(#'абвгд' + 1, utf8.offset('абвгд', 0, #'абвгд' + 1)) + +assert_equals(#'аб' + 1, utf8.offset('абвгд', 1, #'аб' + 1)) +assert_equals(#'абвг' + 1, utf8.offset('абвгд', 3, #'аб' + 1)) +assert_equals(#'абвгд' + 1, utf8.offset('абвгд', 4, #'аб' + 1)) +assert_equals(#'абвгд' + 1, 
utf8.offset('абвгд', 4, #'аб' + 2)) +assert_equals(nil, utf8.offset('абвгд', 5, #'аб' + 1)) + +assert_equals(#'а' + 1, utf8.offset('абвгд', -1, #'аб' + 1)) +assert_equals(1, utf8.offset('абвгд', -2, #'аб' + 1)) +assert_equals(#'абвг' + 1, utf8.offset('абвгд', -1, #'абвгд' + 1)) +assert_equals(nil, utf8.offset('абвгд', -3, #'аб' + 1)) + +assert(("фыва"):validate()) +assert_equals({false, {{ pos = #"ф" + 1, part = 1, code = 255 }} }, {("ф\255ыва"):validate()}) +if LUA_53 then + assert_equals({false, {{ pos = #"ф" + 1, part = 1, code = 0xFF }} }, {("ф\xffыва"):validate()}) +end + +assert_equals(nil, ("aabb"):find("%bcd")) +assert_equals({1, 4}, {("aabb"):find("%bab")}) +assert_equals({1, 2}, {("aba"):find('%bab')}) + +res = {} +for w in ("aacaabbcabbacbaacab"):gmatch('%bab') do res[#res + 1] = w end +assert_equals({"acaabbcabb", "acb", "ab"}, res) + +assert_equals({1, 0}, {("aacaabbcabbacbaacab"):find('%f[acb]')}) +assert_equals("a", ("aba"):match('%f[ab].')) + +res = {} +for w in ("aacaabbcabbacbaacab"):gmatch('%f[ab]') do res[#res + 1] = w end +assert_equals({"", "", "", "", ""}, res) + +assert_equals({"HaacHaabbcHabbacHbaacHab", 5}, {("aacaabbcabbacbaacab"):gsub('%f[ab]', 'H')}) + +res = {} +for w in ("Привет, мир, от Lua"):gmatch("[^%p%d%s%c]+") do res[#res + 1] = w end +assert_equals({"Привет", "мир", "от", "Lua"}, res) + +res = {} +for k, v in ("从=世界, 到=Lua"):gmatch("([^%p%s%c]+)=([^%p%s%c]+)") do res[k] = v end +assert_equals({["到"] = "Lua", ["从"] = "世界"}, res) + +assert_equals("Ahoj Ahoj světe světe", ("Ahoj světe"):gsub("([^%p%s%c]+)", "%1 %1")) + +assert_equals("Ahoj Ahoj světe", ("Ahoj světe"):gsub("[^%p%s%c]+", "%0 %0", 1)) + +assert_equals("κόσμο γεια Lua από", ("γεια κόσμο από Lua"):gsub("([^%p%s%c]+)%s*([^%p%s%c]+)", "%2 %1")) + +assert_equals({8, 27, "ололоо я водитель э"}, {("пыщпыщ ололоо я водитель энло"):find("(.л.+)н")}) + +assert_equals({"пыщпыщ о보라보라 я водитель эн보라", 3}, {("пыщпыщ ололоо я водитель энло"):gsub("ло+", "보라")}) + 
+assert_equals("пыщпыщ ололоо я", ("пыщпыщ ололоо я водитель энло"):match("^п[лопыщ ]*я")) + +assert_equals("в", ("пыщпыщ ололоо я водитель энло"):match("[в-д]+")) + +assert_equals(nil, ('abc abc'):match('([^%s]+)%s%s')) -- https://github.com/Stepets/utf8.lua/issues/2 + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("a+b") do res[#res + 1] = w end +assert_equals({"ab","aab"}, res) + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("a-b") do res[#res + 1] = w end +assert_equals({"ab","b","b","b","aab","b","b"}, res) + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("a*b") do res[#res + 1] = w end +assert_equals({"ab","b","b","b","aab","b","b"}, res) + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("ba+") do res[#res + 1] = w end +assert_equals({"ba","ba"}, res) + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("ba-") do res[#res + 1] = w end +assert_equals({"b","b","b","b","b","b","b"}, res) + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("ba*") do res[#res + 1] = w end +assert_equals({"b","ba","b","b","b","b","ba"}, res) + +assert_equals({"bacbbcaabbcba", "ba"}, {("aacabbacbbcaabbcbacaa"):match("((ba+).*%2)")}) +assert_equals({"bbacbbcaabbcb", "b"}, {("aacabbacbbcaabbcbacaa"):match("((ba*).*%2)")}) + +res = {} +for w in ("aacabbacbbcaabbcbacaa"):gmatch("((b+a*).-%2)") do res[#res + 1] = w end +assert_equals({"bbacbb", "bb"}, res) + +assert_equals("a**", ("a**v"):match("a**+")) +assert_equals("a", ("a**v"):match("a**-")) + +assert_equals({"test", "."}, {("test.lua"):match("(.-)([.])")}) + +-- https://github.com/Stepets/utf8.lua/issues/3 +assert_equals({"ab", "c"}, {("abc"):match("^([ab]-)([^b]*)$")}) +assert_equals({"ab", ""}, {("ab"):match("^([ab]-)([^b]*)$")}) +assert_equals({"items.", ""}, {("items."):match("^(.-)([^.]*)$")}) +assert_equals({"", "items"}, {("items"):match("^(.-)([^.]*)$")}) + +-- https://github.com/Stepets/utf8.lua/issues/4 +assert_equals({"ab.123", 1}, {("ab.?"):gsub("%?", "123")}) + +-- 
https://github.com/Stepets/utf8.lua/issues/5 +assert_equals({"ab", 1}, {("ab"):gsub("a", "%0")}) +assert_equals({"ab", 1}, {("ab"):gsub("a", "%1")}) + +assert_equals("c", ("abc"):match("c", -1)) + +print("\ntests passed\n") diff --git a/mac/.config/mpv/script-modules/utf8/test/test_compat.lua b/mac/.config/mpv/script-modules/utf8/test/test_compat.lua new file mode 100644 index 0000000..d5042a5 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/test_compat.lua @@ -0,0 +1,109 @@ +local utf8 = require 'init' +utf8.config = { + debug = nil, --utf8:require("util").debug +} +utf8:init() +print('testing utf8 library') + +local LUA_51, LUA_53 = false, false +if "\xe4" == "xe4" then -- lua5.1 + LUA_51 = true +else -- luajit lua5.3 + LUA_53 = true +end + +assert(utf8.sub("123456789",2,4) == "234") +assert(utf8.sub("123456789",7) == "789") +assert(utf8.sub("123456789",7,6) == "") +assert(utf8.sub("123456789",7,7) == "7") +assert(utf8.sub("123456789",0,0) == "") +assert(utf8.sub("123456789",-10,10) == "123456789") +assert(utf8.sub("123456789",1,9) == "123456789") +assert(utf8.sub("123456789",-10,-20) == "") +assert(utf8.sub("123456789",-1) == "9") +assert(utf8.sub("123456789",-4) == "6789") +assert(utf8.sub("123456789",-6, -4) == "456") +if not _no32 then + assert(utf8.sub("123456789",-2^31, -4) == "123456") + assert(utf8.sub("123456789",-2^31, 2^31 - 1) == "123456789") + assert(utf8.sub("123456789",-2^31, -2^31) == "") +end +assert(utf8.sub("\000123456789",3,5) == "234") +assert(utf8.sub("\000123456789", 8) == "789") +print('+') + +assert(utf8.find("123456789", "345") == 3) +local a,b = utf8.find("123456789", "345") +assert(utf8.sub("123456789", a, b) == "345") +assert(utf8.find("1234567890123456789", "345", 3) == 3) +assert(utf8.find("1234567890123456789", "345", 4) == 13) +assert(utf8.find("1234567890123456789", "346", 4) == nil) +assert(utf8.find("1234567890123456789", ".45", -9) == 13) +assert(utf8.find("abcdefg", "\0", 5, 1) == nil) +assert(utf8.find("", "") == 
1) +assert(utf8.find("", "", 1) == 1) +assert(not utf8.find("", "", 2)) +assert(utf8.find('', 'aaa', 1) == nil) +assert(('alo(.)alo'):find('(.)', 1, 1) == 4) +print('+') + +assert(utf8.len("") == 0) +assert(utf8.len("\0\0\0") == 3) +assert(utf8.len("1234567890") == 10) + +assert(utf8.byte("a") == 97) +if LUA_51 then + assert(utf8.byte("�") > 127) +else + assert(utf8.byte("\xe4") > 127) +end +assert(utf8.byte(utf8.char(255)) == 255) +assert(utf8.byte(utf8.char(0)) == 0) +assert(utf8.byte("\0") == 0) +assert(utf8.byte("\0\0alo\0x", -1) == string.byte('x')) +assert(utf8.byte("ba", 2) == 97) +assert(utf8.byte("\n\n", 2, -1) == 10) +assert(utf8.byte("\n\n", 2, 2) == 10) +assert(utf8.byte("") == nil) +assert(utf8.byte("hi", -3) == nil) +assert(utf8.byte("hi", 3) == nil) +assert(utf8.byte("hi", 9, 10) == nil) +assert(utf8.byte("hi", 2, 1) == nil) +assert(utf8.char() == "") +if LUA_53 then + assert(utf8.raw.char(0, 255, 0) == "\0\255\0") -- fails due 255 can't be utf8 byte + assert(utf8.char(0, 255, 0) == "\0\195\191\0") + assert(utf8.raw.char(0, utf8.byte("\xe4"), 0) == "\0\xe4\0") + assert(utf8.char(0, utf8.byte("\xe4"), 0) == "\0\195\164\0") + assert(utf8.raw.char(utf8.raw.byte("\xe4l\0�u", 1, -1)) == "\xe4l\0�u") + assert(utf8.raw.char(utf8.raw.byte("\xe4l\0�u", 1, -1)) == "\xe4l\0�u") + assert(utf8.raw.char(utf8.raw.byte("\xe4l\0�u", 1, 0)) == "") + assert(utf8.raw.char(utf8.raw.byte("\xe4l\0�u", -10, 100)) == "\xe4l\0�u") +end + +assert(utf8.upper("ab\0c") == "AB\0C") +assert(utf8.lower("\0ABCc%$") == "\0abcc%$") +assert(utf8.rep('teste', 0) == '') +assert(utf8.rep('t�s\00t�', 2) == 't�s\0t�t�s\000t�') +assert(utf8.rep('', 10) == '') +print('+') + +assert(utf8.upper("ab\0c") == "AB\0C") +assert(utf8.lower("\0ABCc%$") == "\0abcc%$") + +assert(utf8.reverse"" == "") +assert(utf8.reverse"\0\1\2\3" == "\3\2\1\0") +assert(utf8.reverse"\0001234" == "4321\0") + +for i=0,30 do assert(utf8.len(string.rep('a', i)) == i) end + +print('+') + +do + local f = utf8.gmatch("1 2 3 4 
5", "%d+") + assert(f() == "1") + local co = coroutine.wrap(f) + assert(co() == "2") +end + +print('OK') diff --git a/mac/.config/mpv/script-modules/utf8/test/test_pm.lua b/mac/.config/mpv/script-modules/utf8/test/test_pm.lua new file mode 100644 index 0000000..9c8e472 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/test_pm.lua @@ -0,0 +1,392 @@ +--[[-- +MIT License + +Copyright (c) 2018 Xavier Wang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +--]]-- + +local utf8 = require 'init' +utf8.config = { + debug = nil, --utf8:require("util").debug, +} +utf8:init() + +print('testing pattern matching') + +local +function f(s, p) + local i,e = utf8.find(s, p) + if i then return utf8.sub(s, i, e) end +end + +local +function f1(s, p) + p = utf8.gsub(p, "%%([0-9])", function (s) return "%" .. 
(tonumber(s)+1) end) + p = utf8.gsub(p, "^(^?)", "%1()", 1) + p = utf8.gsub(p, "($?)$", "()%1", 1) + local t = {utf8.match(s, p)} + return utf8.sub(s, t[1], t[#t] - 1) +end + +local +a,b = utf8.find('', '') -- empty patterns are tricky +assert(a == 1 and b == 0); +a,b = utf8.find('alo', '') +assert(a == 1 and b == 0) +a,b = utf8.find('a\0o a\0o a\0o', 'a', 1) -- first position +assert(a == 1 and b == 1) +a,b = utf8.find('a\0o a\0o a\0o', 'a\0o', 2) -- starts in the midle +assert(a == 5 and b == 7) +a,b = utf8.find('a\0o a\0o a\0o', 'a\0o', 9) -- starts in the midle +assert(a == 9 and b == 11) +a,b = utf8.find('a\0a\0a\0a\0\0ab', '\0ab', 2); -- finds at the end +assert(a == 9 and b == 11); +a,b = utf8.find('a\0a\0a\0a\0\0ab', 'b') -- last position +assert(a == 11 and b == 11) +assert(utf8.find('a\0a\0a\0a\0\0ab', 'b\0') == nil) -- check ending +assert(utf8.find('', '\0') == nil) +assert(utf8.find('alo123alo', '12') == 4) +assert(utf8.find('alo123alo', '^12') == nil) + +assert(utf8.match("aaab", ".*b") == "aaab") +assert(utf8.match("aaa", ".*a") == "aaa") +assert(utf8.match("b", ".*b") == "b") + +assert(utf8.match("aaab", ".+b") == "aaab") +assert(utf8.match("aaa", ".+a") == "aaa") +assert(not utf8.match("b", ".+b")) + +assert(utf8.match("aaab", ".?b") == "ab") +assert(utf8.match("aaa", ".?a") == "aa") +assert(utf8.match("b", ".?b") == "b") + +assert(f('aloALO', '%l*') == 'alo') +assert(f('aLo_ALO', '%a*') == 'aLo') + +assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu") + +assert(f('aaab', 'a*') == 'aaa'); +assert(f('aaa', '^.*$') == 'aaa'); +assert(f('aaa', 'b*') == ''); +assert(f('aaa', 'ab*a') == 'aa') +assert(f('aba', 'ab*a') == 'aba') +assert(f('aaab', 'a+') == 'aaa') +assert(f('aaa', '^.+$') == 'aaa') +assert(f('aaa', 'b+') == nil) +assert(f('aaa', 'ab+a') == nil) +assert(f('aba', 'ab+a') == 'aba') +assert(f('a$a', '.$') == 'a') +assert(f('a$a', '.%$') == 'a$') +assert(f('a$a', '.$.') == 'a$a') +assert(f('a$a', '$$') == nil) +assert(f('a$b', 'a$') == nil) 
+assert(f('a$a', '$') == '') +assert(f('', 'b*') == '') +assert(f('aaa', 'bb*') == nil) +assert(f('aaab', 'a-') == '') +assert(f('aaa', '^.-$') == 'aaa') +assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab') +assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab') +assert(f('alo xo', '.o$') == 'xo') +assert(f(' \n isto é assim', '%S%S*') == 'isto') +assert(f(' \n isto é assim', '%S*$') == 'assim') +assert(f(' \n isto é assim', '[a-z]*$') == 'assim') +assert(f('um caracter ? extra', '[^%sa-z]') == '?') +assert(f('', 'a?') == '') +assert(f('á', 'á?') == 'á') +assert(f('ábl', 'á?b?l?') == 'ábl') +assert(f(' ábl', 'á?b?l?') == '') +assert(f('aa', '^aa?a?a') == 'aa') +assert(f(']]]áb', '[^]]') == 'á') +assert(f("0alo alo", "%x*") == "0a") +assert(f("alo alo", "%C+") == "alo alo") +print('+') + +assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o") +assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3') +assert(f1('=======', '^(=*)=%1$') == '=======') +assert(utf8.match('==========', '^([=]*)=%1$') == nil) + +local function range (i, j) + if i <= j then + return i, range(i+1, j) + end +end + +local abc = utf8.char(range(0, 255)); + +assert(utf8.len(abc) == 256) +assert(string.len(abc) == 384) + +local +function strset (p) + local res = {s=''} + utf8.gsub(abc, p, function (c) res.s = res.s .. 
c end) + return res.s +end; + +local a, b, c, d, e, t + +-- local E = utf8.escape +-- assert(utf8.len(strset(E'[%200-%210]')) == 11) + +assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz") +assert(strset('[a-z%d]') == strset('[%da-uu-z]')) +assert(strset('[a-]') == "-a") +assert(strset('[^%W]') == strset('[%w]')) +assert(strset('[]%%]') == '%]') +assert(strset('[a%-z]') == '-az') +assert(strset('[%^%[%-a%]%-b]') == '-[]^ab') +-- assert(strset('%Z') == strset(E'[%1-%255]')) +-- assert(strset('.') == strset(E'[%1-%255%%z]')) +print('+'); + +assert(utf8.match("alo xyzK", "(%w+)K") == "xyz") +assert(utf8.match("254 K", "(%d*)K") == "") +assert(utf8.match("alo ", "(%w*)$") == "") +assert(utf8.match("alo ", "(%w+)$") == nil) +assert(utf8.find("(álo)", "%(á") == 1) +a, b, c, d, e = utf8.match("âlo alo", "^(((.).).* (%w*))$") +assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) +a, b, c, d = utf8.match('0123456789', '(.+(.?)())') +assert(a == '0123456789' and b == '' and c == 11 and d == nil) +print('+') + +assert(utf8.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') +assert(utf8.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim +assert(utf8.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim +assert(utf8.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ') +t = "abç d" +a, b = utf8.gsub(t, '(.)', '%1@') +assert('@'..a == utf8.gsub(t, '', '@') and b == 5) +a, b = utf8.gsub('abçd', '(.)', '%0@', 2) +assert(a == 'a@b@çd' and b == 2) +assert(utf8.gsub('alo alo', '()[al]', '%1') == '12o 56o') +assert(utf8.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") == + "xyz=abc-abc=xyz") +assert(utf8.gsub("abc", "%w", "%1%0") == "aabbcc") +assert(utf8.gsub("abc", "%w+", "%0%1") == "abcabc") +assert(utf8.gsub('áéí', '$', '\0óú') == 'áéí\0óú') +assert(utf8.gsub('', '^', 'r') == 'r') +assert(utf8.gsub('', '$', 'r') == 'r') +print('+') + +assert(utf8.gsub("um (dois) tres (quatro)", "(%(%w+%))", utf8.upper) == + "um (DOIS) tres (QUATRO)") + +do + local 
function setglobal (n,v) rawset(_G, n, v) end + utf8.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", setglobal) + assert(_G.a=="roberto" and _G.roberto=="a") +end + +function f(a,b) return utf8.gsub(a,'.',b) end +assert(utf8.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == + "trocar tudo em bbbbb é alalalalalal") + +local function dostring (s) return (loadstring or load)(s)() or "" end +assert(utf8.gsub("alo $a=1$ novamente $return a$", "$([^$]*)%$", dostring) == + "alo novamente 1") + +x = utf8.gsub("$local utf8=require'init' x=utf8.gsub('alo', '.', utf8.upper)$ assim vai para $return x$", + "$([^$]*)%$", dostring) +assert(x == ' assim vai para ALO') + +local s,r +t = {} +s = 'a alo jose joao' +r = utf8.gsub(s, '()(%w+)()', function (a,w,b) + assert(utf8.len(w) == b-a); + t[a] = b-a; + end) +assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4) + +local +function isbalanced (s) + return utf8.find(utf8.gsub(s, "%b()", ""), "[()]") == nil +end + +assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a")) +assert(not isbalanced("(9 ((8) 7) a b (\0 c) a")) +assert(utf8.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo') + + +local t = {"apple", "orange", "lime"; n=0} +assert(utf8.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end) + == "apple and orange and lime") + +t = {n=0} +utf8.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end) +assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3) + +t = {n=0} +assert(utf8.gsub("first second word", "%w+", + function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word") +assert(t[1] == "first" and t[2] == "second" and t[3] == nil) + +assert(not pcall(utf8.gsub, "alo", "(.", print)) +assert(not pcall(utf8.gsub, "alo", ".)", print)) +assert(not pcall(utf8.gsub, "alo", "(.", {})) +assert(not pcall(utf8.gsub, "alo", "(.)", "%2")) +assert(not pcall(utf8.gsub, "alo", "(%1)", "a")) +--[[-- +Stepets: ignoring this test because it's 
probably bug in Lua. + %0 should be interpreted as capture reference only in replacement arg + it doesn't have sense in pattern +--]]-- +-- assert(not pcall(utf8.gsub, "alo", "(%0)", "a")) + +-- bug since 2.5 (C-stack overflow) +-- todo: benchmark OOM +-- do +-- local function f (size) +-- local s = string.rep("a", size) +-- local p = string.rep(".?", size) +-- return pcall(utf8.match, s, p) +-- end +-- local r, m = f(80) +-- assert(r and #m == 80) +-- r, m = f(200000) +-- assert(not r and utf8.find(m, "too complex")) +-- end + +-- if not _soft then +-- -- big strings +-- local a = string.rep('a', 300000) +-- assert(utf8.find(a, '^a*.?$')) +-- assert(not utf8.find(a, '^a*.?b$')) +-- assert(utf8.find(a, '^a-.?$')) + +-- -- bug in 5.1.2 +-- a = string.rep('a', 10000) .. string.rep('b', 10000) +-- assert(not pcall(utf8.gsub, a, 'b')) +-- end + +-- recursive nest of gsubs +local function rev (s) + return utf8.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end) +end + +local x = "abcdef" +assert(rev(rev(x)) == x) + + +-- gsub with tables +assert(utf8.gsub("alo alo", ".", {}) == "alo alo") +assert(utf8.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo") +assert(utf8.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo") +assert(utf8.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo") + +assert(utf8.gsub("alo alo", "().", {2,5,6}) == "256 alo") + +t = {}; setmetatable(t, {__index = function (t,s) return utf8.upper(s) end}) +assert(utf8.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI") + + +-- tests for gmatch +local a = 0 +for i in utf8.gmatch('abcde', '()') do assert(i == a+1); a=i end +assert(a==6) + +t = {n=0} +for w in utf8.gmatch("first second word", "%w+") do + t.n=t.n+1; t[t.n] = w +end +assert(t[1] == "first" and t[2] == "second" and t[3] == "word") + +t = {3, 6, 9} +for i in utf8.gmatch ("xuxx uu ppar r", "()(.)%2") do + assert(i == table.remove(t, 1)) +end +assert(#t == 0) + +t = {} +for i,j in utf8.gmatch("13 14 10 = 11, 15= 16, 22=23", 
"(%d+)%s*=%s*(%d+)") do + t[i] = j +end +a = 0 +for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end +assert(a == 3) + + +-- tests for `%f' (`frontiers') + +assert(utf8.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x") +assert(utf8.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[") +assert(utf8.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3") +assert(utf8.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.") +-- local u = utf8.escape +-- assert(utf8.gsub("function", u"%%f[%1-%255]%%w", ".") == ".unction") +-- assert(utf8.gsub("function", u"%%f[^%1-%255]", ".") == "function.") + +--[[-- +Stepets: %z is Lua 5.1 class for representing \0 + Lua 5.2, Lua 5.3 doesn't have it in documentation. So it's considered deprecated. +--]]-- +assert(utf8.find("a", "%f[a]") == 1) +assert(utf8.find("a", "%f[^%z]") == 1) +assert(utf8.find("a", "%f[^%l]") == 2) +assert(utf8.find("aba", "%f[a%z]") == 3) +assert(utf8.find("aba", "%f[%z]") == 4) +assert(not utf8.find("aba", "%f[%l%z]")) +assert(not utf8.find("aba", "%f[^%l%z]")) + +local i, e = utf8.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]") +assert(i == 2 and e == 5) +local k = utf8.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])") +assert(k == 'alo ') + +local a = {1, 5, 9, 14, 17,} +for k in utf8.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do + assert(table.remove(a, 1) == k) +end +assert(#a == 0) + +-- malformed patterns +local function malform (p, m) + m = m or "malformed" + local r, msg = pcall(utf8.find, "a", p) + assert(not r and utf8.find(msg, m)) +end + +malform("[a") +malform("[]") +malform("[^]") +malform("[a%]") +malform("[a%") +malform("%b", "unbalanced") +malform("%ba", "unbalanced") +malform("%") +malform("%f", "missing") + +-- \0 in patterns +assert(utf8.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2") +assert(utf8.match("ab\0\1\2c", "[\0-\0]+") == "\0") +assert(utf8.find("b$a", "$\0?") == 2) +assert(utf8.find("abc\0efg", "%\0") == 4) +assert(utf8.match("abc\0efg\0\1e\1g", "%b\0\1") == 
"\0efg\0\1e\1") +assert(utf8.match("abc\0\0\0", "%\0+") == "\0\0\0") +assert(utf8.match("abc\0\0\0", "%\0%\0?") == "\0\0") + +-- magic char after \0 +assert(utf8.find("abc\0\0","\0.") == 4) +assert(utf8.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4) + +print('OK') diff --git a/mac/.config/mpv/script-modules/utf8/test/test_utf8data.lua b/mac/.config/mpv/script-modules/utf8/test/test_utf8data.lua new file mode 100644 index 0000000..e915b2b --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/test_utf8data.lua @@ -0,0 +1,15 @@ +local utf8uclc = require('init') +utf8uclc.config = { + debug = nil, +-- debug = utf8:require("util").debug, + conversion = { + uc_lc = setmetatable({}, {__index = function(self, idx) return "l" end}), + lc_uc = setmetatable({}, {__index = function(self, idx) return "u" end}), + } +} +utf8uclc:init() + +local assert_equals = require 'test.util'.assert_equals + +assert_equals(utf8uclc.lower("фыва"), "llll") +assert_equals(utf8uclc.upper("фыва"), "uuuu") diff --git a/mac/.config/mpv/script-modules/utf8/test/util.lua b/mac/.config/mpv/script-modules/utf8/test/util.lua new file mode 100644 index 0000000..bdc25e5 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/test/util.lua @@ -0,0 +1,75 @@ +require "test.strict" + +local function equals(t1, t2) + for k,v in pairs(t1) do + if t2[k] == nil then return false end + if type(t2[k]) == 'cdata' and type(v) == 'cdata' then + return true -- don't know how to compare + elseif type(t2[k]) == 'table' and type(v) == 'table' then + if not equals(t2[k], v) then return false end + else + if t2[k] ~= v then return false end + end + end + for k,v in pairs(t2) do + if t1[k] == nil then return false end + if type(t1[k]) == 'cdata' and type(v) == 'cdata' then + return true -- don't know how to compare + elseif type(t1[k]) == 'table' and type(v) == 'table' then + if not equals(t1[k], v) then return false end + else + if t1[k] ~= v then return false end + end + end + return true +end + +local old_tostring = 
tostring +local function tostring(v) + local type = type(v) + if type == 'table' then + local tbl = "{" + for k,v in pairs(v) do + tbl = tbl .. tostring(k) .. ' = ' .. tostring(v) .. ', ' + end + return tbl .. '}' + else + return old_tostring(v) + end +end + +local old_assert = assert +local assert = function(cond, ...) + if not cond then + local data = {...} + local msg = "" + for _, v in pairs(data) do + local type = type(v) + if type == 'table' then + local tbl = "{" + for k,v in pairs(v) do + tbl = tbl .. tostring(k) .. ' = ' .. tostring(v) .. ', ' + end + msg = msg .. tbl .. '}' + else + msg = msg .. tostring(v) + end + end + error(#data > 0 and msg or "assertion failed!") + end + return cond +end + +local function assert_equals(a,b) + assert( + type(a) == 'table' and type(b) == 'table' and equals(a,b) or a == b, + "expected: ", a and a or tostring(a), "\n", + "got: ", b and b or tostring(b) + ) +end + +return { + equals = equals, + assert = assert, + assert_equals = assert_equals, +} diff --git a/mac/.config/mpv/script-modules/utf8/util.lua b/mac/.config/mpv/script-modules/utf8/util.lua new file mode 100644 index 0000000..7723626 --- /dev/null +++ b/mac/.config/mpv/script-modules/utf8/util.lua @@ -0,0 +1,64 @@ +return function(utf8) + +function utf8.util.copy(obj, deep) + if type(obj) == 'table' then + local result = {} + if deep then + for k,v in pairs(obj) do + result[k] = utf8.util.copy(v, true) + end + else + for k,v in pairs(obj) do + result[k] = v + end + end + return result + else + return obj + end +end + +local function dump(val, tab) + tab = tab or '' + + if type(val) == 'table' then + utf8.config.logger('{\n') + for k,v in pairs(val) do + utf8.config.logger(tab .. tostring(k) .. " = ") + dump(v, tab .. '\t') + utf8.config.logger("\n") + end + utf8.config.logger(tab .. '}\n') + else + utf8.config.logger(tostring(val)) + end +end + +function utf8.util.debug(...) 
+ local t = {...} + for _, v in ipairs(t) do + if type(v) == "table" and not (getmetatable(v) or {}).__tostring then + dump(v, '\t') + else + utf8.config.logger(tostring(v), " ") + end + end + + utf8.config.logger('\n') +end + +function utf8.debug(...) + if utf8.config.debug then + utf8.config.debug(...) + end +end + +function utf8.util.next(str, bs) + local nbs1 = utf8.next(str, bs) + local nbs2 = utf8.next(str, nbs1) + return utf8.raw.sub(str, nbs1, nbs2 - 1), nbs1 +end + +return utf8.util + +end |
