summaryrefslogtreecommitdiff
path: root/mac/.config/mpv/script-modules/utf8/charclass
diff options
context:
space:
mode:
Diffstat (limited to 'mac/.config/mpv/script-modules/utf8/charclass')
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua128
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua21
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua44
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua9
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua131
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua184
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua41
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua22
-rw-r--r--mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua47
9 files changed, 627 insertions, 0 deletions
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua
new file mode 100644
index 0000000..9d9c603
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/builder.lua
@@ -0,0 +1,128 @@
+return function(utf8)
+
+local byte = utf8.byte
+local unpack = utf8.config.unpack
+
+local builder = {}
+local mt = {__index = builder}
+
+utf8.regex.compiletime.charclass.builder = builder
+
+--- Create an empty compile-time character-class builder.
+-- @return a fresh table whose metatable is `mt`, so method lookups fall back to `builder`
+function builder.new()
+  local instance = {}
+  return setmetatable(instance, mt)
+end
+
+--- Flag this builder as inverted.
+-- build() appends `:invert()` to the generated source when the flag is set.
+-- @return self, for chaining
+function builder:invert()
+  local target = self
+  target.inverted = true
+  return target
+end
+
+function builder:internal() -- is it enclosed in []; NOTE(review): the flag is stored under the same key as this method, so after one call `b.internal` is `true` and a second `b:internal()` on the same builder would fail - confirm no caller does that
+ self.internal = true -- this instance field now shadows the method reached through the metatable
+ return self -- returned for chaining
+end
+
+--- Append code points to the builder and keep the list sorted.
+-- @param ... numbers are stored as given; any other value is converted with `byte`
+-- @return self, for chaining
+function builder:with_codes(...)
+  local list = self.codes or {}
+  self.codes = list
+
+  for _, item in ipairs({...}) do
+    local code
+    if type(item) == "number" then
+      code = item
+    else
+      code = byte(item)
+    end
+    table.insert(list, code)
+  end
+
+  table.sort(list)
+  return self
+end
+
+--- Append ranges to the builder.
+-- @param ... range tables; build() reads element 1 and element 2 of each
+-- @return self, for chaining
+function builder:with_ranges(...)
+  if not self.ranges then
+    self.ranges = {}
+  end
+  local stored = self.ranges
+
+  for _, range in ipairs({...}) do
+    stored[#stored + 1] = range
+  end
+
+  return self
+end
+
+--- Append named character classes to the builder.
+-- @param ... class names; build() emits them as quoted strings
+-- @return self, for chaining
+function builder:with_classes(...)
+  if not self.classes then
+    self.classes = {}
+  end
+  local stored = self.classes
+
+  for _, name in ipairs({...}) do
+    stored[#stored + 1] = name
+  end
+
+  return self
+end
+
+--- Append negated character classes to the builder.
+-- @param ... class names; build() emits them as quoted strings inside `without_classes(...)`
+-- @return self, for chaining
+function builder:without_classes(...)
+  if not self.not_classes then
+    self.not_classes = {}
+  end
+  local stored = self.not_classes
+
+  for _, name in ipairs({...}) do
+    stored[#stored + 1] = name
+  end
+
+  return self
+end
+
+--- Merge another builder into this one.
+-- A non-inverted builder is flattened: its codes, ranges, classes and negated
+-- classes are appended to this builder. An inverted builder cannot be
+-- flattened, so it is kept whole in `self.includes`, which build() emits
+-- through `with_subs`.
+-- @param b builder to merge
+-- @return self, for chaining
+function builder:include(b)
+  if b.inverted then
+    self.includes = self.includes or {}
+    self.includes[#self.includes + 1] = b
+    return self
+  end
+
+  if b.codes then
+    self:with_codes(unpack(b.codes))
+  end
+  if b.ranges then
+    self:with_ranges(unpack(b.ranges))
+  end
+  if b.classes then
+    self:with_classes(unpack(b.classes))
+  end
+  if b.not_classes then
+    self:without_classes(unpack(b.not_classes))
+  end
+  -- Carry over the flattened builder's own includes as well; they used to be
+  -- dropped silently. The count is taken once so the loop stays bounded.
+  if b.includes then
+    self.includes = self.includes or {}
+    for i = 1, #b.includes do
+      self.includes[#self.includes + 1] = b.includes[i]
+    end
+  end
+  return self
+end
+
+--- Render this builder as Lua source text.
+-- A lone, non-inverted code with nothing else attached becomes an inline table
+-- with a `test` function; every other case becomes a `cl.new()` call chain.
+-- @return string of Lua source
+function builder:build()
+  local only_one_code = self.codes and #self.codes == 1
+  local has_extras = self.inverted or self.ranges or self.classes or self.not_classes or self.includes
+  if only_one_code and not has_extras then
+    return "{test = function(self, cc) return cc == " .. self.codes[1] .. " end}"
+  end
+
+  local codes_list = table.concat(self.codes or {}, ', ')
+
+  local range_parts = {}
+  for i, r in ipairs(self.ranges or {}) do
+    range_parts[i] = '{' .. tostring(r[1]) .. ', ' .. tostring(r[2]) .. '}'
+  end
+  local ranges_list = table.concat(range_parts, ', ')
+
+  local classes_list = self.classes and ("'" .. table.concat(self.classes, "', '") .. "'") or ''
+  local not_classes_list = self.not_classes and ("'" .. table.concat(self.not_classes, "', '") .. "'") or ''
+
+  local sub_parts = {}
+  for i, r in ipairs(self.includes or {}) do
+    sub_parts[i] = r:build() .. ''
+  end
+  local subs_list = table.concat(sub_parts, ', ')
+
+  -- The template lines below are kept exactly as they were: their leading
+  -- whitespace is part of the long-string literals.
+  local src = [[cl.new():with_codes(
+ ]] .. codes_list .. [[
+ ):with_ranges(
+ ]] .. ranges_list .. [[
+ ):with_classes(
+ ]] .. classes_list .. [[
+ ):without_classes(
+ ]] .. not_classes_list .. [[
+ ):with_subs(
+ ]] .. subs_list .. [[
+ )]]
+
+  return self.inverted and (src .. ':invert()') or src
+end
+
+return builder
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua
new file mode 100644
index 0000000..4f1d4a9
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/parser.lua
@@ -0,0 +1,21 @@
+return function(utf8)
+
+-- Install the default charclass parsers unless the configuration already
+-- provides a list. charclass.parse tries them in this order.
+if not utf8.config.compiletime_charclasses then
+  utf8.config.compiletime_charclasses = {
+    utf8:require "charclass.compiletime.vanilla",
+    utf8:require "charclass.compiletime.range",
+    utf8:require "charclass.compiletime.stub",
+  }
+end
+
+--- Parse one character class by trying each configured parser in turn.
+-- The first parser that returns a class wins; the class's built source is
+-- stored in `ctx.prev_class`.
+-- @param regex pattern string
+-- @param c current character
+-- @param bs byte position passed through to the parsers
+-- @param ctx parse context (receives `prev_class`)
+-- @return the class builder and the parser's second result, or nothing when no parser matched
+function utf8.regex.compiletime.charclass.parse(regex, c, bs, ctx)
+  utf8.debug("parse charclass():", regex, c, bs, regex[bs])
+  for index, parser in ipairs(utf8.config.compiletime_charclasses) do
+    local charclass, nbs = parser(regex, c, bs, ctx)
+    if charclass then
+      ctx.prev_class = charclass:build()
+      utf8.debug("cc", ctx.prev_class, index, c, bs, nbs)
+      return charclass, nbs
+    end
+  end
+end
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua
new file mode 100644
index 0000000..2996234
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/range.lua
@@ -0,0 +1,44 @@
+return function(utf8)
+
+local cl = utf8.regex.compiletime.charclass.builder
+
+local next = utf8.util.next
+
+--- Parser for `a-z` style ranges; only active inside a set (`ctx.internal`).
+-- Either end of the range may be written with a leading `%`.
+-- @return a builder holding one range plus the number of bytes consumed from `bs`,
+--         or nothing when the text at `bs` is not a range
+return function(str, c, bs, ctx)
+  if not ctx.internal then return end
+
+  local ch, pos = c, bs
+  if ch == '%' then
+    ch, pos = next(str, pos)
+  end
+  local r1 = ch
+
+  utf8.debug("range r1", r1, pos)
+
+  ch, pos = next(str, pos)
+  if ch ~= '-' then return end
+
+  local r2
+  ch, pos = next(str, pos)
+  if ch == '%' then
+    ch, pos = next(str, pos)
+    r2 = ch
+  elseif ch ~= '' and ch ~= ']' then
+    r2 = ch
+  end
+
+  utf8.debug("range r2", r2, pos)
+
+  if not (r1 and r2) then return end
+
+  local class = cl.new():with_ranges{utf8.byte(r1), utf8.byte(r2)}
+  return class, utf8.next(str, pos) - bs
+end
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua
new file mode 100644
index 0000000..395d05c
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/stub.lua
@@ -0,0 +1,9 @@
+return function(utf8)
+
+local cl = utf8.regex.compiletime.charclass.builder
+
+-- Parser that always succeeds: the current character becomes a single-code class.
+-- Returns the builder and the distance from `bs` to `utf8.next(str, bs)`.
+return function(str, c, bs, ctx)
+  local literal = cl.new():with_codes(c)
+  local consumed = utf8.next(str, bs) - bs
+  return literal, consumed
+end
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua
new file mode 100644
index 0000000..8e7f0b3
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/compiletime/vanilla.lua
@@ -0,0 +1,131 @@
+return function(utf8)
+
+local cl = utf8:require "charclass.compiletime.builder"
+
+local next = utf8.util.next
+
+local token = 1
+
+local function parse(str, c, bs, ctx) -- parses `%x` escapes, `[...]` sets and `.`; returns (class or nil, utf8.next(str, nbs) - bs)
+ local tttt = token -- per-call id taken from the module-level counter; only used in debug output
+ token = token + 1
+
+ local class
+ local nbs = bs
+ utf8.debug("cc_parse", tttt, str, c, nbs, next(str, nbs))
+
+ if c == '%' then -- escape: the following character selects a class or a literal
+ c, nbs = next(str, bs)
+ if c == '' then
+ error("malformed pattern (ends with '%')")
+ end
+ local _c = utf8.raw.lower(c) -- lower-cased copy used for the lookup; `c` keeps the original case
+ local matched
+ if _c == 'a' then
+ matched = ('alpha')
+ elseif _c == 'c' then
+ matched = ('cntrl')
+ elseif _c == 'd' then
+ matched = ('digit')
+ elseif _c == 'g' then
+ matched = ('graph')
+ elseif _c == 'l' then
+ matched = ('lower')
+ elseif _c == 'p' then
+ matched = ('punct')
+ elseif _c == 's' then
+ matched = ('space')
+ elseif _c == 'u' then
+ matched = ('upper')
+ elseif _c == 'w' then
+ matched = ('alnum')
+ elseif _c == 'x' then
+ matched = ('xdigit')
+ end
+
+ if matched then
+ if _c ~= c then -- the escape letter differed from its lower-cased form, i.e. it was uppercase: negated class
+ class = cl.new():without_classes(matched)
+ else
+ class = cl.new():with_classes(matched)
+ end
+ elseif _c == 'z' then -- %z is code 0; %Z is the same class inverted
+ class = cl.new():with_codes(0)
+ if _c ~= c then
+ class = class:invert()
+ end
+ else -- any other escaped character stands for itself
+ class = cl.new():with_codes(c)
+ end
+ elseif c == '[' and not ctx.internal then -- `[` opens a set only when not already inside one
+ local old_internal = ctx.internal
+ ctx.internal = true -- set for the duration of the loop; restored after it
+ class = cl.new()
+ local firstletter = true -- true only for the first character after `[`
+ while true do
+ local prev_nbs = nbs -- position before advancing; used below to compute the new position
+ c, nbs = next(str, nbs)
+ utf8.debug("next", tttt, c, nbs)
+ if c == '^' and firstletter then -- leading `^` inverts the set
+ class:invert()
+ local nc, nnbs = next(str, nbs)
+ if nc == ']' then -- `]` directly after `^` is taken as a literal member
+ class:with_codes(nc)
+ nbs = nnbs
+ end
+ elseif c == ']' then
+ if firstletter then -- `]` as the first character is a literal member
+ class:with_codes(c)
+ else -- otherwise it closes the set
+ utf8.debug('] on pos', tttt, nbs)
+ break
+ end
+ elseif c == '' then
+ error "malformed pattern (missing ']')"
+ else
+ local sub_class, skip = utf8.regex.compiletime.charclass.parse(str, c, nbs, ctx) -- delegate one set member to the charclass.parse entry point
+ nbs = prev_nbs + skip -- NOTE(review): advance is measured from prev_nbs, not nbs; depends on what utf8.util.next returns - confirm
+ utf8.debug("include", tttt, bs, prev_nbs, nbs, skip)
+ class:include(sub_class)
+ end
+ firstletter = false
+ end
+ ctx.internal = old_internal
+ elseif c == '.' then
+ if not ctx.internal then -- outside a set: an empty class, inverted
+ class = cl.new():invert()
+ else -- inside a set `.` is a literal member
+ class = cl.new():with_codes(c)
+ end
+ end
+
+ return class, utf8.next(str, nbs) - bs -- class stays nil when none of the branches above applied
+end
+
+return parse
+
+end
+
+--[[
+ x: (where x is not one of the magic characters ^$()%.[]*+-?) represents the character x itself.
+ .: (a dot) represents all characters.
+ %a: represents all letters.
+ %c: represents all control characters.
+ %d: represents all digits.
+ %g: represents all printable characters except space.
+ %l: represents all lowercase letters.
+ %p: represents all punctuation characters.
+ %s: represents all space characters.
+ %u: represents all uppercase letters.
+ %w: represents all alphanumeric characters.
+ %x: represents all hexadecimal digits.
+ %x: (where x is any non-alphanumeric character) represents the character x. This is the standard way to escape the magic characters. Any non-alphanumeric character (including all punctuation characters, even the non-magical) can be preceded by a '%' when used to represent itself in a pattern.
+ [set]: represents the class which is the union of all characters in set. A range of characters can be specified by separating the end characters of the range, in ascending order, with a '-'. All classes %x described above can also be used as components in set. All other characters in set represent themselves. For example, [%w_] (or [_%w]) represents all alphanumeric characters plus the underscore, [0-7] represents the octal digits, and [0-7%l%-] represents the octal digits plus the lowercase letters plus the '-' character.
+
+ You can put a closing square bracket in a set by positioning it as the first character in the set. You can put a hyphen in a set by positioning it as the first or the last character in the set. (You can also use an escape for both cases.)
+
+ The interaction between ranges and classes is not defined. Therefore, patterns like [%a-z] or [a-%%] have no meaning.
+ [^set]: represents the complement of set, where set is interpreted as above.
+
+For all classes represented by single letters (%a, %c, etc.), the corresponding uppercase letter represents the complement of the class. For instance, %S represents all non-space characters.
+]]
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua
new file mode 100644
index 0000000..33d7713
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/base.lua
@@ -0,0 +1,184 @@
+return function(utf8)
+
+local class = {}
+local mt = {__index = class}
+
+local utf8gensub = utf8.gensub
+
+--- Create an empty runtime character class.
+-- @return a fresh table whose metatable is `mt`, so method lookups fall back to `class`
+function class.new()
+  local instance = {}
+  return setmetatable(instance, mt)
+end
+
+--- Flag this class as inverted; do_test consults the flag when computing its result.
+-- @return self, for chaining
+function class:invert()
+  local target = self
+  target.inverted = true
+  return target
+end
+
+--- Append codes and keep the list sorted (in_codes searches it by bisection).
+-- @param ... codes to add
+-- @return self, for chaining
+function class:with_codes(...)
+  if not self.codes then
+    self.codes = {}
+  end
+  local stored = self.codes
+
+  for _, code in ipairs({...}) do
+    stored[#stored + 1] = code
+  end
+
+  table.sort(stored)
+  return self
+end
+
+--- Append ranges; in_ranges reads element 1 and element 2 of each as inclusive bounds.
+-- @param ... range tables
+-- @return self, for chaining
+function class:with_ranges(...)
+  if not self.ranges then
+    self.ranges = {}
+  end
+  local stored = self.ranges
+
+  for _, range in ipairs({...}) do
+    stored[#stored + 1] = range
+  end
+
+  return self
+end
+
+--- Append named classes; in_classes checks each one through self:is.
+-- @param ... class names
+-- @return self, for chaining
+function class:with_classes(...)
+  if not self.classes then
+    self.classes = {}
+  end
+  local stored = self.classes
+
+  for _, name in ipairs({...}) do
+    stored[#stored + 1] = name
+  end
+
+  return self
+end
+
+--- Append negated named classes; in_not_classes checks each one through self:is.
+-- @param ... class names
+-- @return self, for chaining
+function class:without_classes(...)
+  if not self.not_classes then
+    self.not_classes = {}
+  end
+  local stored = self.not_classes
+
+  for _, name in ipairs({...}) do
+    stored[#stored + 1] = name
+  end
+
+  return self
+end
+
+--- Append sub-classes; in_subs calls `:test(char_code)` on each of them.
+-- @param ... objects exposing a `test` method
+-- @return self, for chaining
+function class:with_subs(...)
+  if not self.subs then
+    self.subs = {}
+  end
+  local stored = self.subs
+
+  for _, sub in ipairs({...}) do
+    stored[#stored + 1] = sub
+  end
+
+  return self
+end
+
+--- Look up a code in the sorted `codes` list by bisection.
+-- @param item code to search for
+-- @return nil when no codes are stored; otherwise true plus the index found, or false
+function class:in_codes(item)
+  local codes = self.codes
+  if not codes or #codes == 0 then return nil end
+
+  -- Narrow [lo, hi] until the two ends are adjacent (or equal).
+  local lo, hi = 1, #codes
+  while (hi - lo) > 1 do
+    local mid = math.floor((lo + hi)/2)
+    if codes[mid] > item then
+      hi = mid
+    else
+      lo = mid
+    end
+  end
+
+  -- Only the two surviving candidates can hold the item.
+  if codes[lo] == item then
+    return true, lo
+  end
+  if codes[hi] == item then
+    return true, hi
+  end
+  return false
+end
+
+--- Check whether a code falls inside any stored range (bounds inclusive).
+-- @param char_code code to test
+-- @return nil when no ranges are stored, otherwise true or false
+function class:in_ranges(char_code)
+  local ranges = self.ranges
+  if not ranges or #ranges == 0 then return nil end
+
+  local found = false
+  for _, bounds in ipairs(ranges) do
+    if bounds[1] <= char_code and char_code <= bounds[2] then
+      found = true
+      break
+    end
+  end
+  return found
+end
+
+--- Check whether a code belongs to any of the stored named classes.
+-- @param char_code code to test
+-- @return nil when no classes are stored, otherwise true or false
+function class:in_classes(char_code)
+  local names = self.classes
+  if not names or #names == 0 then return nil end
+
+  local found = false
+  for _, name in ipairs(names) do
+    if self:is(name, char_code) then
+      found = true
+      break
+    end
+  end
+  return found
+end
+
+--- Check whether a code belongs to any of the stored negated classes.
+-- @param char_code code to test
+-- @return nil when no negated classes are stored, otherwise true or false
+function class:in_not_classes(char_code)
+  local names = self.not_classes
+  if not names or #names == 0 then return nil end
+
+  local found = false
+  for _, name in ipairs(names) do
+    if self:is(name, char_code) then
+      found = true
+      break
+    end
+  end
+  return found
+end
+
+function class:is(class, char_code) -- hook used by in_classes/in_not_classes to ask whether char_code belongs to the named class; this base version always raises
+ error("not implemented") -- a runtime that stores class names must override this method
+end
+
+--- Check whether a code is accepted by any stored sub-class.
+-- Subs are members of a set, and a set is the union of its members, so one
+-- accepting sub is enough. (The previous version required every sub to accept,
+-- which turned several subs into an intersection.)
+-- @param char_code code to test
+-- @return nil when no subs are stored, otherwise true or false
+function class:in_subs(char_code)
+  if not self.subs or #self.subs == 0 then return nil end
+
+  for _, sub in ipairs(self.subs) do
+    if sub:test(char_code) then
+      return true
+    end
+  end
+  return false
+end
+
+--- Public entry point: returns the single result of do_test for this code.
+-- @param char_code code to test
+function class:test(char_code)
+  -- utf8.debug('class:test', result, "'" .. (char_code and utf8.char(char_code) or 'nil') .. "'", char_code)
+  return (self:do_test(char_code))
+end
+
+--- Decide whether a code matches this class.
+-- The class is treated as a union of its parts, as Lua pattern sets are: codes,
+-- ranges, named classes, subs, and the complement of each negated class. A
+-- class with no matching part matches nothing; `inverted` flips the result.
+--
+-- Previously a hit in any negated class rejected the code outright, and a
+-- negated class only contributed when nothing else was stored. So `[%S ]`
+-- rejected a space and `[%Sa]` rejected "b".
+-- @param char_code code to test; nil never matches
+-- @return boolean
+function class:do_test(char_code)
+  if not char_code then return false end
+  local inverted = not not self.inverted
+
+  -- Positive parts: any hit is a member of the union.
+  if self:in_codes(char_code)
+    or self:in_ranges(char_code)
+    or self:in_classes(char_code)
+    or self:in_subs(char_code) then
+    return not inverted
+  end
+
+  -- Negated classes: each contributes its complement, so being outside a
+  -- single one of them is enough.
+  if self.not_classes then
+    for _, name in ipairs(self.not_classes) do
+      if not self:is(name, char_code) then
+        return not inverted
+      end
+    end
+  end
+
+  return inverted
+end
+
+return class
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua
new file mode 100644
index 0000000..1faddc1
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/dummy.lua
@@ -0,0 +1,41 @@
+return function(utf8)
+
+local base = utf8:require "charclass.runtime.base"
+
+local dummy = setmetatable({}, {__index = base})
+local mt = {__index = dummy}
+
+--- Create an empty dummy character class.
+-- @return a fresh table whose metatable is `mt`, so lookups fall back to `dummy` and then `base`
+function dummy.new()
+  local instance = {}
+  return setmetatable(instance, mt)
+end
+
+-- One filler per known class name. Each adds the hard-coded ranges and codes
+-- for that class to the given object.
+local class_fillers = {
+  alpha = function(cc) cc:with_ranges({65, 90}, {97, 122}) end,
+  cntrl = function(cc) cc:with_ranges({0, 31}):with_codes(127) end,
+  digit = function(cc) cc:with_ranges({48, 57}) end,
+  graph = function(cc) cc:with_ranges({1, 8}, {14, 31}, {33, 132}, {134, 159}, {161, 5759}, {5761, 8191}, {8203, 8231}, {8234, 8238}, {8240, 8286}, {8288, 12287}) end,
+  lower = function(cc) cc:with_ranges({97, 122}) end,
+  punct = function(cc) cc:with_ranges({33, 47}, {58, 64}, {91, 96}, {123, 126}) end,
+  space = function(cc) cc:with_ranges({9, 13}):with_codes(32, 133, 160, 5760):with_ranges({8192, 8202}):with_codes(8232, 8233, 8239, 8287, 12288) end,
+  upper = function(cc) cc:with_ranges({65, 90}) end,
+  alnum = function(cc) cc:with_ranges({48, 57}, {65, 90}, {97, 122}) end,
+  xdigit = function(cc) cc:with_ranges({48, 57}, {65, 70}, {97, 102}) end,
+}
+
+--- Expand named classes into explicit ranges and codes.
+-- Names without a filler are ignored.
+-- @param ... class names
+-- @return self, for chaining
+function dummy:with_classes(...)
+  for _, name in ipairs({...}) do
+    local fill = class_fillers[name]
+    if fill then
+      fill(self)
+    end
+  end
+  return self
+end
+
+--- Add the complements of the given named classes.
+-- Each negated class is a separate member of the set, so the result is the
+-- union of the complements: a code is accepted when it lies outside at least
+-- one of the classes. The previous version inverted the union of all the
+-- classes, which accepted only codes outside every one of them. With a single
+-- class the two are the same.
+-- @param ... class names
+-- @return self, for chaining
+function dummy:without_classes(...)
+  local classes = {...}
+  if #classes == 0 then
+    return self
+  end
+
+  -- One positive matcher per class; the sub below negates them one by one.
+  local members = {}
+  for i, name in ipairs(classes) do
+    members[i] = dummy.new():with_classes(name)
+  end
+
+  return self:with_subs({
+    test = function(_, char_code)
+      if not char_code then return false end
+      for _, member in ipairs(members) do
+        if not member:test(char_code) then
+          return true
+        end
+      end
+      return false
+    end
+  })
+end
+
+return dummy
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua
new file mode 100644
index 0000000..e71d037
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/init.lua
@@ -0,0 +1,22 @@
+-- Select the runtime charclass implementation.
+-- A configured `runtime_charclasses` wins: a table is returned as is, a
+-- function is called with `utf8`, and anything else is handed to utf8:require.
+-- Without one, the native module is used when `require "ffi"` succeeds and the
+-- dummy module otherwise.
+return function(utf8)
+
+  local configured = utf8.config.runtime_charclasses
+
+  if configured then
+    local kind = type(configured)
+    if kind == "table" then
+      return configured
+    end
+    if kind == "function" then
+      return configured(utf8)
+    end
+    return utf8:require(configured)
+  end
+
+  -- pcall's first result only tells whether the require succeeded.
+  local has_ffi = pcall(require, "ffi")
+  if has_ffi then
+    return utf8:require "charclass.runtime.native"
+  end
+  return utf8:require "charclass.runtime.dummy"
+
+end
diff --git a/mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua b/mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua
new file mode 100644
index 0000000..f7b7890
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/charclass/runtime/native.lua
@@ -0,0 +1,47 @@
+return function(utf8)
+
+os.setlocale(utf8.config.locale, "ctype") -- sets the "ctype" locale category at load time; the result is not checked (os.setlocale returns nil on failure). NOTE(review): presumably this is what makes the isw* calls below classify non-ASCII codes - confirm
+
+local ffi = require("ffi") -- the declarations below make the C isw* functions reachable through ffi.C
+ffi.cdef[[
+ int iswalnum(int c);
+ int iswalpha(int c);
+ int iswascii(int c);
+ int iswblank(int c);
+ int iswcntrl(int c);
+ int iswdigit(int c);
+ int iswgraph(int c);
+ int iswlower(int c);
+ int iswprint(int c);
+ int iswpunct(int c);
+ int iswspace(int c);
+ int iswupper(int c);
+ int iswxdigit(int c);
+]] -- iswascii, iswblank and iswprint are declared but not used by native:is. NOTE(review): standard C declares these functions with a wint_t parameter; int is assumed compatible here - confirm for the target platform
+
+local base = utf8:require "charclass.runtime.base"
+
+local native = setmetatable({}, {__index = base})
+local mt = {__index = native}
+
+--- Create an empty native character class.
+-- @return a fresh table whose metatable is `mt`, so lookups fall back to `native` and then `base`
+function native.new()
+  local instance = {}
+  return setmetatable(instance, mt)
+end
+
+-- Class name -> name of the C function that tests it.
+local isw_function_for = {
+  alpha = "iswalpha",
+  cntrl = "iswcntrl",
+  digit = "iswdigit",
+  graph = "iswgraph",
+  lower = "iswlower",
+  punct = "iswpunct",
+  space = "iswspace",
+  upper = "iswupper",
+  alnum = "iswalnum",
+  xdigit = "iswxdigit",
+}
+
+--- Test a code against a named class through the matching C isw* function.
+-- @param class class name
+-- @param char_code code to test
+-- @return true when the C function returns non-zero, false when it returns zero;
+--         nothing for a class name that has no entry
+function native:is(class, char_code)
+  local fname = isw_function_for[class]
+  if not fname then
+    return
+  end
+  return ffi.C[fname](char_code) ~= 0
+end
+
+return native
+
+end