summaryrefslogtreecommitdiff
path: root/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua
diff options
context:
space:
mode:
Diffstat (limited to 'mac/.config/mpv/script-modules/utf8/primitives/dummy.lua')
-rw-r--r--mac/.config/mpv/script-modules/utf8/primitives/dummy.lua555
1 files changed, 555 insertions, 0 deletions
diff --git a/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua b/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua
new file mode 100644
index 0000000..a4665f5
--- /dev/null
+++ b/mac/.config/mpv/script-modules/utf8/primitives/dummy.lua
@@ -0,0 +1,555 @@
+-- $Id: utf8.lua 179 2009-04-03 18:10:03Z pasta $
+--
+-- Provides UTF-8 aware string functions implemented in pure lua:
+-- * utf8len(s)
+-- * utf8sub(s, i, j)
+-- * utf8reverse(s)
+-- * utf8char(unicode)
+-- * utf8unicode(s, i, j)
+-- * utf8gensub(s, sub_len)
+-- * utf8find(str, regex, init, plain)
+-- * utf8match(str, regex, init)
+-- * utf8gmatch(str, regex, all)
+-- * utf8gsub(str, regex, repl, limit)
+--
+-- All functions behave as their non UTF-8 aware counterparts with the exception
+-- that UTF-8 characters are used instead of bytes for all units.
+
+--[[
+Copyright (c) 2006-2007, Kyle Smith
+All rights reserved.
+
+Contributors:
+ Alimov Stepan
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the author nor the names of its contributors may be
+ used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+--]]
+
+-- ABNF from RFC 3629
+--
+-- UTF8-octets = *( UTF8-char )
+-- UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+-- UTF8-1 = %x00-7F
+-- UTF8-2 = %xC2-DF UTF8-tail
+-- UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+-- %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+-- UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+-- %xF4 %x80-8F 2( UTF8-tail )
+-- UTF8-tail = %x80-BF
+--
+return function(utf8)
+
+local byte = string.byte
+local char = string.char
+local dump = string.dump
+local find = string.find
+local format = string.format
+local len = string.len
+local lower = string.lower
+local rep = string.rep
+local sub = string.sub
+local upper = string.upper
+
+local utf8charpattern = '[%z\1-\127\194-\244][\128-\191]*'
+
+local function utf8symbollen(byte)
+ return not byte and 0 or (byte < 0x80 and 1) or (byte >= 0xF0 and 4) or (byte >= 0xE0 and 3) or (byte >= 0xC0 and 2) or 1
+end
+
+local head_table = utf8.config.int32array(256)
+for i = 0, 255 do
+ head_table[i] = utf8symbollen(i)
+end
+head_table[256] = 0
+
+local function utf8charbytes(str, bs)
+ return head_table[byte(str, bs) or 256]
+end
+
+local function utf8next(str, bs)
+ return bs + utf8charbytes(str, bs)
+end
+
+-- returns the number of characters in a UTF-8 string
+local function utf8len (str)
+ local bs = 1
+ local bytes = len(str)
+ local length = 0
+
+ while bs <= bytes do
+ length = length + 1
+ bs = utf8next(str, bs)
+ end
+
+ return length
+end
+
+-- functions identically to string.sub except that i and j are UTF-8 characters
+-- instead of bytes
+local function utf8sub (s, i, j)
+ -- argument defaults
+ j = j or -1
+
+ local bs = 1
+ local bytes = len(s)
+ local length = 0
+
+ local l = (i >= 0 and j >= 0) or utf8len(s)
+ i = (i >= 0) and i or l + i + 1
+ j = (j >= 0) and j or l + j + 1
+
+ if i > j then
+ return ""
+ end
+
+ local start, finish = 1, bytes
+
+ while bs <= bytes do
+ length = length + 1
+
+ if length == i then
+ start = bs
+ end
+
+ bs = utf8next(s, bs)
+
+ if length == j then
+ finish = bs - 1
+ break
+ end
+ end
+
+ if i > length then start = bytes + 1 end
+ if j < 1 then finish = 0 end
+
+ return sub(s, start, finish)
+end
+
+-- http://en.wikipedia.org/wiki/Utf8
+-- http://developer.coronalabs.com/code/utf-8-conversion-utility
+local function utf8char(...)
+ local codes = {...}
+ local result = {}
+
+ for _, unicode in ipairs(codes) do
+
+ if unicode <= 0x7F then
+ result[#result + 1] = unicode
+ elseif unicode <= 0x7FF then
+ local b0 = 0xC0 + math.floor(unicode / 0x40);
+ local b1 = 0x80 + (unicode % 0x40);
+ result[#result + 1] = b0
+ result[#result + 1] = b1
+ elseif unicode <= 0xFFFF then
+ local b0 = 0xE0 + math.floor(unicode / 0x1000);
+ local b1 = 0x80 + (math.floor(unicode / 0x40) % 0x40);
+ local b2 = 0x80 + (unicode % 0x40);
+ result[#result + 1] = b0
+ result[#result + 1] = b1
+ result[#result + 1] = b2
+ elseif unicode <= 0x10FFFF then
+ local code = unicode
+ local b3= 0x80 + (code % 0x40);
+ code = math.floor(code / 0x40)
+ local b2= 0x80 + (code % 0x40);
+ code = math.floor(code / 0x40)
+ local b1= 0x80 + (code % 0x40);
+ code = math.floor(code / 0x40)
+ local b0= 0xF0 + code;
+
+ result[#result + 1] = b0
+ result[#result + 1] = b1
+ result[#result + 1] = b2
+ result[#result + 1] = b3
+ else
+ error 'Unicode cannot be greater than U+10FFFF!'
+ end
+
+ end
+
+ return char(utf8.config.unpack(result))
+end
+
+
+local shift_6 = 2^6
+local shift_12 = 2^12
+local shift_18 = 2^18
+
+local utf8unicode
+utf8unicode = function(str, ibs, jbs)
+ if ibs > jbs then return end
+
+ local ch,bytes
+
+ bytes = utf8charbytes(str, ibs)
+ if bytes == 0 then return end
+
+ local unicode
+
+ if bytes == 1 then unicode = byte(str, ibs, ibs) end
+ if bytes == 2 then
+ local byte0,byte1 = byte(str, ibs, ibs + 1)
+ if byte0 and byte1 then
+ local code0,code1 = byte0-0xC0,byte1-0x80
+ unicode = code0*shift_6 + code1
+ else
+ unicode = byte0
+ end
+ end
+ if bytes == 3 then
+ local byte0,byte1,byte2 = byte(str, ibs, ibs + 2)
+ if byte0 and byte1 and byte2 then
+ local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80
+ unicode = code0*shift_12 + code1*shift_6 + code2
+ else
+ unicode = byte0
+ end
+ end
+ if bytes == 4 then
+ local byte0,byte1,byte2,byte3 = byte(str, ibs, ibs + 3)
+ if byte0 and byte1 and byte2 and byte3 then
+ local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80
+ unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3
+ else
+ unicode = byte0
+ end
+ end
+
+ if ibs == jbs then
+ return unicode
+ else
+ return unicode,utf8unicode(str, ibs+bytes, jbs)
+ end
+end
+
+local function utf8byte(str, i, j)
+ if #str == 0 then return end
+
+ local ibs, jbs
+
+ if i or j then
+ i = i or 1
+ j = j or i
+
+ local str_len = utf8len(str)
+ i = i < 0 and str_len + i + 1 or i
+ j = j < 0 and str_len + j + 1 or j
+ j = j > str_len and str_len or j
+
+ if i > j then return end
+
+ for p = 1, i - 1 do
+ ibs = utf8next(str, ibs or 1)
+ end
+
+ if i == j then
+ jbs = ibs
+ else
+ for p = 1, j - 1 do
+ jbs = utf8next(str, jbs or 1)
+ end
+ end
+
+ if not ibs or not jbs then
+ return nil
+ end
+ else
+ ibs, jbs = 1, 1
+ end
+
+ return utf8unicode(str, ibs, jbs)
+end
+
+local function utf8gensub(str, sub_len)
+ sub_len = sub_len or 1
+ local max_len = #str
+ return function(skip_ptr, bs)
+ bs = (bs and bs or 1) + (skip_ptr and (skip_ptr[1] or 0) or 0)
+
+ local nbs = bs
+ if bs > max_len then return nil end
+ for i = 1, sub_len do
+ nbs = utf8next(str, nbs)
+ end
+
+ return nbs, sub(str, bs, nbs - 1), bs
+ end
+end
+
+local function utf8reverse (s)
+ local result = ''
+ for _, w in utf8gensub(s) do result = w .. result end
+ return result
+end
+
+local function utf8validator(str, bs)
+ bs = bs or 1
+
+ if type(str) ~= "string" then
+ error("bad argument #1 to 'utf8charbytes' (string expected, got ".. type(str).. ")")
+ end
+ if type(bs) ~= "number" then
+ error("bad argument #2 to 'utf8charbytes' (number expected, got ".. type(bs).. ")")
+ end
+
+ local c = byte(str, bs)
+ if not c then return end
+
+ -- determine bytes needed for character, based on RFC 3629
+
+ -- UTF8-1
+ if c >= 0 and c <= 127 then
+ return bs + 1
+ elseif c >= 128 and c <= 193 then
+ return bs + 1, bs, 1, c
+ -- UTF8-2
+ elseif c >= 194 and c <= 223 then
+ local c2 = byte(str, bs + 1)
+ if not c2 or c2 < 128 or c2 > 191 then
+ return bs + 2, bs, 2, c2
+ end
+
+ return bs + 2
+ -- UTF8-3
+ elseif c >= 224 and c <= 239 then
+ local c2 = byte(str, bs + 1)
+
+ if not c2 then
+ return bs + 2, bs, 2, c2
+ end
+
+ -- validate byte 2
+ if c == 224 and (c2 < 160 or c2 > 191) then
+ return bs + 2, bs, 2, c2
+ elseif c == 237 and (c2 < 128 or c2 > 159) then
+ return bs + 2, bs, 2, c2
+ elseif c2 < 128 or c2 > 191 then
+ return bs + 2, bs, 2, c2
+ end
+
+ local c3 = byte(str, bs + 2)
+ if not c3 or c3 < 128 or c3 > 191 then
+ return bs + 3, bs, 3, c3
+ end
+
+ return bs + 3
+ -- UTF8-4
+ elseif c >= 240 and c <= 244 then
+ local c2 = byte(str, bs + 1)
+
+ if not c2 then
+ return bs + 2, bs, 2, c2
+ end
+
+ -- validate byte 2
+ if c == 240 and (c2 < 144 or c2 > 191) then
+ return bs + 2, bs, 2, c2
+ elseif c == 244 and (c2 < 128 or c2 > 143) then
+ return bs + 2, bs, 2, c2
+ elseif c2 < 128 or c2 > 191 then
+ return bs + 2, bs, 2, c2
+ end
+
+ local c3 = byte(str, bs + 2)
+ if not c3 or c3 < 128 or c3 > 191 then
+ return bs + 3, bs, 3, c3
+ end
+
+ local c4 = byte(str, bs + 3)
+ if not c4 or c4 < 128 or c4 > 191 then
+ return bs + 4, bs, 4, c4
+ end
+
+ return bs + 4
+ else -- c > 245
+ return bs + 1, bs, 1, c
+ end
+end
+
+local function utf8validate(str, byte_pos)
+ local result = {}
+ for nbs, bs, part, code in utf8validator, str, byte_pos do
+ if bs then
+ result[#result + 1] = { pos = bs, part = part, code = code }
+ end
+ end
+ return #result == 0, result
+end
+
+local function utf8codes(str)
+ local max_len = #str
+ local bs = 1
+ return function(skip_ptr)
+ if bs > max_len then return nil end
+ local pbs = bs
+ bs = utf8next(str, pbs)
+
+ return pbs, utf8unicode(str, pbs, pbs), pbs
+ end
+end
+
+
+--[[--
+differs from Lua 5.3 utf8.offset in accepting any byte positions (not only head byte) for all n values
+
+h - head, c - continuation, t - tail
+hhhccthccthccthcthhh
+ ^ start byte pos
+searching current charracter head by moving backwards
+hhhccthccthccthcthhh
+ ^ head
+
+n == 0: current position
+n > 0: n jumps forward
+n < 0: n more scans backwards
+--]]--
+local function utf8offset(str, n, bs)
+ local l = #str
+ if not bs then
+ if n < 0 then
+ bs = l + 1
+ else
+ bs = 1
+ end
+ end
+ if bs <= 0 or bs > l + 1 then
+ error("bad argument #3 to 'offset' (position out of range)")
+ end
+
+ if n == 0 then
+ if bs == l + 1 then
+ return bs
+ end
+ while true do
+ local b = byte(str, bs)
+ if (0 < b and b < 127)
+ or (194 < b and b < 244) then
+ return bs
+ end
+ bs = bs - 1
+ if bs < 1 then
+ return
+ end
+ end
+ elseif n < 0 then
+ bs = bs - 1
+ repeat
+ if bs < 1 then
+ return
+ end
+
+ local b = byte(str, bs)
+ if (0 < b and b < 127)
+ or (194 < b and b < 244) then
+ n = n + 1
+ end
+ bs = bs - 1
+ until n == 0
+ return bs + 1
+ else
+ while true do
+ if bs > l then
+ return
+ end
+
+ local b = byte(str, bs)
+ if (0 < b and b < 127)
+ or (194 < b and b < 244) then
+ n = n - 1
+ for i = 1, n do
+ if bs > l then
+ return
+ end
+ bs = utf8next(str, bs)
+ end
+ return bs
+ end
+ bs = bs - 1
+ end
+ end
+
+end
+
+local function utf8replace (s, mapping)
+ if type(s) ~= "string" then
+ error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")")
+ end
+ if type(mapping) ~= "table" then
+ error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. ")")
+ end
+ local result = utf8.raw.gsub( s, utf8charpattern, mapping )
+ return result
+end
+
+local function utf8upper (s)
+ return utf8replace(s, utf8.config.conversion.lc_uc)
+end
+
+if utf8.config.conversion.lc_uc then
+ upper = utf8upper
+end
+
+local function utf8lower (s)
+ return utf8replace(s, utf8.config.conversion.uc_lc)
+end
+
+if utf8.config.conversion.uc_lc then
+ lower = utf8lower
+end
+
+utf8.len = utf8len
+utf8.sub = utf8sub
+utf8.reverse = utf8reverse
+utf8.char = utf8char
+utf8.unicode = utf8unicode
+utf8.byte = utf8byte
+utf8.next = utf8next
+utf8.gensub = utf8gensub
+utf8.validator = utf8validator
+utf8.validate = utf8validate
+utf8.dump = dump
+utf8.format = format
+utf8.lower = lower
+utf8.upper = upper
+utf8.rep = rep
+utf8.raw = {}
+for k,v in pairs(string) do
+ utf8.raw[k] = v
+end
+
+utf8.charpattern = utf8charpattern
+utf8.offset = utf8offset
+if _VERSION == 'Lua 5.3' then
+ local utf8_53 = require "utf8"
+ utf8.codes = utf8_53.codes
+ utf8.codepoint = utf8_53.codepoint
+ utf8.len53 = utf8_53.len
+else
+ utf8.codes = utf8codes
+ utf8.codepoint = utf8unicode
+end
+
+return utf8
+
+end