1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
-- High-level pattern functions (find, match, gmatch, gsub) for the utf8 library.
-- The module returns an initializer: it receives the library table `utf8`,
-- attaches the four functions to it and returns that same table.
return function(utf8)
-- Local aliases for the library primitives used by the functions below.
local utf8sub = utf8.sub
local utf8gensub = utf8.gensub
-- unpack is read from the library configuration.
-- NOTE(review): presumably this abstracts over unpack / table.unpack across
-- Lua versions -- confirm against the config module.
local unpack = utf8.config.unpack
-- Builds a matcher function from a pattern; loaded through the library's own
-- require mechanism rather than the global `require`.
local generate_matcher_function = utf8:require 'regex_parser'
--- Return the matcher function for a pattern, memoizing it when caching is on.
-- Matchers are stored in `utf8.config.cache`, in the "plain" or "regex"
-- sub-table depending on `plain`, keyed by the pattern itself. When
-- `utf8.config.cache` is not set, a matcher is generated on every call.
-- @param regex  pattern to build the matcher for
-- @param plain  truthy to build (and cache) the matcher in plain mode
-- @return the matcher produced by generate_matcher_function
local function get_matcher_function(regex, plain)
  local kind = plain and "plain" or "regex"

  local cache = utf8.config.cache
  if cache then
    local cached = cache[kind][regex]
    if cached then
      return cached
    end
  end

  local matcher = generate_matcher_function(regex, plain)

  -- Read the cache setting again after generation, exactly as the lookup did.
  cache = utf8.config.cache
  if cache then
    cache[kind][regex] = matcher
  end
  return matcher
end
--- UTF-8 aware counterpart of string.find.
-- @param str    subject string
-- @param regex  pattern to search for
-- @param init   optional start position; a negative value counts back from
--               the end of the string (in characters, via utf8.len)
-- @param plain  truthy to request a plain-mode matcher
-- @return start and finish of the match followed by its captures,
--         or nil when the matcher reports no match
local function utf8find(str, regex, init, plain)
  local func = get_matcher_function(regex, plain)

  if init and init < 0 then
    -- Translate the end-relative position to an absolute one. If it reaches
    -- before the first character, start at 1 as string.find does instead of
    -- handing the matcher a zero or negative position.
    init = math.max(utf8.len(str) + init + 1, 1)
  end

  -- A missing init is passed through unchanged (nil), as before.
  local ctx, result, captures = func(str, init, utf8)
  if not ctx then return nil end

  utf8.debug('ctx:', ctx)
  utf8.debug('result:', result)
  utf8.debug('captures:', captures)

  return result.start, result.finish, unpack(captures)
end
--- UTF-8 aware counterpart of string.match.
-- @param str    subject string
-- @param regex  pattern to match (always used in non-plain mode)
-- @param init   optional start position; a negative value counts back from
--               the end of the string (in characters, via utf8.len)
-- @return the captures of the match, or the whole matched substring when the
--         pattern produced no captures; nil when there is no match
local function utf8match(str, regex, init)
  local func = get_matcher_function(regex, false)

  if init and init < 0 then
    -- Translate the end-relative position to an absolute one. If it reaches
    -- before the first character, start at 1 as string.match does instead of
    -- handing the matcher a zero or negative position.
    init = math.max(utf8.len(str) + init + 1, 1)
  end

  -- A missing init is passed through unchanged (nil), as before.
  local ctx, result, captures = func(str, init, utf8)
  if not ctx then return nil end

  utf8.debug('ctx:', ctx)
  utf8.debug('result:', result)
  utf8.debug('captures:', captures)

  if #captures > 0 then return unpack(captures) end
  return utf8sub(str, result.start, result.finish)
end
--- UTF-8 aware counterpart of string.gmatch.
-- Returns an iterator that yields, for each successive match, its captures or,
-- when the pattern has none, the matched substring.
-- @param str    subject string
-- @param regex  pattern; a leading '^' is escaped so it is matched literally
-- @return iterator function
local function utf8gmatch(str, regex)
  -- An anchor makes no sense while iterating, so turn a leading '^' into '%^'.
  if utf8sub(regex, 1, 1) == '^' then
    regex = '%' .. regex
  end

  local matcher = get_matcher_function(regex, false)
  local next_pos = 1

  return function()
    local ctx, result, captures = matcher(str, next_pos, utf8)
    if not ctx then
      return nil
    end

    utf8.debug('ctx:', ctx)
    utf8.debug('result:', result)
    utf8.debug('captures:', captures)

    -- Always advance by at least one position so that an empty match
    -- (finish < start) cannot be returned again forever.
    next_pos = math.max(result.finish + 1, result.start + 1)

    if #captures == 0 then
      return utf8sub(str, result.start, result.finish)
    end
    return unpack(captures)
  end
end
--- Compute the replacement for a single match (helper for gsub).
-- @param repl  replacement spec:
--   * string   -- template; '%' followed by a digit inserts that capture
--                 ('%0' is the whole match), '%' followed by any other
--                 character inserts that character literally
--   * number   -- converted with tostring and treated as a string, which is
--                 what string.gsub does for numeric replacements
--   * table    -- looked up with the first capture as key
--   * function -- called with all captures
--   For table/function, a nil or false result keeps the original match.
--   Any other type yields the empty string (unchanged legacy behavior).
-- @param args  table holding the whole match at [0] and captures at 1..n
-- @return the replacement value
local function replace(repl, args)
  local kind = type(repl)

  if kind == 'number' then
    -- Previously a numeric repl fell through every branch and silently
    -- replaced each match with ''.
    repl = tostring(repl)
    kind = 'string'
  end

  if kind == 'string' then
    -- Collect the pieces and join once instead of growing a string per char.
    local pieces, count = {}, 0
    local after_percent = false
    for _, c in utf8gensub(repl) do
      local piece
      if after_percent then
        local index = tonumber(c)
        if index then
          piece = assert(args[index], "invalid capture index %" .. c)
        else
          piece = c
        end
        after_percent = false
      elseif c == '%' then
        after_percent = true
      else
        piece = c
      end
      if piece then
        count = count + 1
        pieces[count] = piece
      end
    end
    return table.concat(pieces)
  elseif kind == 'table' then
    return repl[args[1]] or args[0]
  elseif kind == 'function' then
    -- Parenthesized by `or`: only the first return value is used.
    return repl(unpack(args, 1)) or args[0]
  end

  return ''
end
--- UTF-8 aware counterpart of string.gsub.
-- @param str    subject string
-- @param regex  pattern (always used in non-plain mode)
-- @param repl   replacement spec, interpreted by `replace`
-- @param limit  optional maximum number of replacements; when omitted (or -1)
--               every match is replaced. The loop stops when the replacement
--               count equals `limit`, so 0 performs no replacement.
-- @return the resulting string and the number of replacements made
local function utf8gsub(str, regex, repl, limit)
  limit = limit or -1

  local func = get_matcher_function(regex, false)

  -- A pattern anchored with '^' can only match at the start of the subject,
  -- so at most one attempt may be made. Without this guard the matcher is
  -- retried at each continuation position and, if it anchors at the position
  -- it is given, replaces more than string.gsub would.
  local anchored = utf8sub(regex, 1, 1) == '^'

  -- Output is collected in a buffer and joined once at the end.
  local pieces, count = {}, 0
  local prev_sub_finish = 1
  local continue_pos = 1
  local n = 0

  while limit ~= n do
    local ctx, result, captures = func(str, continue_pos, utf8)
    if not ctx then break end

    local whole = utf8sub(str, result.start, result.finish)
    utf8.debug('ctx:', ctx)
    utf8.debug('result:', result)
    utf8.debug('result:', whole)
    utf8.debug('captures:', captures)

    -- Advance by at least one position so an empty match cannot loop forever.
    continue_pos = math.max(result.finish + 1, result.start + 1)

    local args
    if #captures > 0 then
      args = {[0] = whole, unpack(captures)}
    else
      -- Without captures the whole match acts as the first capture.
      args = {[0] = whole, whole}
    end

    -- Unmatched text between the previous match and this one, then the
    -- replacement for this match.
    pieces[count + 1] = utf8sub(str, prev_sub_finish, result.start - 1)
    pieces[count + 2] = replace(repl, args)
    count = count + 2

    prev_sub_finish = result.finish + 1
    n = n + 1

    if anchored then break end
  end

  pieces[count + 1] = utf8sub(str, prev_sub_finish)
  return table.concat(pieces), n
end
-- Attach the high-level functions defined above to the library table that was
-- passed to this initializer, making them available as utf8.find, utf8.match,
-- utf8.gmatch and utf8.gsub.
utf8.find = utf8find
utf8.match = utf8match
utf8.gmatch = utf8gmatch
utf8.gsub = utf8gsub
-- Hand the same (now extended) table back to the caller.
return utf8
end
|