1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/module.h>
30#include <linux/types.h>
31#include <linux/string.h>
32#include <linux/ctype.h>
33#include <linux/textsearch.h>
34#include <linux/textsearch_fsm.h>
35
36struct ts_fsm
37{
38 unsigned int ntokens;
39 struct ts_fsm_token tokens[0];
40};
41
42
43#define _A 0x100
44#define _W 0x200
45
46
47static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
48 [TS_FSM_SPECIFIC] = 0,
49 [TS_FSM_WILDCARD] = _W,
50 [TS_FSM_CNTRL] = _C,
51 [TS_FSM_LOWER] = _L,
52 [TS_FSM_UPPER] = _U,
53 [TS_FSM_PUNCT] = _P,
54 [TS_FSM_SPACE] = _S,
55 [TS_FSM_DIGIT] = _D,
56 [TS_FSM_XDIGIT] = _D | _X,
57 [TS_FSM_ALPHA] = _U | _L,
58 [TS_FSM_ALNUM] = _U | _L | _D,
59 [TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
60 [TS_FSM_GRAPH] = _P | _U | _L | _D,
61 [TS_FSM_ASCII] = _A,
62};
63
64static const u16 token_lookup_tbl[256] = {
65_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
66_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
67_W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S,
68_W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C,
69_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
70_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
71_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
72_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
73_W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P,
74_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
75_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
76_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
77_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D,
78_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D,
79_W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P,
80_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
81_W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X,
82_W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U,
83_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
84_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
85_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
86_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
87_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P,
88_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
89_W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X,
90_W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L,
91_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
92_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
93_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
94_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
95_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P,
96_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C,
97_W, _W, _W, _W,
98_W, _W, _W, _W,
99_W, _W, _W, _W,
100_W, _W, _W, _W,
101_W, _W, _W, _W,
102_W, _W, _W, _W,
103_W, _W, _W, _W,
104_W, _W, _W, _W,
105_W|_S|_SP, _W|_P, _W|_P, _W|_P,
106_W|_P, _W|_P, _W|_P, _W|_P,
107_W|_P, _W|_P, _W|_P, _W|_P,
108_W|_P, _W|_P, _W|_P, _W|_P,
109_W|_P, _W|_P, _W|_P, _W|_P,
110_W|_P, _W|_P, _W|_P, _W|_P,
111_W|_P, _W|_P, _W|_P, _W|_P,
112_W|_P, _W|_P, _W|_P, _W|_P,
113_W|_U, _W|_U, _W|_U, _W|_U,
114_W|_U, _W|_U, _W|_U, _W|_U,
115_W|_U, _W|_U, _W|_U, _W|_U,
116_W|_U, _W|_U, _W|_U, _W|_U,
117_W|_U, _W|_U, _W|_U, _W|_U,
118_W|_U, _W|_U, _W|_U, _W|_P,
119_W|_U, _W|_U, _W|_U, _W|_U,
120_W|_U, _W|_U, _W|_U, _W|_L,
121_W|_L, _W|_L, _W|_L, _W|_L,
122_W|_L, _W|_L, _W|_L, _W|_L,
123_W|_L, _W|_L, _W|_L, _W|_L,
124_W|_L, _W|_L, _W|_L, _W|_L,
125_W|_L, _W|_L, _W|_L, _W|_L,
126_W|_L, _W|_L, _W|_L, _W|_P,
127_W|_L, _W|_L, _W|_L, _W|_L,
128_W|_L, _W|_L, _W|_L, _W|_L};
129
130static inline int match_token(struct ts_fsm_token *t, u8 d)
131{
132 if (t->type)
133 return (token_lookup_tbl[d] & t->type) != 0;
134 else
135 return t->value == d;
136}
137
138static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state)
139{
140 struct ts_fsm *fsm = ts_config_priv(conf);
141 struct ts_fsm_token *cur = NULL, *next;
142 unsigned int match_start, block_idx = 0, tok_idx;
143 unsigned block_len = 0, strict, consumed = state->offset;
144 const u8 *data;
145
146#define GET_NEXT_BLOCK() \
147({ consumed += block_idx; \
148 block_idx = 0; \
149 block_len = conf->get_next_block(consumed, &data, conf, state); })
150
151#define TOKEN_MISMATCH() \
152 do { \
153 if (strict) \
154 goto no_match; \
155 block_idx++; \
156 goto startover; \
157 } while(0)
158
159#define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK())
160
161 if (end_of_data())
162 goto no_match;
163
164 strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE;
165
166startover:
167 match_start = consumed + block_idx;
168
169 for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) {
170 cur = &fsm->tokens[tok_idx];
171
172 if (likely(tok_idx < (fsm->ntokens - 1)))
173 next = &fsm->tokens[tok_idx + 1];
174 else
175 next = NULL;
176
177 switch (cur->recur) {
178 case TS_FSM_SINGLE:
179 if (end_of_data())
180 goto no_match;
181
182 if (!match_token(cur, data[block_idx]))
183 TOKEN_MISMATCH();
184 break;
185
186 case TS_FSM_PERHAPS:
187 if (end_of_data() ||
188 !match_token(cur, data[block_idx]))
189 continue;
190 break;
191
192 case TS_FSM_MULTI:
193 if (end_of_data())
194 goto no_match;
195
196 if (!match_token(cur, data[block_idx]))
197 TOKEN_MISMATCH();
198
199 block_idx++;
200
201
202 case TS_FSM_ANY:
203 if (next == NULL)
204 goto found_match;
205
206 if (end_of_data())
207 continue;
208
209 while (!match_token(next, data[block_idx])) {
210 if (!match_token(cur, data[block_idx]))
211 TOKEN_MISMATCH();
212 block_idx++;
213 if (end_of_data())
214 goto no_match;
215 }
216 continue;
217
218
219
220
221
222 case TS_FSM_HEAD_IGNORE:
223 if (end_of_data())
224 continue;
225
226 while (!match_token(next, data[block_idx])) {
227
228
229
230
231
232
233 if (!match_token(cur, data[block_idx]))
234 goto no_match;
235
236 block_idx++;
237 if (end_of_data())
238 goto no_match;
239 }
240
241 match_start = consumed + block_idx;
242 continue;
243 }
244
245 block_idx++;
246 }
247
248 if (end_of_data())
249 goto found_match;
250
251no_match:
252 return UINT_MAX;
253
254found_match:
255 state->offset = consumed + block_idx;
256 return match_start;
257}
258
259static struct ts_config *fsm_init(const void *pattern, unsigned int len,
260 gfp_t gfp_mask)
261{
262 int i, err = -EINVAL;
263 struct ts_config *conf;
264 struct ts_fsm *fsm;
265 struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern;
266 unsigned int ntokens = len / sizeof(*tokens);
267 size_t priv_size = sizeof(*fsm) + len;
268
269 if (len % sizeof(struct ts_fsm_token) || ntokens < 1)
270 goto errout;
271
272 for (i = 0; i < ntokens; i++) {
273 struct ts_fsm_token *t = &tokens[i];
274
275 if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX)
276 goto errout;
277
278 if (t->recur == TS_FSM_HEAD_IGNORE &&
279 (i != 0 || i == (ntokens - 1)))
280 goto errout;
281 }
282
283 conf = alloc_ts_config(priv_size, gfp_mask);
284 if (IS_ERR(conf))
285 return conf;
286
287 fsm = ts_config_priv(conf);
288 fsm->ntokens = ntokens;
289 memcpy(fsm->tokens, pattern, len);
290
291 for (i = 0; i < fsm->ntokens; i++) {
292 struct ts_fsm_token *t = &fsm->tokens[i];
293 t->type = token_map[t->type];
294 }
295
296 return conf;
297
298errout:
299 return ERR_PTR(err);
300}
301
302static void *fsm_get_pattern(struct ts_config *conf)
303{
304 struct ts_fsm *fsm = ts_config_priv(conf);
305 return fsm->tokens;
306}
307
308static unsigned int fsm_get_pattern_len(struct ts_config *conf)
309{
310 struct ts_fsm *fsm = ts_config_priv(conf);
311 return fsm->ntokens * sizeof(struct ts_fsm_token);
312}
313
314static struct ts_ops fsm_ops = {
315 .name = "fsm",
316 .find = fsm_find,
317 .init = fsm_init,
318 .get_pattern = fsm_get_pattern,
319 .get_pattern_len = fsm_get_pattern_len,
320 .owner = THIS_MODULE,
321 .list = LIST_HEAD_INIT(fsm_ops.list)
322};
323
324static int __init init_fsm(void)
325{
326 return textsearch_register(&fsm_ops);
327}
328
329static void __exit exit_fsm(void)
330{
331 textsearch_unregister(&fsm_ops);
332}
333
334MODULE_LICENSE("GPL");
335
336module_init(init_fsm);
337module_exit(exit_fsm);
338