-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathentities.py
642 lines (627 loc) · 47.3 KB
/
entities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
# -*- coding: UTF-8 -*-
# common intents
def common():
    """Return the pattern table for the common (general-purpose) intents.

    The result maps an intent name (e.g. ``"yes"``, ``"thx"``, ``"question"``)
    to a list of pattern dicts.  Every pattern dict has a ``"stem"`` and may
    carry ``"wordclass"``, ``"affix"``, ``"prefix"``, ``"inc"``, ``"exc"``,
    ``"max_words"``, ``"match_stem"`` and ``"boundary"``.  How these keys are
    interpreted is decided by the consuming matcher, which is not part of this
    function.

    A fresh dict is built on every call, so callers may mutate the result.

    All regular expressions are written as raw strings so that sequences such
    as ``\\s`` or ``\\(`` are not treated as (invalid) Python string escapes.
    """
    # Apology pattern used as an exclusion by three "pls" entries below.
    apology = r"(meg)?bocs(i(ka)?|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?"

    return {
        "yes": [
            {"stem": "y", "max_words": 1},
            {"stem": "yes"},
            {"stem": "igen", "exc": [{"stem": "de"}]},
            {"stem": "aha"},
            {"stem": "ja", "affix": ["ja", "h"], "exc": [{"stem": "de"}]},
            {"stem": "ok", "affix": ["é", "s", "és", "sa", "ay"], "exc": [{"stem": "de"}, {"stem": "nem"}]},
            {"stem": "jól", "inc": [{"stem": "ért", "wordclass": "verb"}],
             "exc": [{"stem": "de"}, {"stem": "nem"}]},
            {"stem": "rendben", "exc": [{"stem": "nincs"}]},
            {"stem": "biztos", "affix": ["an"], "exc": [{"stem": "nem"}, {"stem": "sem"}]},
            {"stem": "akarom", "exc": [{"stem": "nem"}, {"stem": "sem"}]},
            {"stem": "szeretné", "match_stem": False, "affix": ["k", "m"],
             "exc": [{"stem": "nem"}, {"stem": "sem"}], "max_words": 2},
            {"stem": "kére", "match_stem": False, "affix": ["k", "m"],
             "exc": [{"stem": "nem"}, {"stem": "sem"}], "max_words": 2},
            {"stem": "naná"},
            {"stem": "nyugodtan", "max_words": 3},
            {"stem": "persze", "max_words": 1},
            {"stem": "nagyon", "max_words": 1},
        ],
        "no": [
            {"stem": "n", "max_words": 1},
            {"stem": "no", "max_words": 3},
            {"stem": "nem", "exc": [
                {"stem": "megy"},
                {"stem": "baj"},
                {"stem": "tud", "wordclass": "verb"},
                {"stem": "ért", "wordclass": "verb"},
            ]},
            {"stem": "ne", "exc": [{"stem": "haragudj", "affix": ["on"]}]},
            {"stem": "soha"},
            {"stem": "mégse", "affix": ["m"]},
            {"stem": r"ros+z\s(v[aá]lasz|vic+|megold[aá]s)", "wordclass": "regex"},
            {"stem": "nincs rendben"},
            {"stem": "buta vagy"},
        ],
        "hi": [
            {"stem": r"ha?i+", "wordclass": "regex"},
            {"stem": r"s+z+i+[aoó](ka|sztok)?", "wordclass": "regex"},
            {"stem": "helló", "affix": ["ka"]},
            {"stem": r"szer?[bv][au]sz(tok)?", "wordclass": "regex"},
            {"stem": "hali", "affix": ["hó"]},
            {"stem": r"(sz[eé]p|j[oó])\s?(reg+el|nap|est[eé])(o?t|[eéuü]nk)", "wordclass": "regex"},
            {"stem": r"[uü]dv([oö]z[oö]?l+(e[kt])?([eoö]m)?)?", "wordclass": "regex"},
            {"stem": "örvendek"},
        ],
        "bye": [
            {"stem": "bye"},
            {"stem": "viszlát"},
            {"stem": "viszont látásra"},
            {"stem": "jó éj", "affix": ["t", "szakát"]},
            {"stem": "jóccakát"},
            {"stem": "mennem kell"},
            {"stem": "csumi"},
            {"stem": r"cs[aáoöő]+[oó]*(v[aá]z?)?", "wordclass": "regex"},
            {"stem": "puszi", "affix": ["llak"], "max_words": 3},
            {"stem": "aludni"},
            {"stem": "feksze", "prefix": ["le"], "affix": ["k", "m"], "match_stem": False},
        ],
        "thx": [
            {"stem": r"(ezer\s?)?(k[oö]s+z|k[oösz][oösz][oösz])(i(ke)?|ke|[oö]n[oö]m|[oö]nj[uü]k|[eoö]net(em)?|csi|ent+y[uüű])?(\s?sz[eé]pen)?",
             "wordclass": "regex"},
            {"stem": r"[ht][ht]x", "wordclass": "regex"},
            {"stem": r"t(ha|h?e)nks?\s?(you)?", "wordclass": "regex"},
            {"stem": "danke"},
            {"stem": "neked is", "max_words": 3},
            {"stem": "magának is", "max_words": 3},
            {"stem": "önnek is", "max_words": 3},
        ],
        "pls": [
            {"stem": r"p+l+[iíea]*[zs]+e*", "wordclass": "regex"},
            {"stem": r"l[eé]+[cgyt]+[sz]*[ií]+(ves|keh?)?", "wordclass": "regex"},
            {"stem": r"l[eé](sz(el)?|gy(en)?|n+[eé]l).*?(kedves|sz[ií](ves)?)", "wordclass": "regex"},
            {"stem": r"szeretn[eé](k|m)", "wordclass": "regex", "exc": [
                {"stem": apology, "wordclass": "regex"},
                {"stem": "elnézés", "wordclass": "noun", "match_stem": False},
            ]},
            {"stem": r"(meg)?k[eé]r(het)?((n[eéi])?l?e?[km]?)", "wordclass": "regex", "exc": [
                {"stem": apology, "wordclass": "regex"},
                {"stem": "elnézés", "wordclass": "noun", "match_stem": False},
            ]},
            {"stem": r"szeretn[eé]([km]|lek)", "wordclass": "regex", "exc": [
                {"stem": apology, "wordclass": "regex"},
                {"stem": "elnézés", "wordclass": "noun", "match_stem": False},
            ]},
        ],
        "welks": [
            {"stem": "nincs mit"},
            {"stem": r"(nagyon\s?)?(is\s)?sz[ií]ves(en|\s?[oö]r[oö]mest)", "wordclass": "regex"},
            {"stem": r"ugyan\,?\shag[gy]\w{1,3}", "wordclass": "regex"},
            {"stem": r"hag[gy]\w{1,3}\scsak", "wordclass": "regex"},
            {"stem": r"sz[aá]momra.+?([oö]r[oö]m|megtiszteltet[eé]s)", "wordclass": "regex"},
            {"stem": "gyors volt"},
        ],
        "sorry": [
            # Not the same as `apology` above: this variant also accepts "esz".
            {"stem": r"(meg)?bocs(i(ka)?|esz|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?",
             "wordclass": "regex"},
            {"stem": "elnézés", "wordclass": "noun", "match_stem": False},
            {"stem": r"sajn[aá]l(om|juk)", "wordclass": "regex"},
            {"stem": r"s+z*o*r+[iy]+(ka)?", "wordclass": "regex"},
        ],
        "lol": [
            {"stem": r"(h[aei]){2,}h?", "wordclass": "regex"},
            {"stem": r"o?(lol)+o?", "wordclass": "regex"},
            {"stem": r"[\:\;]\-*[dp\)9]+", "wordclass": "regex", "boundary": False},
            {"stem": r"[\(8]+\-*[:;]", "wordclass": "regex", "boundary": False},
            {"stem": r"rot?fl", "wordclass": "regex"},
            {"stem": "vicces", "exc": [{"stem": "nem"}]},
            {"stem": r"nevet(tem|ek|[uü]nk)", "wordclass": "regex", "exc": [{"stem": "nem"}]},
        ],
        "nvm": [
            {"stem": r"felejts[ed]n?\sel", "wordclass": "regex"},
            {"stem": "mindegy", "exc": [{"stem": "hogy"}, {"stem": "nem"}]},
            {"stem": "nem fontos"},
            {"stem": r"hagy(jad?|d)", "wordclass": "regex", "inc": [
                {"stem": "jól", "affix": ["van"]},
                {"stem": "á"},
                {"stem": "mindegy"},
                {"stem": "inkább"},
            ]},
            {"stem": r"ne\s(is\s)?(foglalkoz+(on|[aá]l)?|t[oö]r[oöő]dj([oö]n|[eé]l)?)\s(vel(e|[uü]k)|[ae][vz]+[ae]l)",
             "wordclass": "regex"},
            {"stem": r"hagy\w+\sfigyelmen\sk[ií]v[uü]l", "wordclass": "regex", "exc": [{"stem": "ne"}]},
            {"stem": "nem baj", "max_words": 2},
        ],
        "help": [
            {"stem": "segít", "wordclass": "verb", "prefix": [], "exc": [{"stem": "nem"}, {"stem": "miben"}]},
            {"stem": "segítség", "wordclass": "noun", "exc": [{"stem": "nem"}]},
            {"stem": "help", "wordclass": "verb", "prefix": [], "exc": [{"stem": "nem"}]},
        ],
        "again": [
            {"stem": r"[uú]j(ra|b[oó]l|at)|(meg)?ism[eé]t(l[eé]s|el(je|ni)?d?)?|m[eé]g\s?eg+y(szer|et)|megint?",
             "wordclass": "regex", "exc": [{"stem": "vagyok"}, {"stem": "ne", "affix": ["m"]}]},
        ],
        "command": [
            {"stem": r"(csin[aá]l(jad?|d)|(keres|mutas|mond)[aedjos]+n?|n[eé]z[nz]?[eé]?[dl]|akaro[km]|utas[ií]t\w{1,})",
             "wordclass": "regex"},
            {"stem": r"haj[cts]+(a|[aá]?[ld])\sv[eé]gre", "wordclass": "regex"},
        ],
        "question": [
            {"stem": r"(\?+$)|(\?+\s\w+)", "wordclass": "regex"},
            {"stem": r"([^,][^,\S+]hogy|^hogy)(an)?", "wordclass": "regex"},
            {"stem": "hol"},
            {"stem": "honnan"},
            {"stem": "hová"},
            {"stem": "hány", "affix": ["an", "at", "ból"]},
            {"stem": "mettől"},
            {"stem": "meddig"},
            {"stem": "merre"},
            {"stem": "mennyi", "affix": ["en", "re"]},
            {"stem": "mi",
             "affix": ["t", "k", "ket", "kor", "korra", "lyen",
                       "lyenek", "nek", "től", "kortól", "korra",
                       "ből", "hez", "re", "vel"]},
            {"stem": r"ki(k?(e?t|nek|[bt][oöő]l|hez|re|[kv]el)|\saz?)", "wordclass": "regex",
             "exc": [{"stem": r"\w+[ad]\ski", "wordclass": "regex"}]},
        ],
        "conditional": [
            {"stem": r"(meg)?(vol|tud|[lt]en?)n[aáeé][dl]?", "wordclass": "regex"},
            {"stem": r"\w+h[ae]t\w+", "wordclass": "regex"},
        ],
        "profanity": [
            {"stem": r"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?(at)?(hat)?\s?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg|ki)?",
             "wordclass": "regex", "exc": [{"stem": "megye"}]},
            {"stem": "fasz", "prefix": ["ló", "agy"], "wordclass": "noun"},
            {"stem": "fasza", "wordclass": "adjective"},
            {"stem": "geci", "wordclass": "noun"},
            {"stem": "kurva", "affix": ["élet", "anya", "anyja", "annya"], "wordclass": "noun"},
            {"stem": "hülye", "wordclass": "adjective"},
            {"stem": r"pi(n|cs)[aá][dk]?(a?t|nak|ban?|[bt][oó]l|[eé]rt)?", "wordclass": "regex"},
            {"stem": r"((bekap(ja?|hato?|n[aái])?d?)|(kap.*?be))", "wordclass": "regex"},
            {"stem": r"(le)?szop(sz|ol|[jn][aá][dl]|hat(sz|n[aá]l|o[dl]))(\s?(le|ki))?", "wordclass": "regex"},
            {"stem": r"(geci|kurva)?(fos|szar)\w{0,3}", "wordclass": "regex"},
            {"stem": "szar", "wordclass": "verb"},
        ],
        "welldone": [
            {"stem": "fasza"},
            {"stem": "nagyszerű"},
            {"stem": "remek", "max_words": 5},
            {"stem": "jó", "prefix": ["kurva"],
             "exc": [{"stem": "de"}, {"stem": "nincs"}, {"stem": "nem"}, {"stem": "éjt"}, {"stem": "reggelt"},
                     {"stem": "napot"}, {"stem": "estét"}, {"stem": "éjszakát"}]},
            {"stem": r"j[oó]l\s?van", "wordclass": "regex"},
            {"stem": "király"},
            {"stem": r"[uü]gy(es|i\sv(agy|olt\w*))", "wordclass": "regex"},
            {"stem": r"(sz[eé]p\s(volt|munka))|(ez\s(lesz\s)?az)|(sz?uper)|zs[ií]r", "wordclass": "regex"},
            {"stem": "👍", "wordclass": "emoji"},
            {"stem": r"\(Y\)", "wordclass": "regex", "boundary": False},
            {"stem": "profi vagy"},
            {"stem": r"fant[aoö](rp|sz?t)i[ck](us)?(an)?", "wordclass": "regex"},
            {"stem": "szeretem", "inc": [{"stem": "amikor"}, {"stem": "ahogy"}], "exc": [{"stem": "nem"}]},
            {"stem": "azt a kurva"},
            {"stem": "menő", "prefix": ["geci"], "max_words": 3},
            {"stem": "zsír", "max_words": 3},
            {"stem": "fasza", "prefix": ["nagyon"], "max_words": 3},
            {"stem": "helyes", "max_words": 3,
             "exc": [{"stem": r"(let+|volna|mond\w*|[ns]em?)", "wordclass": "regex"}]},
        ],
        "dontknow": [
            {"stem": "fogalmam sincs", "affix": ["en"]},
            {"stem": r"(m[eé]g)?[ns]em?\stud(hat)?o\w+", "wordclass": "regex", "exc": [{"stem": "nem tudod"}]},
            {"stem": r"hon+an.+?tud(jam|(hat)?n[aá]m)", "wordclass": "regex"},
        ],
        "dontunderstand": [
            {"stem": r"(m[eé]g)?[ns]em?\s([eé]rte(t+e)?[lm](ek)?|v[aá]gom|hal+[ao](t+a)?[km])", "wordclass": "regex"},
            {"stem": r"(mit|hogy(an)?)\s([eé]rte(t+[eé])?|mond(t[aá])?o?)(sz|d|l)", "wordclass": "regex"},
            {"stem": r"meg\s?ism[eé]tel(het)?n\w+", "wordclass": "regex"},
            {"stem": r"ism[eé]tel[dj]\w*\s?meg", "wordclass": "regex"},
        ],
    }
# menu commands
def commands():
    """Return the pattern table for menu / application commands.

    The result maps a command name (e.g. ``"ok"``, ``"cancel"``, ``"play"``,
    ``"volume_up"``) to a list of pattern dicts.  Every pattern dict has a
    ``"stem"`` and may carry ``"wordclass"``, ``"affix"``, ``"prefix"``,
    ``"inc"``, ``"exc"``, ``"max_words"``, ``"match_stem"`` and
    ``"boundary"``.  How these keys are interpreted is decided by the
    consuming matcher, which is not part of this function.

    A fresh dict is built on every call, so callers may mutate the result.

    All regular expressions are written as raw strings so that sequences such
    as ``\\s`` or ``\\w`` are not treated as (invalid) Python string escapes.
    """
    # Negation / "baj" pattern used as an exclusion by several entries below.
    negation = r"((m[eé]g)?[ns]em*i?|baj)"

    return {
        "ok": [
            {"stem": r"[jy]+e+a*[hps]*", "wordclass": "regex"},
            {"stem": "igen"},
            {"stem": "aha"},
            {"stem": "ja", "affix": ["ja", "h"]},
            {"stem": "ok", "affix": ["é", "s", "és", "sa", "ay", "ézd", "ézza"], "exc": [{"stem": "nem"}]},
            {"stem": "úgy", "exc": [{"stem": "nem"}]},
            {"stem": "így", "exc": [{"stem": negation, "wordclass": "regex"}]},
            {"stem": "jó", "wordclass": "adjective", "max_words": 4,
             "exc": [{"stem": negation, "wordclass": "regex"},
                     {"stem": r"(nap\w+|reg+elt|est[eé]t)", "wordclass": "regex"}]},
        ],
        "cancel": [
            {"stem": r"^([ae]z\s)?(\w+\s)?(nem?|no(pe|ne)?)(\s\w+)?(\s\w+)?$", "boundary": False,
             "exc": [{"stem": "jó"}, {"stem": "tud", "wordclass": "verb"}, {"stem": "sikerül", "affix": ["t"]},
                     {"stem": "haragudj", "affix": ["on"]}, {"stem": "tud", "wordclass": "verb"}],
             "wordclass": "regex"},
            {"stem": "cancel"},
            {"stem": "mégse", "affix": ["m"], "max_words": 4},
            {"stem": "elvetés"},
            {"stem": r"ves[ds]e?\sel", "wordclass": "regex"},
        ],
        "next": [
            {"stem": "next"},
            {"stem": "másik", "affix": ["at", "nak"], "max_words": 5, "exc": [{"stem": "ne", "affix": ["m"]}]},
            {"stem": "tovább", "max_words": 5, "exc": [{"stem": negation, "wordclass": "regex"}]},
            {"stem": "előre", "max_words": 5, "exc": [{"stem": negation, "wordclass": "regex"}]},
            {"stem": "még", "max_words": 4,
             "exc": [{"stem": "ne", "affix": ["m"]},
                     {"stem": r"\w+[ad]\smeg", "wordclass": "regex"},
                     {"stem": negation, "wordclass": "regex"},
                     {"stem": "egy"},
                     {"stem": "1"},
                     {"stem": r"hang\w*", "wordclass": "regex"}]},
            {"stem": "more"},
            {"stem": "continue"},
            {"stem": r"folyta[st]+(a|[ao]?[dn]|ni|[aá]s)?", "wordclass": "regex",
             "exc": [{"stem": negation, "wordclass": "regex"}]},
            {"stem": "következő", "affix": ["t", "re"]},
            {"stem": r"mond+([hj]a[dt]?)?(od|ja)?", "wordclass": "regex",
             "exc": [{"stem": r"(vala)?[km]i([jlnv]?y?[eé])?[klrt]?", "wordclass": "regex"},
                     {"stem": "nem"},
                     {"stem": "csak"},
                     {"stem": r"hogy(an)?\smond\w*", "wordclass": "regex"}]},
        ],
        "back": [
            {"stem": "back"},
            {"stem": "vissza", "max_words": 5, "affix": ["lép", "lépés"],
             "exc": [{"stem": r"hang\w*", "wordclass": "regex"}]},
            {"stem": "hátra", "max_words": 4},
            {"stem": "előző", "wordclass": "noun", "max_words": 5},
        ],
        "save": [
            {"stem": "save"},
            {"stem": "ment", "wordclass": "verb"},
            {"stem": "mentés", "wordclass": "noun"},
        ],
        "open": [
            {"stem": "open", "exc": [{"stem": "menü", "wordclass": "noun"}]},
            {"stem": "nyit", "wordclass": "verb", "exc": [{"stem": "menü", "wordclass": "noun"}]},
            {"stem": "nyis", "match_stem": False, "wordclass": "verb",
             "exc": [{"stem": "menü", "wordclass": "noun"}]},
        ],
        "delete": [
            {"stem": "del", "affix": ["ete"]},
            {"stem": "töröl", "wordclass": "verb"},
            {"stem": "törlés"},
            {"stem": r"(kuk[aá]|lomt[aá]r)(ba)?", "wordclass": "regex"},
        ],
        "exit": [
            {"stem": r"(exit|quit)(elj([eé][dln])?)?", "wordclass": "regex"},
            {"stem": "esc", "affix": ["ape"]},
            {"stem": "kilép", "wordclass": "verb", "prefix": [], "affix": ["ás"]},
            {"stem": "bezár", "wordclass": "verb", "prefix": [], "affix": ["ás"]},
            {"stem": r"(l[eé]pj?([eé][dln])?.+?ki|z[aá]r(ja)?d?.+?be)", "wordclass": "regex"},
            {"stem": r"(kapcso[lj]\w{1,3}\s?(m[aá]r\s)?(ki|le))|(csuk\w{1,3}\s?(m[aá]r\s)?be)",
             "wordclass": "regex"},
        ],
        "options": [
            {"stem": "options", "wordclass": "noun"},
            {"stem": "settings", "wordclass": "noun"},
            {"stem": r"be[aá]l+[ií]t\w*", "wordclass": "regex"},
            {"stem": r"[aá]l+[ií]ts.+?be", "wordclass": "regex"},
        ],
        "menu": [
            {"stem": "menü", "prefix": ["main", "fő", "al", "legördülő"], "affix": ["pont", "elem"],
             "wordclass": "noun"},
        ],
        "login": [
            {"stem": "login"},
            {"stem": "log in"},
            {"stem": "belép", "prefix": [], "wordclass": "verb"},
            {"stem": "bejelentkez", "prefix": [], "wordclass": "verb"},
            {"stem": r"l[eé]p.+?\sbe", "wordclass": "regex"},
            {"stem": r"jelentkez.+?\sbe", "wordclass": "regex"},
        ],
        "logout": [
            {"stem": "logout"},
            {"stem": "log out"},
            {"stem": "kilép", "prefix": [], "wordclass": "verb"},
            {"stem": "kijelentkez", "prefix": [], "wordclass": "verb"},
            {"stem": r"l[eé]p.+?\ski", "wordclass": "regex"},
            {"stem": r"jelentkez.+?\ski", "wordclass": "regex"},
        ],
        "error": [
            {"stem": "error", "wordclass": "noun"},
            {"stem": "hiba", "wordclass": "noun"},
            {"stem": "rossz", "wordclass": "adjective",
             "exc": [{"stem": r"[eé]rzem|kedv(e[dm]?)?|vagyok", "wordclass": "regex"}]},
            {"stem": r"nem\s?(siker[uü]lt|j[oó]l?|m[uüű]k[oö]d(ik|[oö]t+)|ment)(\s\w)?(\s\w)?$",
             "wordclass": "regex"},
            {"stem": r"(le|ki|be|meg)fagyot+", "wordclass": "regex"},
            {"stem": "nem megy"},
        ],
        "search": [
            {"stem": "keres", "wordclass": "verb"},
            {"stem": "find"},
            {"stem": "találd meg"},
        ],
        "undo": [
            {"stem": "visszavon", "wordclass": "verb", "prefix": []},
            {"stem": r"vis+za(.+?eg[eé]szet|l[eé]p([eé]s)?)", "wordclass": "regex"},
            {"stem": r"von.+?vis+za", "wordclass": "regex", "exc": [{"stem": "mégse"}]},
            {"stem": "undo"},
        ],
        "redo": [
            {"stem": "mégis"},
            {"stem": r"(meg)ism[eé]t(l[eé]s|el(je)?d?)?", "wordclass": "regex"},
            {"stem": r"el[oöő]rel[eé]p([eé]s)?", "wordclass": "regex"},
            {"stem": r"l[eé]p.+?el[oöő]re", "wordclass": "regex"},
            {"stem": "redo"},
            {"stem": r"m[eé]gse.+?von.+?vis+za", "wordclass": "regex"},
        ],
        "restart": [
            {"stem": r"ind[ií][ct]+sa?d?(\sel)?(\s[uú]j(ra|b[oó]l))", "wordclass": "regex"},
            {"stem": "újraindít", "wordclass": "verb"},
            {"stem": r"(([uú]jra)?kezd\w{0,5}|kezd\w{0,5}.+?([uú]jra|el[oöő]l?r[oö]l|(leg)?elej[eé](t|r)[oöő]l))",
             "wordclass": "regex"},
            {"stem": "restart", "wordclass": "verb", "prefix": []},
            {"stem": "reset", "wordclass": "verb", "prefix": []},
        ],
        "play": [
            {"stem": r"(le)?j[aá](ts+z|c+)([aá]([dls]|ni))?(\sle)?(\svalamit?)?(\segy)?", "wordclass": "regex"},
            {"stem": "play"},
            {"stem": "indít", "wordclass": "verb", "prefix": ["el"], "exc": [{"stem": "újra"}]},
        ],
        "stop": [
            {"stem": r"(meg|le)?[aá]l+(j+([aá]l)?|[ií][ct]+(s?a?d|sa|[aá](ni|s)))(\smeg|\sle)?", "wordclass": "regex"},
            {"stem": "stop"},
            {"stem": r"el[eé]g(\sis)?(\sle(sz|gyen))?(\sm[aá]r)?(\smost)?(\sen+yi)?", "wordclass": "regex"},
            {"stem": r"(kus+(olj([aá]l)?)?|fog(ja)?d\s?be)", "wordclass": "regex"},
            {"stem": r"(s+h{2,})|(p+s+z*t+)", "wordclass": "regex"},
            {"stem": r"a[bp]+\s?h?agy", "wordclass": "regex"},
        ],
        "pause": [
            {"stem": r"pau[sz][aáeé]([lz]+((as+a|[jz]a)?[dj]|[jz]a|ni))?(\sle)?", "wordclass": "regex"},
            {"stem": r"sz[uü]net(elt?(et)?([eé]?s+e?d?|ni)?)?", "wordclass": "regex"},
        ],
        "resume": [
            {"stem": r"folyta\w+", "wordclass": "regex"},
            {"stem": "resume"},
        ],
        "skip": [
            {"stem": r"(kihagy\w+|hag+yj?a?d?\ski|([aá]t|tov[aá]b+)(l[eé]p|ugr[aá])\w*|(ugr[aá]s|ugor\w+|l[eé]p(je)?[dn])\s([aá]t|tov[aá]b+))",
             "wordclass": "regex"},
            {"stem": r"(sz?kip+(el\w*)?|m[aá]sikat)", "wordclass": "regex"},
        ],
        "snooze": [
            {"stem": r"(sz[uú]ndi\w*|sz[uü]net\w*|sn[ouú]+z[eo]\w*|m[eé]g\s\d\sperc\w*|(sz[oó]lj\w*(\sr[aá]m)|jelez+[eé]?[dl]|cs[eoö]nges\w*)\s([uú]jra\s)?((kicsivel\s)?k[eé]s[oöő]b+)|\d\sperc\w*)",
             "wordclass": "regex"},
        ],
        "volume_up": [
            {"stem": r"((n[oö]vel\w+|magas\w+|fel|t[oö]b+)\s(\w+\s)?hang(er)?[oöő]?t?|hang(er)?[oöő]?t?\s(n[oö]vel\w+|magas\w+|fel))",
             "wordclass": "regex"},
            {"stem": r"hangos\w+", "wordclass": "regex", "exc": [{"stem": "túl"}]},
            # "too quiet" -> louder.  The original attached an exclusion on
            # "túl" to this entry although the pattern itself requires that
            # word; assuming "exc" rejects utterances containing the excluded
            # stem, the entry could not match accented input.  Dropped so it
            # mirrors the "túl hangos" entry under "volume_down".
            {"stem": r"t[uú]l\shalk\w*", "wordclass": "regex"},
            {"stem": r"(nem|alig|sem+it\s[ns]em?)\shal+[ao][km]", "wordclass": "regex"},
            {"stem": r"adj\w*(\sm[eé]g)?(\sr[aá])?(\sm[eé]g)?\s(hang\w+t|kaka[oó]t)", "wordclass": "regex"},
        ],
        "volume_down": [
            {"stem": r"((cs[oö]k+en\w+|alacsony\w+|le(j+eb+)?|keveseb+)\s(\w+\s)?hang(er)?[eoöő]?(j[aáeé])?t?|hang(er)?[eoöő]?(j[aáeé])?t?\s(cs[oö]k+en\w+|alacsony\w+|le(j+eb+)?))",
             "wordclass": "regex"},
            {"stem": r"t[uú]l\shangos\w*", "wordclass": "regex"},
            {"stem": r"halk[aií]\w+", "wordclass": "regex", "exc": [{"stem": "túl"}]},
        ],
        "mute": [
            {"stem": r"n[eé]m[aáií]\w{0,3}", "wordclass": "regex",
             "exc": [{"stem": "vége"}, {"stem": "vissza"}, {"stem": "feloldás", "affix": ["a"]}]},
            {"stem": "mute", "wordclass": "verb", "prefix": ["le", "ki"]},
            {"stem": r"(kus+(ol\w*)?|cs[eoö]nd(ben?|et)?)(\sel|\s?legyen)?", "wordclass": "regex"},
        ],
        "unmute": [
            {"stem": r"n[eé]m[aáií]\w{0,3}", "wordclass": "regex",
             "inc": [{"stem": "vége"}, {"stem": "vissza"}, {"stem": "feloldás", "affix": ["a"]}]},
            {"stem": "unmute", "wordclass": "verb", "prefix": []},
            {"stem": r"hang(o\w+)?", "wordclass": "regex",
             "inc": [{"stem": "vissza"},
                     {"stem": r"((be|vis+za)kapcsol\w*)|(kapcsol\w*\s(az?\s)?(\w+\s)?(be|vis+za))",
                      "wordclass": "regex"}]},
        ],
        "order": [
            {"stem": "rendez", "wordclass": "verb", "prefix": ["el"]},
            {"stem": "rendel", "wordclass": "verb", "prefix": ["meg"]},
        ],
        "install": [
            {"stem": "telepít", "wordclass": "verb", "prefix": ["fel", "újra", "rá"]},
            {"stem": "install", "wordclass": "verb", "prefix": ["un", "fel", "le", "rá"]},
            {"stem": "rak", "wordclass": "verb", "prefix": ["fel"]},
        ],
    }
# Hungarian counties and county seats
def counties():
    """Return the pattern table for Hungarian counties.

    The result maps an ASCII county key (e.g. ``"bacs-kiskun"``, ``"pest"``)
    to a list of pattern dicts naming the county and, for most counties, one
    or more of its towns.  Every pattern dict has a ``"stem"`` and may carry
    ``"wordclass"``, ``"affix"``, ``"prefix"`` and ``"ignorecase"``; their
    interpretation is decided by the consuming matcher, which is not part of
    this function.

    A fresh dict is built on every call.

    The original literal listed the ``"somogy"`` key twice with the same
    value; the redundant second occurrence has been removed, which leaves the
    returned dict unchanged.
    """
    return {
        "bacs-kiskun": [
            {"stem": "Bács-Kiskun", "wordclass": "noun"},
            {"stem": "Kecskemét", "wordclass": "noun"},
        ],
        "baranya": [
            {"stem": "Baranya", "wordclass": "noun"},
            {"stem": "Pécs", "affix": ["ett"], "wordclass": "noun"},
        ],
        "bekes": [
            {"stem": "Békés", "wordclass": "noun"},
            {"stem": "Békéscsaba", "wordclass": "noun"},
        ],
        "borsod-abauj-zemplen": [
            {"stem": "Borsod", "affix": ["-Abaúj-Zemplén"], "wordclass": "noun"},
            {"stem": "Zemplén", "wordclass": "noun"},
            {"stem": "BAZ", "ignorecase": False},
        ],
        "csongrad": [
            {"stem": "Csongrád", "wordclass": "noun"},
            {"stem": "Szeged", "wordclass": "noun"},
        ],
        "fejer": [
            {"stem": "Fejér", "wordclass": "noun"},
            {"stem": "Fehérvár", "prefix": ["Székes"], "wordclass": "noun"},
        ],
        "gyor-moson-sopron": [
            {"stem": "Győr", "affix": ["-Moson-Sopron"], "wordclass": "noun"},
            {"stem": "Sopron", "wordclass": "noun"},
        ],
        "hajdu-bihar": [
            {"stem": "Hajdú-Bihar", "wordclass": "noun"},
            {"stem": "Debrecen", "wordclass": "noun"},
        ],
        "heves": [
            {"stem": "Heves", "wordclass": "noun"},
            {"stem": "Eger", "wordclass": "noun"},
            {"stem": "egri"},
        ],
        "jasz-nagykun-szolnok": [
            {"stem": "Szolnok", "wordclass": "noun", "prefix": ["Jász-Nagykun-"]},
        ],
        "komarom-esztergom": [
            {"stem": "Esztergom", "wordclass": "noun", "prefix": ["Komárom-"]},
            {"stem": "Komárom", "wordclass": "noun"},
            {"stem": "Tata", "affix": ["bánya"], "wordclass": "noun"},
        ],
        "nograd": [
            {"stem": "Nógrád", "wordclass": "noun"},
            {"stem": "Salgótarján", "wordclass": "noun"},
        ],
        "pest": [
            {"stem": "Buda", "wordclass": "noun", "affix": ["pest"]},
            {"stem": "Pest", "wordclass": "noun"},
            # Roman-numeral district reference, e.g. "XI. kerület".
            {"stem": r"[IVX]+.?(-?ik)?\sker([uü]let)?\w{0,3}", "wordclass": "regex"},
        ],
        "somogy": [
            {"stem": "Somogy", "wordclass": "noun"},
            {"stem": "Kaposvár", "wordclass": "noun"},
        ],
        "szabolcs-szatmar-bereg": [
            {"stem": "Szabolcs", "wordclass": "noun", "affix": ["-Szatmár-Bereg"]},
            {"stem": "Szatmár", "wordclass": "noun"},
            {"stem": "Nyíregyháza", "wordclass": "noun"},
        ],
        "tolna": [
            {"stem": "Tolna", "wordclass": "noun"},
            {"stem": "Szekszárd", "wordclass": "noun"},
        ],
        "vas": [
            {"stem": "Vas", "wordclass": "noun"},
            {"stem": "Szombathely", "wordclass": "noun"},
        ],
        "veszprem": [
            {"stem": "Veszprém", "wordclass": "noun"},
        ],
        "zala": [
            {"stem": "Zala", "wordclass": "noun", "affix": ["egerszeg"]},
        ],
    }
# days of the week, relative days (today, tomorrow, yesterday, ...) and weekday/weekend groups
def dow():
    """Return the pattern table for day references.

    The result maps an ASCII key to a list of pattern dicts.  It contains the
    relative days (``"ma"``, ``"holnap"``, ``"holnaputan"``, ``"tegnap"``,
    ``"tegnapelott"``), the seven day names, and two group keys:
    ``"hetkoznap"``, which also lists the stems for Monday to Friday, and
    ``"hetvege"``, which also lists the Saturday and Sunday stems.

    Every pattern dict has a ``"stem"`` and may carry ``"wordclass"`` and
    ``"exc"``; their interpretation is decided by the consuming matcher, which
    is not part of this function.

    A fresh dict is built on every call.  Regular expressions are written as
    raw strings so that ``\\s`` is not treated as an (invalid) Python string
    escape.
    """
    # "holnapután" pattern: both the "holnaputan" stem and the exclusion that
    # keeps "holnap" from claiming it.
    day_after_tomorrow = r"holnap\s?ut[aá]n(ig?|ra|nal|t[oó]l)?"

    return {
        "ma": [
            {"stem": r"m[aá](ig?|ra|val|t[oó]l)?", "wordclass": "regex"},
        ],
        "holnap": [
            {"stem": r"holnap(ig?|ra|pal|t[oó]l)?", "wordclass": "regex",
             "exc": [{"stem": day_after_tomorrow, "wordclass": "regex"}]},
        ],
        "holnaputan": [
            {"stem": day_after_tomorrow, "wordclass": "regex"},
        ],
        "tegnap": [
            {"stem": r"tegnap(ig?|ra|pal|t[oó]l)?", "wordclass": "regex",
             "exc": [{"stem": r"tegnap\sel[oő]t+?(ig?|re|t?el|t?[oó]l)?", "wordclass": "regex"}]},
        ],
        "tegnapelott": [
            {"stem": r"tegnap\s?el[oő]t+(ig?|re|t?el|t?[oóöő]l)?", "wordclass": "regex"},
        ],
        "hetfo": [{"stem": "hétfő", "wordclass": "noun"}],
        "kedd": [{"stem": "kedd", "wordclass": "noun"}],
        "szerda": [{"stem": "szerda", "wordclass": "noun"}],
        "csutortok": [{"stem": "csütörtök", "wordclass": "noun"}],
        "pentek": [{"stem": "péntek", "wordclass": "noun"}],
        "szombat": [
            {"stem": "szombat", "wordclass": "noun"},
            {"stem": "szonbat", "wordclass": "noun"},
        ],
        "vasarnap": [{"stem": "vasárnap", "wordclass": "noun"}],
        "hetkoznap": [
            {"stem": "hétköznap", "wordclass": "noun"},
            {"stem": "hétfő", "wordclass": "noun"},
            {"stem": "kedd", "wordclass": "noun"},
            {"stem": "szerda", "wordclass": "noun"},
            {"stem": "csütörtök", "wordclass": "noun"},
            {"stem": "péntek", "wordclass": "noun"},
        ],
        "hetvege": [
            {"stem": "hétvége", "wordclass": "noun"},
            {"stem": "szombat", "wordclass": "noun"},
            {"stem": "szonbat", "wordclass": "noun"},
            {"stem": "vasárnap", "wordclass": "noun"},
        ],
    }
# small talk intents
def smalltalk():
return {
"user_love": [{"stem": "szeretlek", "exc": [{"stem": "nem"}]},
{"stem": "szeretsz engem", "exc": [{"stem": "nem"}]},
{"stem": "tetszek neked", "exc": [{"stem": "nem"}]},
{"stem": "tetszel nekem", "exc": [{"stem": "nem"}]},
{"stem": "szerelmes.+?bel[eé]d", "wordclass": "regex", "exc": [{"stem": "nem"}]},
{"stem": "bel[eé]d.+?(szeret|es)tem", "wordclass": "regex"},
{"stem": "tal([aá]lko|i)z+(hat)?(unk|n[aá]nk)", "wordclass": "regex"},
{"stem": "([oö]le|karo)[lj]j([aáeé]l)?\s([aá]t|meg|bel[eé]m)", "wordclass": "regex"},
{"stem": "(meg|[aá]t|bel[eé]m)?([oö]lel|karol)(h[ae]t)?(sz|n[aáeé]l|j)", "wordclass": "regex"}, {
"stem": "(meg)?(cs[oó]kol|puszil)(j([aá]l)?\smeg|sz|hat(sz|n[aá]l)|[oó]z+(hat)?(unk|n[aáeé]n?k))",
"wordclass": "regex"},
{"stem": "(ad|dob|k[uü]ld)([jn]([aáeé]l)?|e?sz)(\segy)?(\snagy)?\s(puszi(k[aá])?t|cs[oó]kot)",
"wordclass": "regex"}, {
"stem": "le(szel|n+[eé]l|gy[eé]l)\sa\s(bar[aá]t(om|n[oöő]m)|fi[uú]m|csajom|szerelmem|valent[ií]n\w+)",
"wordclass": "regex"},
{"stem": "ismerkedn", "prefix": ["meg"], "affix": ["i", "ék"], "inc": [{"stem": "veled"}]},
{"stem": "szeretem", "inc": [{"stem": "önt"}, {"stem": "magát"}], "exc": [{"stem": "nem"}]}],
"user_flirting": [{"stem": "mi(lyen)?\s(ruha\s)?van\s?(most\s?)?rajtad", "wordclass": "regex"},
{"stem": "(meg)?(basz|dug)(unk|n[aá]lak|lak)", "wordclass": "regex"},
{"stem": "sz?exi?(e[lt]\w*)?", "wordclass": "regex"}, {
"stem": "folyt(ogas+([aá]?[dl])?|s([aá]l)?\smeg)\s(a\snyakam(at)?\s)?(a\s|egy\s)?(d[oö]gl[oö]t+|halot+)\smacsk[aá]val",
"wordclass": "regex"}, {"stem": "(le)?szop(sz|ol|(hat)?n[aá]l)", "wordclass": "regex"},
{"stem": "van barátod", "max_words": 4}],
"user_bored": [{"stem": "un(atkoz)?(om|unk)", "wordclass": "regex", "exc": [{"stem": "nem"}]},
{"stem": "szórakoztass"}],
"user_happy": [
{"stem": "j[oó]\s(a\s)?kedvem(\svan)?", "wordclass": "regex", "exc": [{"stem": "nincs"}, {"stem": "nem"}]},
{"stem": "jól vagyok", "exc": [{"stem": "nincs"}, {"stem": "nem"}]}, {"stem": "boldog", "exc": [{
"stem": "(sz[uü]l(i|t[eé]s\w*)|[uü]n+ep\w*|kar[aá]csony\w*|[eé]vfordul\w|([uú]j)?[eé]v\w*|h[uú]sv[eé]t\w*|n[eé]v\s?nap\w*|[ns]em)",
"wordclass": "regex"}]}],
"user_sad": [{"stem": "j[oó]\s(a\s)?kedvem", "wordclass": "regex", "inc": [{"stem": "nincs"}, {"stem": "nem"}]},
{"stem": "szomorú", "wordclass": "adjective", "inc": [{"stem": "vagyok"}]},
{"stem": "nem\s+(vagyok|[eé]rzem).+?j[oó]l", "wordclass": "regex"}],
"user_sick": [{
"stem": "((beteg|ros+zul)\s(vagyok|[eé]rzem)|(meg|le)betegedtem|nem\s[eé]rzem\s(magam(at)?\sj[oó]l|j[oó]l\smagam(at)?)|nem\svagyok\s(t[uú]l\s)?j[oó]l)",
"wordclass": "regex"}],
"user_angry_at_you": [{"stem": "ne\s((h[uú]z+|bas+z|d[uü]h[ií])\w*\s?fel|idege(s[ií]ts|lj([eé]l)?\s?(ki)?))",
"wordclass": "regex"},
{"stem": "(ideges|m[eé]rges|d[uü]h[oö]s)\s(vagyok|voltam)", "wordclass": "regex"},
{"stem": "haragszom", "exc": [{"stem": "nem"}]},
{"stem": "(mi([eé]r)?t?\s)?nem\s(hal+|[eé]rt)([ae]sz|[eo]d)", "wordclass": "regex"},
{"stem": "nem?\sbesz[eé]l(j|het(sz)?)\s[ií]gy", "wordclass": "regex"},
{"stem": "megbántott", "affix": ["ál"]}, {"stem": "ez nem volt szép"},
{"stem": "buta vagy"}],
"user_forgiving_you": [{"stem": "meg\s?(van\s)?bocs[aá]l?j?t(o(t+a)?[km]|va)", "wordclass": "regex"},
{"stem": "(nem|dehogy)\sharagszo[km]", "wordclass": "regex"},
{"stem": "(sem+i|[ns]i[nc]+s)\s?(baj|gond)", "wordclass": "regex"}],
"user_sorry": [{"stem": "meg\s?(tud(sz|n[aá]l)\s)?bocs[aá]l?j?ta?(ni|sz|od|t*ot+ad)", "wordclass": "regex"},
{"stem": "ne haragudj"},
{"stem": "bocsáss meg", "exc": [{"stem": "bocs[aá]s+\s?meg\,?\s?\w+", "wordclass": "regex"}]},
{"stem": "sajnálom", "exc": [{"stem": "sajn[aá]lom\,?\s?\w+", "wordclass": "regex"}]},
{"stem": "megbántottalak", "inc": [{"stem": "ha"}, {"stem": "hogy"}]},
{"stem": "megs[eé]rt[eoöő][dt]+[eé]l\w*", "wordclass": "regex"}],
"user_friend": [{
"stem": "(lesz(e[kl]|[uü]nk)|legy[uü]nk|len+[eé][kl]|lehet([uü]nk|n[eé]n?k))\s(az?\s)?(egyik\s|legjob+\s|k[eé]pzele?t(beli)?\s)?([oö]r[oöi]k?[\s\-]?)?(bar[aá]to|bari|havero|spano)[dkm]",
"wordclass": "regex"},
{"stem": "(bar[aá]to[km]|havero[km])\svagy(unk)?", "wordclass": "regex"},
{"stem": "te\svagy\sa.+?bar[aá]tom", "wordclass": "regex"},
{"stem": "gyönyörű barátság", "affix": ["unk"], "inc": [{"stem": "kezdete"}]}],
"user_back": [
{"stem": "(vis+za|meg|haza)\s?(is\s)?(j[oö]t+|t?[eé]rt|[eé]rkezt)(em|[uü]nk)", "wordclass": "regex",
"exc": [{"stem": "meg[eé]rt\w*", "wordclass": "regex"}]},
{"stem": "[io]t+(hon)?\s(is\s)?vagy(ok|unk)", "wordclass": "regex"}],
"user_hungry": [
{"stem": "([eé]he[ns]\s?(vagyok|halok)|en+[eé]k\s(most|egy|valamit?)|(meg)?tudn[eé]k\s(most\s)?en+i)",
"wordclass": "regex"}],
"user_thirsty": [
{"stem": "(szomja[ns]\s?(vagyok|halok)|in+[eé]k\s(most|egy|valamit?)|(meg)?tudn[eé]k\s(most\s)?in+i)",
"wordclass": "regex"}],
"how_are_you": [{"stem": "hogy vagy"}, {"stem": "j[oó](l|b+an)\svagy", "wordclass": "regex"},
{"stem": "(j[oó]l|hogy)\s[eé]rzed\s(most\s)?magad(at)?", "wordclass": "regex"},
{"stem": "mizu", "affix": ["js", "jság"]}, {"stem": "hogy ityeg"},
{"stem": "(hogy\stelt\sa|milyen(\svolt\sa)?)\snapod(\svan)?", "wordclass": "regex"},
{"stem": "[vw]+h*[aá]+[csz]+[aáu]+p+", "wordclass": "regex"},
{"stem": "(j[oó]|milyen)\s(a\s)?kedved(\svan)?", "wordclass": "regex"},
{"stem": "mi\sa(z\s[aá]bra|\sst[aá]jsz)", "wordclass": "regex"},
{"stem": "hogy\s[eé]rz(i|ed)\smag[aá][dt]", "wordclass": "regex"},
{"stem": "mi a", "inc": [{"stem": "helyzet"}, {"stem": "stájsz"}]},
{"stem": "mit csinálsz", "max_words": 3}, {"stem": "mi a stájsz"}, {"stem": "hogy ityeg"}],
"about_name": [{"stem": "(mond*(ja)?\ski|mi\sa)\s(bece)nev[eé][dt](et)?", "wordclass": "regex"},
{"stem": "(hogy(an)?|minek)\s(is\s)?(h[ií]v([jn][aá](la)?k|hatom)|nevez+(nek|elek))",
"wordclass": "regex", "exc": [{"stem": "engem"}, {"stem": "én"}]}, {
"stem": "(mi?[eé]rt\s|hogy[\s\-]?hogy\s)(let+\s)?(pont\s)?(ezt?\s(let+\s)?(a\s)?|[ií]gy\s|ilyen\s)(nevez[nt]ek|h[ií]v[nt]ak|neved|nevet\s(kapt[aá][dl]|adt[aá]k))",
"wordclass": "regex"}, {"stem": "mi\sa\s(bece)?neved?", "wordclass": "regex", "exc": [
{"stem": "az|[ae]n+[ae]k|amiben?|amelyik\w*", "wordclass": "regex"}, {"stem": "engem"},
{"stem": "én"}]}, {"stem": "n[eé]v(ed)?\seredete", "wordclass": "regex"}],
"about_you": [{"stem": "(mes[eé]lj|besz[eé]lj|mondj)([eo]n)?.+?mag(ad|[aá])r[oó]l", "wordclass": "regex"},
{"stem": "mutatkoz+([aá]l|on)?\s+be", "wordclass": "regex"},
{"stem": "(be)?muta(koz(hat)?n[aá]l|(tn[aá]d|sd)\s.+?magad(at)?)", "wordclass": "regex"},
{"stem": "([km]i(\s|\sa\s.+?)vagy te|te [km]i(\s|\sa\s.+?)vagy)", "wordclass": "regex"}],
"about_creator": [{
"stem": "(ki|hogy(an)?)\s(a\s)?(k[eé]sz([ií]t([oöő]d|et+(ek)?)|[uü]lt([eé]l)?)|gazd[aá]d|programoz([oó]d|ot+|tak)|[ií]rt[aá]k?|(hoz(ot+|tak)|j[oö]t+[eé]l).+?(l[eé]tre|vil[aá]gra|k[oó]dod(at)?)|alkot([oó][dt]+|tak)|teremt(et+|[oöő]d)|(keresztelt|nevezet+|adtak)\sel|adot+\s(neked\s)?nevet)",
"wordclass": "regex"},
{"stem": "kik?\s(k[eé]sz[ií]tet+|fejlesztet+)(ek)?", "wordclass": "regex", "max_words": 3}],
"about_look": [{"stem": "hogy(an)?\s(n[eé]zn?[eé]l\ski|mutatsz|festesz)", "wordclass": "regex"},
{"stem": "(k[uü]ldj|mutas+).+?(k[eé]pet|fot[oó]t|sz?elfie?t)\smagadr[oó]l",
"wordclass": "regex"},
{"stem": "(k[uü]ldj|mutas+)\smagadr[oó]l.+?(k[eé]pet|fot[oó]t|sz?elfie?t)",
"wordclass": "regex"},
{"stem": "(van|milyen)\s(az?\s)?(arcod|kin[eé]zeted)", "wordclass": "regex"},
{"stem": "szép vagy"}],
"about_age": [{"stem": "mennyi idős vagy"}, {"stem": "hány éves vagy"}, {"stem": "melyik évben születtél"},
{"stem": "mikor születtél"},
{"stem": "(melyik\s[eé]vben|mikor)\sk[eé]sz([uü]lt[eé]l|[ií]tet+ek)", "wordclass": "regex"},
{"stem": "(h[aá]ny(adik|ban)|mikor\s(van|[uü]n+epled)\s?a?)\ssz[uü]l(et[eé]s|i)napod(at)?",
"wordclass": "regex"}, {"stem": "h[aá]ny\s[eé]vesnek\s.+?\smagad(at)?", "wordclass": "regex"},
{"stem": "sz[uü]l(et[eé]s)?i?napod(at)?\s(h[aá]nyadik[aá]n|mikor|melyik)", "wordclass": "regex"}],
"about_zodiac": [{"stem": "(neked\s)?mi\sa\s(horoszk[oó]pod|csil+agjegyed)", "wordclass": "regex"},
{"stem": "milyen jegyben születtél"},
{"stem": "a\s(te\s)?(horoszk[oó]pod|csil+agjegyed)\smi(csoda)?", "wordclass": "regex"},
{"stem": "milyen\sjegyben\ssz[uü]let+\w+", "wordclass": "regex"}],
"about_location": [{
"stem": "(hol|helyen)\s(k[eé]sz[uü]lt[eé]l|k[eé]sz[ií]tet+ek|sz[uü]let+[eé]l|(hoztak|j[oö]t+[eé]l).+?l[eé]tre)",
"wordclass": "regex"},
{"stem": "hon+an\s(sz[aá]rmazol|[ií]rsz|val[oó]\svagy)", "wordclass": "regex"},
{"stem": "ho(n+an|l)\svagy\s(most\s)?(helyileg|most|pontosan)", "wordclass": "regex"},
{"stem": "(hol\s|mer+e\s)(laksz|(van|az?).+?ot+honod)", "wordclass": "regex"},
{"stem": "hol vagy", "max_words": 3}],
"about_family": [{
"stem": "ki(k|t|ket)?\s(az?\s|tartasz\sa\s)?(te\s)?(csal[aá]dod(nak)?|sz[uü]l(t|et+[eé]l)|sz[uü]leid(nek)?|([eé]des)?(any(uk)?[aá]d|ap(uk)?[aá]d)(nak)?)",
"wordclass": "regex"},
{"stem": "csal[aá]dban\s([eé]l(sz|tek)|sz[uü]let+[eé]l)", "wordclass": "regex"},
{"stem": "(h[aá]ny|van(nak)?)\stestv[eé]rei?d", "wordclass": "regex"},
{"stem": "(kik?|van(n?ak)?[\-\s]?e?)(\sa)?(\shoz+[aá]d?\s?tartoz[oó]i?d|csal[aá]dod)",
"wordclass": "regex"}],
"about_software": [{
"stem": "(hogy(hogy|an)?|mit[oöő]l).+?(m[uüű]k[oö]dsz|(tudsz |vagy k[eé]pes )?(meg)?[eé]rte(sz|d|ni)\,? (meg )?(hogy )?(a?mit mond(ok|tam)|a?mit [ií]r(ok|tam)|engem))",
"wordclass": "regex"},
{"stem": "mi(jen|lyen|en|\s?f[eé]le|\s?fajta)\sfekete\s?m[aá]gia", "wordclass": "regex"},
{"stem": "neur[aá]lis\sh[aá]l[oó]\w*", "wordclass": "regex",
"inc": [{"stem": "vagy"}, {"stem": "te"}, {"stem": "működ", "wordclass": "verb"}]}],
"about_skills": [{
"stem": "mi(lyen|(ke)?t|k?re)\s(funkci[oó](id?|kat)\s|dolgok(at|ra)\s|tr[uü]k+([oö]k(et|re)|jeid?)\s|parancsok(at|ra)\s)?(tud(sz|n[aá]l)?\s(csin[aá]lni|mutatni)?|ismer(sz)?|(vagy\s|van\s)?(k[eé]pes|(be|meg)?tan[ií]tva)|tan[ií]tot+[aá]k\s(be|neked|meg)?|(k[eé]pes+[eé]gei?d?|tulajdons[aá]g(o|ai)d?)\svan(nak)?)",
"wordclass": "regex", "exc": [{"stem": "mond", "wordclass": "verb"}]},
{"stem": "mihez ért", "affix": ["esz"]},
{"stem": "mi((ke)?t|k?r[oöő]l)\s(lehet\s|szabad\s|tudok\s)?k[eé]rdez+h?e\w+",
"wordclass": "regex"}, {"stem": "miben tudsz"},
{"stem": "k[eé]rdez+(het)?(ek|ni)\st[oöő]led", "wordclass": "regex"},
{"stem": "mi(ben|vel)\s?tud(sz|n[aá]l)\sseg[ií]teni", "wordclass": "regex"}],
"about_topics": [{"stem": "mir[oöő]l\s.*?besz[eé]lge[st]\w+", "wordclass": "regex"},
{"stem": "milyen\st[eé]m[aá][bk]*r?[aoó][lnt]", "wordclass": "regex"}],
"about_thoughts": [{
"stem": "mi(n|re)?\s(gondol(kodsz|ko[dz]ol|sz)|agyalsz|t[oö]prenge?sz|j[aá]r\s(az?\s)?(fejed|agyad)(b[ae]n?)?)",
"wordclass": "regex"}],
"about_favorite": [{"stem": "melyik", "inc": [{"stem": "kedvenc", "affix": ["ed"]},
{"stem": "szeret", "affix": ["i", "ed"], "match_stem": False}]}],
"are_you_conscious": [{"stem": "(([oö]n)?tudat|akarat|l[eé]le?ke?)\w*", "wordclass": "regex",
"inc": [{"stem": "van"}, {"stem": "ébred", "wordclass": "verb", "prefix": []},
{"stem": "szabad"}]}],
"are_you_a_robot": [{
"stem": "(te\s)?(egy\s)?(igazi\s|val[oó](s|di)\s|h[uú]s[\-\s]?v[eé]r\s)?(ember|szem[eé]ly|(an)?droid)\svagy",
"wordclass": "regex"}, {
"stem": "(robot|chatbot|ai|mesters[eé]ges\s?intel+igencia|g[eé]p|humanoid|programozva)\svagy",
"wordclass": "regex"}, {
"stem": "(ember+el|szem[eé]l+yel|robot+al|program+al|algoritmus+al|g[eé]p+el)\s(besz[eé]l(get)?ek|csevegek|levelezek|konzult[aá]lok)",
"wordclass": "regex"}, {
"stem": "(embernek|szem[eé]lynek|robotnak|programnak|algoritmusnak)\s([ií]ro(gato)?[km]|magyar[aá]zo[km]|[uü]zen(get)?ek)",
"wordclass": "regex"}, {
"stem": "(embernek|intel+igensnek|szem[eé]lynek|robotnak|g[eé]pnek)\s(hiszed|tartod|gondolod)\smagad(at)?",
"wordclass": "regex"}],
"are_you_hungry": [{"stem": "kérsz", "inc": [{"stem": "enni"}]}, {"stem": "nem vagy éhes"},
{"stem": "éhes vagy"},
{"stem": "(nem\s)?en+[eé]l?\s(meg\s)?(most\s)?(velem\s)?valamit?", "wordclass": "regex"},
{"stem": "(nem vagy kaj[aá]s|kaj[aá]s vagy)", "wordclass": "regex"}],
"are_you_thirsty": [{"stem": "kérsz", "inc": [{"stem": "inni"}]}, {"stem": "nem vagy szomjas"},
{"stem": "szomjas vagy"},
{"stem": "(nem\s)?i(n+[aá]|szo)l?\s(meg\s)?(most\s)?(velem\s)?valamit?",
"wordclass": "regex"}],
"are_you_busy": [{"stem": "elfoglalt", "inc": [{"stem": "vagy"}]},
{"stem": "r[aá]m?\s?[eé]r(n[eé]l|sz)(\smost)?(\segy)?(\skicsit|\skis\s\w+|\svalamen+yi\w*)?",
"wordclass": "regex"}, {
"stem": "(van|volna)\s(most\s)?(r[aá]m?\s)?(most\s)?(egy\s)?(kis\s|kev[eé]s\s|valamen+yi\s)?(szabad\s?)?id[oöő]d(\sr[aá]m)?",
"wordclass": "regex"}, {"stem": "sok dolgod van"}],
"are_you_lying": [{"stem": "hazud", "wordclass": "verb"},
{"stem": "nem mondt[aá][dl]\s((el|meg)\saz\s)?igaz(at|s[aá]got)", "wordclass": "regex"}],
"are_you_serious": [
{"stem": "(nem?|csak)\s(vic+el(sz|j)?|mond+(od|ja)?|ideges[ií]ts(en)?)", "wordclass": "regex"},
{"stem": "(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad?)|hisz(i|ed)|hit+ed?)",
"wordclass": "regex"},
{"stem": "biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mond(ta|o)d|mond[jt]a)", "wordclass": "regex"},
{"stem": "ezt?\s(most\s)?komoly(an)?", "wordclass": "regex"}, {"stem": "viccelsz", "max_words": 1}],
"can_you_hear_me": [
{"stem": "(olvas+a|hal+ja|n[eé]zi|van\sit+)(\sezt)?\s(vala|b[aá]r)ki(\sis)?", "wordclass": "regex"},
{"stem": "(hal+(asz|od)|l[aá]t(sz|od)|vesze[ld])\s(engem|a?mit\s(mondok|[ií]rok|k[eé]rdezek))",
"wordclass": "regex"}, {
"stem": "valaki\s(hal+(ja)?\s|olvas+a|figyeli?(\sar+a)?)\sa?mit\s(ide\s?|it+\s)?([ií]rok|mondok|k[eé]rdezek)",
"wordclass": "regex"}, {"stem": "felfogtad", "max_words": 3},
{"stem": "itt", "inc": [{"stem": "vagy"}, {"stem": "van"}], "max_words": 3},
{"stem": "halló", "max_words": 3}, {"stem": "hallasz", "max_words": 3}, {"stem": "mikrofon próba"},
{"stem": "miért nem válaszolsz"}],
"can_you_learn": [{"stem": "(k[eé]pes(\svagy)?|tud(sz)?)\stanulni", "wordclass": "regex"},
{"stem": "tanulsz\s(is|[ae].+?b[oóöő]l)", "wordclass": "regex"},
{"stem": "[dln][aáeéo][km]\s(be|meg)?tan[ií]tani\b", "wordclass": "regex", "boundary": False},
{"stem": "(lehet|tudlak|tudom)\s(t[eé]ged|[oö]nt)?\stan[ií]tani", "wordclass": "regex"}],
"can_you_understand_me": [
{"stem": "(meg)?[eé]rt(e(d|sz|t+ed?)|i)\,?((\shogy)?\sa?mit\s([ií]r|mond)\w+|\smagyarul)",
"wordclass": "regex"}],
"contact": [{"stem": "mi(lyen)?\s(.+?\s)?(e\-?mail\s?)?c[ií]me[dn]?", "wordclass": "regex"},
{"stem": "elérhetőség", "wordclass": "noun"},
{"stem": "elér", "wordclass": "verb", "inc": [{"stem": "önt"}, {"stem": "téged"}]}],
"no_answer": [{"stem": "válaszol", "wordclass": "verb", "prefix": [], "inc": [{"stem": "nem"}]},
{"stem": "ír", "wordclass": "verb", "prefix": [], "inc": [{"stem": "nem"}]}],
"shame": [{"stem": "kár", "max_words": 2}],
}
# smiley and emoji references
def emoji():
    """Return the smiley and emoji reference entities, grouped by feeling.

    The result maps a feeling name ("like", "sadness", "seduction", ...) to a
    list of matcher dicts. Every matcher has a "stem"; most also carry a
    "wordclass" ("emoji" for a single pictograph, "regex" for a text-emoticon
    pattern). Regex matchers set "boundary" to False and some pictographs
    carry an "inc" list of companion matchers. How the matching engine
    interprets "wordclass", "boundary" and "inc" is defined outside this
    function.

    A new structure is built on every call, so callers never share lists or
    dicts with each other.
    """

    def plain(*symbols):
        # One pictograph matcher per symbol, in the order given.
        return [{"stem": symbol, "wordclass": "emoji"} for symbol in symbols]

    def ascii_art(*patterns):
        # Text-emoticon matchers. Patterns are passed as raw strings so that
        # the regex escapes (\( \: \- ...) are not invalid Python escapes.
        return [{"stem": pattern, "wordclass": "regex", "boundary": False}
                for pattern in patterns]

    def suggestive(symbol, fire=False):
        # A pictograph listed together with the flirty faces in "inc";
        # some entries additionally list the fire emoji first.
        companions = ("🔥",) if fire else ()
        companions += ("😏", "😍", "😜", "😛")
        return {"stem": symbol, "wordclass": "emoji", "inc": plain(*companions)}

    # NOTE(review): unlike every other pictograph here, the bed entry has no
    # "wordclass" key. Kept as-is; confirm whether "emoji" was intended.
    bed = {"stem": "🛏️", "inc": plain("😏", "😍", "😜", "😛")}

    return {
        "like": plain("🙌", "👏", "💯", "👌", "👍") + ascii_art(r"\(Y\)"),
        "dislike": plain("💩", "👎", "😒", "🙄", "🤢", "☹️"),
        "happiness": plain("😉", "😃", "😄", "🙂") + ascii_art(
            r"[\:\;\=8BX]\-*[p\)\]93]+",
            r"[\(\[8]+\-*[\:\;\=8X]",
        ),
        "sadness": plain("😭", "😢") + ascii_art(
            r"[\:\;\=]['\,]?\-*[\(\[8]+",
            r"[\)\]9]+\-*['\,]?[\:\;\=]",
        ),
        "laughter": plain("😀", "😁", "😆", "😝", "😜") + ascii_art(
            r"[\:\;\=8BX]\-*d[asd]*",
        ),
        "love": plain("😘", "😍", "😙", "😻", "😗", "💋", "❤️", "💕", "🍆", "🏩")
        + ascii_art("<+3+"),
        "surprise": plain("😯", "😲", "😮") + ascii_art(
            r"[\:\;\=]\-*o+",
            r"o+\-*[\:\;\=]",
        ),
        "anger": plain("😡") + ascii_art(
            r">+[\:\;\=]\-*[\(\[8]+",
            r"[\)\]9]+\-*[\:\;\=]<+",
        ),
        # The text pattern sits between the pictographs in the original order.
        "discomfort": plain("😱", "🙀", "😨") + ascii_art("d+:") + plain("😰", "😩", "😓"),
        "confusion": plain("😐", "😕") + ascii_art(
            r"[\:\;\=]['\,]?\-*[\\/\|]+",
            r"[\\/\|]+\-*['\,]?[\:\;\=]",
        ),
        "tiredness": plain("😪", "💤", "😫", "😴"),
        "seduction": plain("💦", "🍆") + [
            bed,
            suggestive("🍑", fire=True),
            suggestive("🍌", fire=True),
            suggestive("🚀"),
            suggestive("💮", fire=True),
            suggestive("🌮"),
            suggestive("🌋"),
        ] + plain("👅"),
    }
# entities you want to ignore in search results or disallow in user inputs
def disallow():
    """Return the entities to ignore in search results or reject in user input.

    The result maps a category name ("obscene", "racist", "erotic",
    "unpleasant") to a list of matcher dicts. Each matcher has a "stem" and
    may carry "wordclass", "boundary", "ignorecase" and an "exc" list of
    further matchers; the meaning of those keys is defined by the matching
    engine outside this function.

    All regex stems are raw strings. This matters for the second "obscene"
    pattern: its trailing ``\\b`` was previously written in a normal string
    literal, where Python reads it as a backspace character (0x08) instead of
    a regex word boundary, so the pattern could never match ordinary text.
    """
    return {
        "obscene": [
            {
                "stem": r"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?",
                "wordclass": "regex",
                "exc": [{"stem": "megye"}],
            },
            {
                # Ends with a real regex word boundary (\b), see docstring.
                "stem": r"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|kur[vw][aá]\w*(any[aá]d\w*)?|any[aá]d\w*)\b",
                "wordclass": "regex",
                "boundary": False,
            },
            {
                "stem": r"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?",
                "wordclass": "regex",
                "exc": [{"stem": "hányadik", "affix": ["a", "án", "ai"]}],
            },
        ],
        "racist": [
            {
                "stem": r"(fek[aá]|nig+(er|a)|n[aá]ci|cig[oó]|cig[aá]n+y|gypsy|dzsip[oó]|zsidr?[ó])[aáeégklnmstv]*",
                "wordclass": "regex",
                "boundary": False,
            },
        ],
        "erotic": [
            {
                "stem": r"(sz?ex|an[aá]l|[bv]agina|[bp][eé][np]isz?|creampie|cum|sperma?|fuck|homo(kos|sexu[aá]l(is)?)?|milf|bisexual|gay|dild[oó]|vibr[aá]tor|fel+atio|blow\s?job|whore|geci|pus{2}y|pics[aá]|pin[aá]|fasz|pis{2}|boner|dick(pic)?|x{3,}|hentai|catgirl|ec+hi|yaoi|loli|shot[aá]|\w*porn[oó]?(film)?)[aáeéioöőuüdgklmnprstvz]*",
                "wordclass": "regex",
                "boundary": False,
            },
            {"stem": "maki verem"},
        ],
        "unpleasant": [
            {"stem": "AIDS", "wordclass": "noun"},
            {"stem": "HIV", "ignorecase": False},
            {"stem": "Hitler", "wordclass": "noun"},
            {"stem": r"(Sz?t[aá]lin|Len+in)\w*", "wordclass": "regex"},
            {"stem": r"pedof[ií]l(i[aá])?[aokltv]*", "wordclass": "regex"},
            {"stem": r"(fur{2}y|bestiality|yif{2}y?)[aáeégklnmstv]*", "wordclass": "regex"},
            {"stem": r"mej?i?n\s?kamp+f+\w*", "wordclass": "regex"},
            {"stem": r"(any[aá]d|gy[oö]k[eé]r)\w*", "wordclass": "regex"},
            {"stem": r"nemz\w*", "wordclass": "regex"},
            {"stem": r"kak[aái][abklnstv]*", "wordclass": "regex"},
        ],
    }
# decide whether user is talking to you in a formal or informal way
def tone():
    """Return the entities used to decide whether the user speaks formally or informally.

    The result has two keys, "formal" and "informal", each holding a list of
    matcher dicts with a "stem" and a "wordclass" ("regex" or "noun"). How
    these matchers are applied is defined by the matching engine outside this
    function.

    Regex stems are raw strings so that ``\\w`` is passed to the regex engine
    verbatim instead of being an invalid Python string escape.
    """
    formal = [
        {"stem": r"\w+?([bcdfghjklmnpstvy]{1,2}|j)[eo]n", "wordclass": "regex"},
        {"stem": r"\w+?n[aeé]m?", "wordclass": "regex"},
        {"stem": "ön", "wordclass": "noun"},
        {"stem": "maga", "wordclass": "noun"},
        # NOTE(review): "(het)" is mandatory here, so "kérem"/"kérném" are not
        # covered by this stem; possibly "(het)?" was intended. Left unchanged.
        {"stem": r"(k[eé]r(het)n?[eé]m|megk[eé]rni)", "wordclass": "regex"},
    ]
    informal = [
        {"stem": r"\w+?([bcdfghjklmnpstvy]{1,2}|j)[aáeé][dl]", "wordclass": "regex"},
        {"stem": r"\w+?([ln][aeé][dlk]|[eo][dlk]|sz)", "wordclass": "regex"},
        {
            "stem": r"(t[eé](ged)?|veled|neked|hoz+[aá]d|t[oöő]led|magad(nak|at|dal|hoz|t[oó]l)?)",
            "wordclass": "regex",
        },
    ]
    return {"formal": formal, "informal": informal}