Skip to content

Commit

Permalink
Issue 45: Implement the regular expression library
Browse files Browse the repository at this point in the history
Add initial support for the regular expression library.

This implements findall(), search(), and match()

The most complete implementation is findall(), which is limited only
by the incompatibilities between the JS regexp and Python regexp.
The biggest problem is the different interpretation of () in the
JS regexp.  Patterns like:

"From .*@(\\S*)"

Return the whole match instead of the piece inside the parentheses.

search() and match() are barely there.  They properly return None
if there is no match and return a MatchObject when there is a hit,
but the returned MatchObject is useless (i.e. no data and no methods).
  • Loading branch information
csev committed Jan 27, 2013
1 parent 45d2ad8 commit 9aa25b7
Show file tree
Hide file tree
Showing 5 changed files with 442 additions and 0 deletions.
57 changes: 57 additions & 0 deletions src/lib/re/__init__.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Skulpt implementation of a minimal Python `re` module.
// Exposes findall(), search() and match(), plus a placeholder MatchObject class.
// Patterns are handed to the JavaScript regex engine unchanged, so Python-only
// syntax is not translated.
var $builtinmodule = function(name)
{
    var mod = {};

    // Class body for MatchObject: the constructor only stores the list of
    // matched strings on the instance as `thematch`; no methods are defined.
    var matchobj = function($gbl, $loc) {
        $loc.__init__ = new Sk.builtin.func(function(self, thematch) {
            self.thematch = thematch;
        });
    };

    mod.MatchObject = Sk.misceval.buildClass(mod, matchobj, 'MatchObject', []);

    // Turn a "/source/flags" literal string into a RegExp without eval(),
    // so text inside the pattern can never run as JavaScript.
    var literalToRegExp = function(literal) {
        var close = literal.lastIndexOf("/");
        if (literal.charAt(0) !== "/" || close < 1) {
            throw new SyntaxError("Invalid regular expression literal: " + literal);
        }
        return new RegExp(literal.substring(1, close), literal.substring(close + 1));
    };

    // Internal function to return a Python list of strings.
    //   res    - a RegExp object, or (for backward compatibility) a JS regex
    //            literal string such as "/abc/g"
    //   string - Python str whose JS value (.v) is searched
    // Returns a Sk.builtin.list holding whatever String.prototype.match
    // yields, wrapped as Python strs; the list is empty when nothing matches.
    mod._findre = function(res, string) {
        var re = (res instanceof RegExp) ? res : literalToRegExp(res);
        var matches = string.v.match(re);
        var retval = new Sk.builtin.list();   // `var`: do not leak a global
        if (matches === null) return retval;
        for (var i = 0; i < matches.length; ++i) {
            retval.v.push(new Sk.builtin.str(matches[i]));
        }
        return retval;
    };

    // findall(pattern, string[, flags]) -> list of every match in `string`.
    // `flags` is accepted but currently ignored.
    // Limitation: with the global flag JS match() returns whole matches, so
    // patterns with () groups yield the full match rather than the group.
    mod.findall = new Sk.builtin.func(function(pattern, string, flags) {
        // The RegExp constructor needs no "/" escaping (the old single
        // replace() only escaped the first slash).
        return mod._findre(new RegExp(pattern.v, "g"), string);
    });

    // search(pattern, string[, flags]) -> MatchObject for the first match
    // anywhere in `string`, or JS null when there is none.
    // `flags` is accepted but currently ignored.
    mod.search = new Sk.builtin.func(function(pattern, string, flags) {
        var lst = mod._findre(new RegExp(pattern.v), string);
        if (lst.v.length < 1) return null;
        return Sk.misceval.callsim(mod.MatchObject, lst);
    });

    // match(pattern, string[, flags]) -> MatchObject when the pattern matches
    // at the start of `string`, or JS null otherwise.
    // `flags` is accepted but currently ignored.
    mod.match = new Sk.builtin.func(function(pattern, string, flags) {
        // Wrap in a non-capturing group so the anchor applies to every
        // alternative: "^a|b" would let "b" match anywhere in the string.
        var lst = mod._findre(new RegExp("^(?:" + pattern.v + ")"), string);
        if (lst.v.length < 1) return null;
        return Sk.misceval.callsim(mod.MatchObject, lst);
    });

    return mod;
};
51 changes: 51 additions & 0 deletions test/run/t339.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re

# Regression test for the re module: each case prints "Correct N" on
# success and "InCorrect N" on failure.  The findall() cases also print the
# returned list so it can be compared with the recorded output.

# Case 1: pattern absent from the string -> empty list.
val = re.findall("From","dlkjdsljkdlkdsjlk")
print val
if len(val) == 0 : print "Correct 1"
else : print "InCorrect 1"

# Case 2: pattern occurs once in the middle of the string.
val = re.findall("From","dlkjd From kdsjlk")
print val
if len(val) == 1 : print "Correct 2"
else : print "InCorrect 2"

# Case 3: pattern occurs twice, once at the very start.
val = re.findall("From","From dlkjd From kdsjlk")
print val
if len(val) == 2 : print "Correct 3"
else : print "InCorrect 3"

# Case 4: pattern containing a "/" character; five fractions are expected.
val = re.findall("[0-9]+/[0-9]+","1/2 1/3 3/4 1/8 fred 10/0")
print val
if len(val) == 5 : print "Correct 4"
else : print "InCorrect 4"

# Won't work because JS match does not deal with ()
# print re.findall("From .*@(\\S*)","From csev@umich.edu Sat 09:25:14")

# These return either None or a trivial MatchObject with no methods

# Case 5: search() with no occurrence -> None.
val = re.search("From","dlkjdsljkdlkdsjlk")
if val is None: print "Correct 5"
else : print "InCorrect 5",val

# Case 6: search() finds the pattern in the middle of the string.
val = re.search("From","dlkjd From kdsjlk")
if val is not None: print "Correct 6"
else : print "InCorrect 6",val

# Case 7: search() finds the pattern at the start of the string.
val = re.search("From","From dlkjd From kdsjlk")
if val is not None: print "Correct 7"
else : print "InCorrect 7",val

# Case 8: match() with no occurrence -> None.
val = re.match("From","dlkjdsljkdlkdsjlk")
if val is None: print "Correct 8"
else : print "InCorrect 8",val

# Case 9: match() only matches at the start, so a mid-string hit -> None.
val = re.match("From","dlkjd From kdsjlk")
if val is None: print "Correct 9"
else : print "InCorrect 9",val

# Case 10: match() succeeds when the string begins with the pattern.
val = re.match("From","From dlkjd From kdsjlk")
if val is not None: print "Correct 10"
else : print "InCorrect 10",val

14 changes: 14 additions & 0 deletions test/run/t339.py.real
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[]
Correct 1
['From']
Correct 2
['From', 'From']
Correct 3
['1/2', '1/3', '3/4', '1/8', '10/0']
Correct 4
Correct 5
Correct 6
Correct 7
Correct 8
Correct 9
Correct 10
54 changes: 54 additions & 0 deletions test/run/t339.py.symtab
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
Sym_type: module
Sym_name: top
Sym_lineno: 0
Sym_nested: False
Sym_haschildren: False
-- Identifiers --
name: None
is_referenced: True
is_imported: False
is_parameter: False
is_global: True
is_declared_global: False
is_local: False
is_free: False
is_assigned: False
is_namespace: False
namespaces: [
]
name: len
is_referenced: True
is_imported: False
is_parameter: False
is_global: True
is_declared_global: False
is_local: False
is_free: False
is_assigned: False
is_namespace: False
namespaces: [
]
name: re
is_referenced: True
is_imported: True
is_parameter: False
is_global: False
is_declared_global: False
is_local: True
is_free: False
is_assigned: False
is_namespace: False
namespaces: [
]
name: val
is_referenced: True
is_imported: False
is_parameter: False
is_global: False
is_declared_global: False
is_local: True
is_free: False
is_assigned: True
is_namespace: False
namespaces: [
]
Loading

0 comments on commit 9aa25b7

Please sign in to comment.