Statistics
| Revision:

svn-gvsig-desktop / tags / v1_1_Build_1007 / extensions / extScripting / scripts / jython / Lib / sre.py @ 12478

History | View | Annotate | Download (7.95 KB)

1 5782 jmvivo
#
2
# Secret Labs' Regular Expression Engine
3
#
4
# re-compatible interface for the sre matching engine
5
#
6
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
7
#
8
# This version of the SRE library can be redistributed under CNRI's
9
# Python 1.6 license.  For any other use, please contact Secret Labs
10
# AB (info@pythonware.com).
11
#
12
# Portions of this engine have been developed in cooperation with
13
# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
14
# other compatibility work.
15
#
16
17
import sre_compile
18
import sre_parse
19
20
# public symbols
21
# names exported by "from sre import *": the module-level functions,
# the short and long flag names, and the exception class
__all__ = [
    "match", "search", "sub", "subn", "split", "findall", "compile",
    "purge", "template", "escape",
    "I", "L", "M", "S", "X", "U",
    "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE",
    "error",
]

__version__ = "2.1b2"
27
28
# this module works under 1.5.2 and later.  don't use string methods
29
import string
30
31
# flags
32
# each flag has a long name and a one-letter alias bound to the same
# sre_compile constant

IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # case-insensitive matching
I = IGNORECASE

LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
L = LOCALE

UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
U = UNICODE

MULTILINE = sre_compile.SRE_FLAG_MULTILINE # anchors also look for newline
M = MULTILINE

DOTALL = sre_compile.SRE_FLAG_DOTALL # dot also matches newline
S = DOTALL

VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
X = VERBOSE
38
39
# sre extensions (experimental, don't rely on these)
40
TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
T = TEMPLATE

DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation

# sre exception (the same class object that sre_compile exposes)
error = sre_compile.error
45
46
# --------------------------------------------------------------------
47
# public interface
48
49
def match(pattern, string, flags=0):
    """Apply the pattern at the beginning of string.

    The pattern is compiled (or fetched from the cache) with the given
    flags.  Returns a match object, or None if the pattern does not
    match at the start of the string."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
53
54
def search(pattern, string, flags=0):
    """Look through string for a place where the pattern matches.

    The pattern is compiled (or fetched from the cache) with the given
    flags.  Returns a match object, or None if no match was found."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
58
59
def sub(pattern, repl, string, count=0, flags=0):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.

    count is handed to the pattern object's sub() method together
    with repl and string.  flags is used when compiling a string
    pattern, as in match() and search(); it defaults to 0 so existing
    callers see no change, and it has no effect when pattern is not a
    string (such a pattern is used as given)."""
    return _compile(pattern, flags).sub(repl, string, count)
64
65
def subn(pattern, repl, string, count=0, flags=0):
    """Return a 2-tuple containing (new_string, number).

    new_string is the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in the source
    string by the replacement repl.  number is the number of
    substitutions that were made.

    count is handed to the pattern object's subn() method together
    with repl and string.  flags is used when compiling a string
    pattern, as in match() and search(); it defaults to 0 so existing
    callers see no change, and it has no effect when pattern is not a
    string (such a pattern is used as given)."""
    return _compile(pattern, flags).subn(repl, string, count)
72
73
def split(pattern, string, maxsplit=0, flags=0):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.

    maxsplit is handed to the pattern object's split() method together
    with string.  flags is used when compiling a string pattern, as in
    match() and search(); it defaults to 0 so existing callers see no
    change, and it has no effect when pattern is not a string (such a
    pattern is used as given)."""
    return _compile(pattern, flags).split(string, maxsplit)
77
78
def findall(pattern, string, maxsplit=0):
    """Return a list of all non-overlapping matches in the string.

    If the pattern contains one or more groups, the result is a list
    of groups; with more than one group it is a list of tuples.

    Empty matches are included in the result."""
    # NOTE(review): the third argument is called maxsplit, but it is
    # simply forwarded as the second positional argument of the pattern
    # object's findall(); confirm what that method expects there before
    # relying on it as a limit.
    compiled = _compile(pattern, 0)
    return compiled.findall(string, maxsplit)
87
88
def compile(pattern, flags=0):
    "Return a pattern object for the given regular expression pattern."
    compiled = _compile(pattern, flags)
    return compiled
91
92
def purge():
    "Empty both the pattern cache and the replacement template cache"
    for cache in (_cache, _cache_repl):
        cache.clear()
96
97
def template(pattern, flags=0):
    "Return a pattern object compiled with the TEMPLATE flag added"
    template_flags = flags|T
    return _compile(pattern, template_flags)
100
101
def escape(pattern):
    "Put a backslash before every character that is not an ASCII letter or digit."
    pieces = []
    add = pieces.append
    for ch in pattern:
        if "a" <= ch <= "z" or "A" <= ch <= "Z" or "0" <= ch <= "9":
            # plain ASCII letters and digits are copied unchanged
            add(ch)
        elif ch == "\000":
            # a null character is written as the escape sequence \000
            add("\\000")
        else:
            add("\\" + ch)
    # the result has the same string type as the input pattern
    return _join(pieces, pattern)
112
113
# --------------------------------------------------------------------
114
# internals
115
116
_cache = {}
117
_cache_repl = {}
118
119
_MAXCACHE = 100
120
121
def _join(seq, sep):
122
    # internal: join into string having the same type as sep
123
    return string.join(seq, sep[:0])
124
125
def _compile(*key):
126
    # internal: compile pattern
127
    p = _cache.get(key)
128
    if p is not None:
129
        return p
130
    pattern, flags = key
131
    if type(pattern) not in sre_compile.STRING_TYPES:
132
        return pattern
133
    try:
134
        p = sre_compile.compile(pattern, flags)
135
    except error, v:
136
        raise error, v # invalid expression
137
    if len(_cache) >= _MAXCACHE:
138
        _cache.clear()
139
    _cache[key] = p
140
    return p
141
142
def _compile_repl(*key):
143
    # internal: compile replacement pattern
144
    p = _cache_repl.get(key)
145
    if p is not None:
146
        return p
147
    repl, pattern = key
148
    try:
149
        p = sre_parse.parse_template(repl, pattern)
150
    except error, v:
151
        raise error, v # invalid expression
152
    if len(_cache_repl) >= _MAXCACHE:
153
        _cache_repl.clear()
154
    _cache_repl[key] = p
155
    return p
156
157
def _expand(pattern, match, template):
    # internal: match.expand implementation hook
    #
    # the template is parsed on every call (no cache is consulted) and
    # then expanded against the given match
    parsed = sre_parse.parse_template(template, pattern)
    return sre_parse.expand_template(parsed, match)
161
162
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    #
    # same work as _subn, but only the new string is handed back
    result = _subn(pattern, template, string, count)
    return result[0]
165
166
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # returns (new_string, number_of_replacements).  template is either
    # a callable that receives each match object, or a replacement
    # template that is parsed (and cached) through _compile_repl.
    if callable(template):
        replace = template
    else:
        compiled = _compile_repl(template, pattern)
        def replace(match, compiled=compiled):
            # the default argument binds the parsed template to the helper
            return sre_parse.expand_template(compiled, match)
    pieces = []
    add = pieces.append
    done = 0 # replacements made so far
    pos = 0 # end of the previous match
    scanner = pattern.scanner(string)
    while 1:
        if count and done >= count:
            # a non-zero count limits the number of replacements
            break
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if pos < start:
            # keep the text between the previous match and this one
            add(string[pos:start])
        add(replace(m))
        pos = end
        done = done + 1
    # whatever follows the last match is copied unchanged
    add(string[pos:])
    return _join(pieces, string[:0]), done
190
191
def _split(pattern, string, maxsplit=0):
    # internal: pattern.split implementation hook
    #
    # returns the list of pieces between matches; when the pattern has
    # groups, the groups of each match are inserted after the piece
    # that precedes it
    pieces = []
    add = pieces.append
    add_groups = pieces.extend
    scanner = pattern.scanner(string)
    has_groups = pattern.groups
    done = 0 # splits made so far
    pos = 0 # end of the previous split
    while 1:
        if maxsplit and done >= maxsplit:
            # a non-zero maxsplit limits the number of splits
            break
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if start == end:
            # an empty match never splits; give up once the previous
            # split already reached the end of the string
            if pos >= len(string):
                break
            continue
        add(string[pos:start])
        if has_groups:
            add_groups(list(m.groups()))
        pos = end
        done = done + 1
    # the text after the last split is always the final piece
    add(string[pos:])
    return pieces
215
216
# register myself for pickling
217
218
import copy_reg
219
220
def _pickle(p):
    # internal: reduce hook for pattern objects; a pattern is rebuilt
    # by calling _compile with its pattern and flags attributes
    args = (p.pattern, p.flags)
    return _compile, args

# the pattern type is taken from a freshly compiled empty pattern
copy_reg.pickle(
    type(_compile("", 0)),
    _pickle,
    _compile)
224
225
# --------------------------------------------------------------------
226
# experimental stuff (see python-dev discussions for details)
227
228
class Scanner:
    # experimental: lexicon is a sequence of (phrase, action) pairs.
    # all phrases are combined into one compound pattern; scan() then
    # applies that pattern repeatedly and runs the action that belongs
    # to whichever phrase matched.

    def __init__(self, lexicon):
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        state = sre_parse.Pattern()
        branches = []
        for phrase, action in lexicon:
            # each phrase becomes a group numbered by its position in
            # the lexicon
            group = len(branches)
            parsed = sre_parse.parse(phrase)
            branches.append(
                sre_parse.SubPattern(state, [(SUBPATTERN, (group, parsed))]))
        compound = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        # NOTE(review): this is the length of the compound subpattern,
        # not of the list of phrases -- confirm that this is intended
        state.groups = len(compound)
        self.scanner = sre_compile.compile(compound)

    def scan(self, string):
        # returns (results, remainder): the values collected from the
        # actions and the part of string that was not consumed
        results = []
        collect = results.append
        match_at = self.scanner.match
        pos = 0
        m = match_at(string, pos)
        while m:
            end = m.end()
            if end == pos:
                # an empty match would never advance; stop here
                break
            # NOTE(review): assumes lastindex equals the lexicon
            # position assigned in __init__ -- verify against the engine
            action = self.lexicon[m.lastindex][1]
            if callable(action):
                # the current match is exposed to the action via self.match
                self.match = m
                action = action(self, m.group())
            if action is not None:
                collect(action)
            pos = end
            m = match_at(string, pos)
        return results, string[pos:]