svn-gvsig-desktop / tags / v1_1_Build_1013 / extensions / extScripting / scripts / jython / Lib / reconvert.py @ 13521
History | View | Annotate | Download (5.09 KB)
1 |
#! /usr/bin/env python1.5
|
---|---|
2 |
|
3 |
r"""Convert old ("regex") regular expressions to new syntax ("re").
|
4 |
|
5 |
When imported as a module, there are two functions, with their own
|
6 |
docstrings:
|
7 |
|
8 |
convert(s, syntax=None) -- convert a regex regular expression to re syntax
|
9 |
|
10 |
quote(s) -- return a quoted string literal
|
11 |
|
12 |
When used as a script, read a Python string literal (or any other
|
13 |
expression evaluating to a string) from stdin, and write the
|
14 |
translated expression to stdout as a string literal. Unless stdout is
|
15 |
a tty, no trailing \n is written to stdout. This is done so that it
|
16 |
can be used with Emacs C-U M-| (shell-command-on-region with argument
|
17 |
which filters the region through the shell command).
|
18 |
|
19 |
No attempt has been made at coding for performance.
|
20 |
|
21 |
Translation table...
|
22 |
|
23 |
\( ( (unless RE_NO_BK_PARENS set)
|
24 |
\) ) (unless RE_NO_BK_PARENS set)
|
25 |
\| | (unless RE_NO_BK_VBAR set)
|
26 |
\< \b (not quite the same, but alla...)
|
27 |
\> \b (not quite the same, but alla...)
|
28 |
\` \A
|
29 |
\' \Z
|
30 |
|
31 |
Not translated...
|
32 |
|
33 |
.
|
34 |
^
|
35 |
$
|
36 |
*
|
37 |
+ (unless RE_BK_PLUS_QM set, then to \+)
|
38 |
? (unless RE_BK_PLUS_QM set, then to \?)
|
39 |
\ |
40 |
\b
|
41 |
\B
|
42 |
\w
|
43 |
\W
|
44 |
\1 ... \9 |
45 |
|
46 |
Special cases... |
47 |
|
48 |
Non-printable characters are always replaced by their 3-digit
|
49 |
escape code (except \t, \n, \r, which use mnemonic escapes) |
50 |
|
51 |
Newline is turned into | when RE_NEWLINE_OR is set |
52 |
|
53 |
XXX To be done... |
54 |
|
55 |
[...] (different treatment of backslashed items?)
|
56 |
[^...] (different treatment of backslashed items?)
|
57 |
^ $ * + ? (in some error contexts these are probably treated differently) |
58 |
\vDD \DD (in the regex docs but only works when RE_ANSI_HEX set) |
59 |
|
60 |
"""
|
61 |
|
62 |
|
63 |
import regex
|
64 |
from regex_syntax import * # RE_*
|
65 |
|
66 |
__all__ = ["convert","quote"]
|
67 |
|
68 |
# Default translation table: maps a regex token (either a backslash pair
# or a single character) to the text emitted for it in re syntax.
# convert() works on a copy of this table and adjusts the copy according
# to the syntax flags it is given.
mastertable = {
    # Word boundaries and start/end-of-buffer anchors.
    r'\<': r'\b',
    r'\>': r'\b',
    r'\`': r'\A',
    r'\'': r'\Z',

    # Backslashed grouping/alternation operators lose their backslash ...
    r'\(': '(',
    r'\)': ')',
    r'\|': '|',

    # ... while the bare characters are literals and so gain one.
    '(': r'\(',
    ')': r'\)',
    '|': r'\|',

    # Common whitespace control characters become mnemonic escapes.
    '\t': r'\t',
    '\n': r'\n',
    '\r': r'\r',
}
|
84 |
|
85 |
|
86 |
def convert(s, syntax=None):
    """Convert a regex regular expression to re syntax.

    The first argument is the regular expression, as a string object,
    just like it would be passed to regex.compile().  (I.e., pass the
    actual string object -- string quotes must already have been
    removed and the standard escape processing has already been done,
    e.g. by eval().)

    The optional second argument is the regex syntax variant to be
    used.  This is an integer mask as passed to regex.set_syntax();
    the flag bits are defined in regex_syntax.  When not specified, or
    when None is given, the current regex syntax mask (as retrieved by
    regex.get_syntax()) is used -- which is 0 by default.

    The return value is a regular expression, as a string object that
    could be passed to re.compile().  (I.e., no string quotes have
    been added -- use quote() below, or repr().)

    A backslash that is the very last character of the input has no
    character to pair with; it is copied to the output unchanged
    instead of raising IndexError.

    The conversion is not always guaranteed to be correct.  More
    syntactical analysis should be performed to detect borderline
    cases and decide what to do with them.  For example, 'x*?' is not
    translated correctly.

    """
    # Work on a private copy so the syntax flags never alter mastertable.
    table = mastertable.copy()
    if syntax is None:
        syntax = regex.get_syntax()
    if syntax & RE_NO_BK_PARENS:
        # Parentheses already have their re meaning: leave them alone.
        del table[r'\('], table[r'\)']
        del table['('], table[')']
    if syntax & RE_NO_BK_VBAR:
        # Likewise for the alternation bar.
        del table[r'\|']
        del table['|']
    if syntax & RE_BK_PLUS_QM:
        # Here \+ and \? are the operators and bare + and ? are literals.
        table['+'] = r'\+'
        table['?'] = r'\?'
        table[r'\+'] = '+'
        table[r'\?'] = '?'
    if syntax & RE_NEWLINE_OR:
        table['\n'] = '|'

    # Collect the translated tokens and join once at the end rather than
    # growing a string one concatenation at a time.
    pieces = []
    i = 0
    end = len(s)
    while i < end:
        token = s[i]
        i = i + 1
        # A backslash forms a two-character token with its successor;
        # a trailing backslash has none and stays a one-character token.
        if token == '\\' and i < end:
            token = token + s[i]
            i = i + 1
        # Tokens without a table entry are passed through unchanged.
        pieces.append(table.get(token, token))
    return ''.join(pieces)
|
144 |
|
145 |
|
146 |
def quote(s, quote=None):
    """Convert a string object to a quoted string literal.

    This is similar to repr() but will return a "raw" string (r'...'
    or r"...") when the string contains backslashes, instead of
    doubling all backslashes.  The resulting string does *not* always
    evaluate to the same string as the original; however it will do
    just the right thing when passed into re.compile().

    The optional second argument forces the string quote; it must be
    a single character which is a valid Python string quote.  When it
    is omitted, a single quote is used unless the string contains a
    single quote and no double quote, in which case a double quote is
    used so that nothing needs escaping.

    """
    if quote is None:
        q = "'"
        # The alternative must be the *other* quote character; with both
        # set to "'" the switch below could never take place.
        altq = '"'
        if q in s and altq not in s:
            q = altq
    else:
        assert quote in ('"', "'")
        q = quote
    pieces = [q]
    for c in s:
        if c == q:
            # Escape occurrences of the delimiter itself.
            c = '\\' + c
        elif c < ' ' or c > '~':
            # Outside printable ASCII: emit an octal escape of at least
            # three digits.
            c = "\\%03o" % ord(c)
        pieces.append(c)
    pieces.append(q)
    res = ''.join(pieces)
    # Any backslash (original or added above) calls for a raw literal so
    # that it reaches re.compile() undoubled.
    if '\\' in res:
        res = 'r' + res
    return res
|
176 |
|
177 |
|
178 |
def main():
    """Script entry point: translate one expression from stdin to stdout.

    Reads all of stdin, evaluates it as a Python expression, converts
    the result with convert() and writes the quote()d form to stdout.
    A newline is appended only when stdout is a terminal.
    """
    import sys
    source = sys.stdin.read()
    # NOTE(review): eval() runs arbitrary Python taken from stdin.  The
    # module docstring describes this as intended (the input may be any
    # expression evaluating to a string), so only feed it trusted input.
    pattern = eval(source)
    out = sys.stdout
    out.write(quote(convert(pattern)))
    if out.isatty():
        out.write("\n")


# Run the translation only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
189 |
|