Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / tests / test_tokenize2.py @ 475

History | View | Annotate | Download (32.9 KB)

1
# -*- coding: utf-8 -*-
2
"""Testcases for new cssutils.tokenize.Tokenizer
3

4
TODO: old tests as new ones are **not complete**!
5
"""
6

    
7
import sys
8
import xml.dom
9
import basetest
10
from cssutils.tokenize2 import *
11

    
12
class TokenizerTestCase(basetest.BaseTestCase):
13

    
14
    testsall = {
15
        # IDENT
16
        u'äöü߀': [('IDENT', u'äöü߀', 1, 1)],
17
        u' a ': [('S', u' ', 1, 1),
18
                 ('IDENT', u'a', 1, 2),
19
                 ('S', u' ', 1, 3)],
20
        u'_a': [('IDENT', u'_a', 1, 1)],
21
        u'-a': [('IDENT', u'-a', 1, 1)],
22
        u'aA-_\200\377': [('IDENT', u'aA-_\200\377', 1, 1)],
23
        u'a1': [('IDENT', u'a1', 1, 1)],
24
        # escapes must end with S or max 6 digits:
25
        u'\\44 b': [('IDENT', u'Db', 1, 1)],
26
        u'\\44  b': [('IDENT', u'D', 1, 1),
27
                     ('S', u' ', 1, 5),
28
                     ('IDENT', u'b', 1, 6)],
29
        u'\\44\nb': [('IDENT', u'Db', 1, 1)],
30
        u'\\44\rb': [('IDENT', u'Db', 1, 1)],
31
        u'\\44\fb': [('IDENT', u'Db', 1, 1)],
32
        u'\\44\n*': [('IDENT', u'D', 1, 1),
33
                    ('CHAR', u'*', 2, 1)],
34
        u'\\44  a': [('IDENT', u'D', 1, 1),
35
                    ('S', u' ', 1, 5),
36
                    ('IDENT', u'a', 1, 6)],
37
        # TODO:
38
        # Note that this means that a "real" space after the escape sequence
39
        # must itself either be escaped or doubled:
40
        u'\\44\ x': [('IDENT', u'D\\ x', 1, 1)],
41
        u'\\44  ': [('IDENT', u'D', 1, 1),
42
                     ('S', u' ', 1, 5)],
43

    
44
        ur'\44': [('IDENT', u'D', 1, 1)],
45
        ur'\\': [('IDENT', ur'\\', 1, 1)],
46
        ur'\{': [('IDENT', ur'\{', 1, 1)],
47
        ur'\"': [('IDENT', ur'\"', 1, 1)],
48
        ur'\(': [('IDENT', ur'\(', 1, 1)],
49
        ur'\1 \22 \333 \4444 \55555 \666666 \777777 7 \7777777':
50
            [(
51
                ('IDENT', u'\x01"\u0333\u4444\U00055555\\666666 \\777777 7', 1, 1)
52
                if sys.maxunicode > 0x10000 else
53
                ('IDENT', u'\x01"\u0333\u4444\\55555 \\666666 \\777777 7', 1, 1)
54
            ),
55
            ('S', ' ', 1, 43),
56
            ('IDENT', '\\7777777', 1, 44)
57
        ],
58

    
59

    
60
        u'\\1 b': [('IDENT', u'\x01b', 1, 1)],
61
        u'\\44 b': [('IDENT', u'Db', 1, 1)],
62
        u'\\123 b': [('IDENT', u'\u0123b', 1, 1)],
63
        u'\\1234 b': [('IDENT', u'\u1234b', 1, 1)],
64
        u'\\12345 b':
65
            [(
66
                ('IDENT', u'\U00012345b', 1, 1)
67
                if sys.maxunicode > 0x10000 else
68
                ('IDENT', u'\\12345 b', 1, 1)
69
            )],
70
        u'\\123456 b': [('IDENT', u'\\123456 b', 1, 1)],
71
        u'\\1234567 b': [('IDENT', u'\\1234567', 1, 1),
72
                         ('S', u' ', 1, 9),
73
                         ('IDENT', u'b', 1, 10)],
74
        u'\\{\\}\\(\\)\\[\\]\\#\\@\\.\\,':
75
            [('IDENT', u'\\{\\}\\(\\)\\[\\]\\#\\@\\.\\,', 1, 1)],
76

    
77
        # STRING
78
        u' "" ': [('S', u' ', 1, 1),
79
                 ('STRING', u'""', 1, 2),
80
                 ('S', u' ', 1, 4)],
81
        u' "\'" ': [('S', u' ', 1, 1),
82
                 ('STRING', u'"\'"', 1, 2),
83
                 ('S', u' ', 1, 5)],
84
        u" '' ": [('S', u' ', 1, 1),
85
                 ('STRING', u"''", 1, 2),
86
                 ('S', u' ', 1, 4)],
87
        u" '' ": [('S', u' ', 1, 1),
88
                 ('STRING', u"''", 1, 2),
89
                 ('S', u' ', 1, 4)],
90
        # until 0.9.5.x
91
        #u"'\\\n'": [('STRING', u"'\\\n'", 1, 1)],
92
        #u"'\\\n\\\n\\\n'": [('STRING', u"'\\\n\\\n\\\n'", 1, 1)],
93
        #u"'\\\f'": [('STRING', u"'\\\f'", 1, 1)],
94
        #u"'\\\r'": [('STRING', u"'\\\r'", 1, 1)],
95
        #u"'\\\r\n'": [('STRING', u"'\\\r\n'", 1, 1)],
96
        #u"'1\\\n2'": [('STRING', u"'1\\\n2'", 1, 1)],
97
        # from 0.9.6a0 escaped nl is removed from string
98
        u"'\\\n'": [('STRING', u"''", 1, 1)],
99
        u"'\\\n\\\n\\\n'": [('STRING', u"''", 1, 1)],
100
        u"'\\\f'": [('STRING', u"''", 1, 1)],
101
        u"'\\\r'": [('STRING', u"''", 1, 1)],
102
        u"'1\\\n2'": [('STRING', u"'12'", 1, 1)],
103
        u"'1\\\r\n2'": [('STRING', u"'12'", 1, 1)],
104
        #ur'"\0020|\0020"': [('STRING', u'"\\0020|\\0020"', 1, 1)],
105
        ur'"\61|\0061"': [('STRING', u'"a|a"', 1, 1)],
106

    
107
        # HASH
108
        u' #a ': [('S', u' ', 1, 1),
109
                 ('HASH', u'#a', 1, 2),
110
                 ('S', u' ', 1, 4)],
111

    
112
        u'#ccc': [('HASH', u'#ccc', 1, 1)],
113
        u'#111': [('HASH', u'#111', 1, 1)],
114
        u'#a1a1a1': [('HASH', u'#a1a1a1', 1, 1)],
115
        u'#1a1a1a': [('HASH', u'#1a1a1a', 1, 1)],
116

    
117
        # NUMBER, for plus see CSS3
118
        u' 0 ': [('S', u' ', 1, 1),
119
                 ('NUMBER', u'0', 1, 2),
120
                 ('S', u' ', 1, 3)],
121
        u' 0.1 ': [('S', u' ', 1, 1),
122
                 ('NUMBER', u'0.1', 1, 2),
123
                 ('S', u' ', 1, 5)],
124
        u' .0 ': [('S', u' ', 1, 1),
125
                 ('NUMBER', u'.0', 1, 2),
126
                 ('S', u' ', 1, 4)],
127

    
128
        u' -0 ': [('S', u' ', 1, 1),
129
                 #('CHAR', u'-', 1, 2),
130
                 #('NUMBER', u'0', 1, 3),
131
                 ('NUMBER', u'-0', 1, 2),
132
                 ('S', u' ', 1, 4)],
133

    
134
        # PERCENTAGE
135
        u' 0% ': [('S', u' ', 1, 1),
136
                 ('PERCENTAGE', u'0%', 1, 2),
137
                 ('S', u' ', 1, 4)],
138
        u' .5% ': [('S', u' ', 1, 1),
139
                 ('PERCENTAGE', u'.5%', 1, 2),
140
                 ('S', u' ', 1, 5)],
141

    
142
        # URI
143
        u' url() ': [('S', u' ', 1, 1),
144
                 ('URI', u'url()', 1, 2),
145
                 ('S', u' ', 1, 7)],
146
        u' url(a) ': [('S', u' ', 1, 1),
147
                 ('URI', u'url(a)', 1, 2),
148
                 ('S', u' ', 1, 8)],
149
        u' url("a") ': [('S', u' ', 1, 1),
150
                 ('URI', u'url("a")', 1, 2),
151
                 ('S', u' ', 1, 10)],
152
        u' url( a ) ': [('S', u' ', 1, 1),
153
                 ('URI', u'url( a )', 1, 2),
154
                 ('S', u' ', 1, 10)],
155

    
156
        # UNICODE-RANGE
157

    
158
        # CDO
159
        u' <!-- ': [('S', u' ', 1, 1),
160
                   ('CDO', u'<!--', 1, 2),
161
                   ('S', u' ', 1, 6)],
162
        u'"<!--""-->"': [('STRING', u'"<!--"', 1, 1),
163
                    ('STRING', u'"-->"', 1, 7)],
164

    
165
        # CDC
166
        u' --> ': [('S', u' ', 1, 1),
167
                  ('CDC', u'-->', 1, 2),
168
                  ('S', u' ', 1, 5)],
169

    
170
        # S
171
        u' ': [('S', u' ', 1, 1)],
172
        u'  ': [('S', u'  ', 1, 1)],
173
        u'\r': [('S', u'\r', 1, 1)],
174
        u'\n': [('S', u'\n', 1, 1)],
175
        u'\r\n': [('S', u'\r\n', 1, 1)],
176
        u'\f': [('S', u'\f', 1, 1)],
177
        u'\r': [('S', u'\r', 1, 1)],
178
        u'\t': [('S', u'\t', 1, 1)],
179
        u'\r\n\r\n\f\t ': [('S', u'\r\n\r\n\f\t ', 1, 1)],
180

    
181
        # COMMENT, for incomplete see later
182
        u'/*x*/ ': [('COMMENT', u'/*x*/', 1, 1),
183
                    ('S', u' ', 1, 6)],
184

    
185
        # FUNCTION
186
        u' x( ': [('S', u' ', 1, 1),
187
                  ('FUNCTION', u'x(', 1, 2),
188
                  ('S', u' ', 1, 4)],
189

    
190
        # INCLUDES
191
        u' ~= ': [('S', u' ', 1, 1),
192
                  ('INCLUDES', u'~=', 1, 2),
193
                  ('S', u' ', 1, 4)],
194
        u'~==': [('INCLUDES', u'~=', 1, 1), ('CHAR', u'=', 1, 3)],
195

    
196
        # DASHMATCH
197
        u' |= ': [('S', u' ', 1, 1),
198
                  ('DASHMATCH', u'|=', 1, 2),
199
                  ('S', u' ', 1, 4)],
200
        u'|==': [('DASHMATCH', u'|=', 1, 1), ('CHAR', u'=', 1, 3)],
201

    
202
        # CHAR
203
        u' @ ': [('S', u' ', 1, 1),
204
                  ('CHAR', u'@', 1, 2),
205
                  ('S', u' ', 1, 3)],
206

    
207
        # --- overwritten for CSS 2.1 ---
208
        # LBRACE
209
        u' { ': [('S', u' ', 1, 1),
210
                 ('CHAR', u'{', 1, 2),
211
                 ('S', u' ', 1, 3)],
212
        # PLUS
213
        u' + ': [('S', u' ', 1, 1),
214
                 ('CHAR', u'+', 1, 2),
215
                 ('S', u' ', 1, 3)],
216
        # GREATER
217
        u' > ': [('S', u' ', 1, 1),
218
                 ('CHAR', u'>', 1, 2),
219
                 ('S', u' ', 1, 3)],
220
        # COMMA
221
        u' , ': [('S', u' ', 1, 1),
222
                 ('CHAR', u',', 1, 2),
223
                 ('S', u' ', 1, 3)],
224
        # class
225
        u' . ': [('S', u' ', 1, 1),
226
                  ('CHAR', u'.', 1, 2),
227
                  ('S', u' ', 1, 3)],
228
        }
229

    
230
    tests3 = {
231
        # UNICODE-RANGE
232
        u' u+0 ': [('S', u' ', 1, 1),
233
                  ('UNICODE-RANGE', u'u+0', 1, 2),
234
                  ('S', u' ', 1, 5)],
235
        u' u+01 ': [('S', u' ', 1, 1),
236
                  ('UNICODE-RANGE', u'u+01', 1, 2),
237
                  ('S', u' ', 1, 6)],
238
        u' u+012 ': [('S', u' ', 1, 1),
239
                  ('UNICODE-RANGE', u'u+012', 1, 2),
240
                  ('S', u' ', 1, 7)],
241
        u' u+0123 ': [('S', u' ', 1, 1),
242
                  ('UNICODE-RANGE', u'u+0123', 1, 2),
243
                  ('S', u' ', 1, 8)],
244
        u' u+01234 ': [('S', u' ', 1, 1),
245
                  ('UNICODE-RANGE', u'u+01234', 1, 2),
246
                  ('S', u' ', 1, 9)],
247
        u' u+012345 ': [('S', u' ', 1, 1),
248
                  ('UNICODE-RANGE', u'u+012345', 1, 2),
249
                  ('S', u' ', 1, 10)],
250
        u' u+0123456 ': [('S', u' ', 1, 1),
251
                  ('UNICODE-RANGE', u'u+012345', 1, 2),
252
                  ('NUMBER', u'6', 1, 10),
253
                  ('S', u' ', 1, 11)],
254
        u' U+123456 ': [('S', u' ', 1, 1),
255
                  ('UNICODE-RANGE', u'U+123456', 1, 2),
256
                  ('S', u' ', 1, 10)],
257
        u' \\55+abcdef ': [('S', u' ', 1, 1),
258
                  ('UNICODE-RANGE', u'U+abcdef', 1, 2),
259
                  ('S', u' ', 1, 12)],
260
        u' \\75+abcdef ': [('S', u' ', 1, 1),
261
                  ('UNICODE-RANGE', u'u+abcdef', 1, 2),
262
                  ('S', u' ', 1, 12)],
263
        u' u+0-1 ': [('S', u' ', 1, 1),
264
                  ('UNICODE-RANGE', u'u+0-1', 1, 2),
265
                  ('S', u' ', 1, 7)],
266
        u' u+0-1, u+123456-abcdef ': [('S', u' ', 1, 1),
267
                  ('UNICODE-RANGE', u'u+0-1', 1, 2),
268
                  ('CHAR', u',', 1, 7),
269
                  ('S', u' ', 1, 8),
270
                  ('UNICODE-RANGE', u'u+123456-abcdef', 1, 9),
271
                  ('S', u' ', 1, 24)],
272

    
273
        # specials
274
        u'c\\olor': [('IDENT', u'c\\olor', 1, 1)],
275
        #u'-1': [('CHAR', u'-', 1, 1), ('NUMBER', u'1', 1, 2)],
276
        #u'-1px': [('CHAR', u'-', 1, 1), ('DIMENSION', u'1px', 1, 2)],
277
        u'-1': [('NUMBER', u'-1', 1, 1)],
278
        u'-1px': [('DIMENSION', u'-1px', 1, 1)],
279

    
280
        # ATKEYWORD
281
        u' @x ': [('S', u' ', 1, 1),
282
                  ('ATKEYWORD', u'@x', 1, 2),
283
                  ('S', u' ', 1, 4)],
284
        u'@X': [('ATKEYWORD', u'@X', 1, 1)],
285
        u'@\\x': [('ATKEYWORD', u'@\\x', 1, 1)],
286
        # -
287
        u'@1x': [('CHAR', u'@', 1, 1),
288
                  ('DIMENSION', u'1x', 1, 2)],
289

    
290
        # DIMENSION
291
        u' 0px ': [('S', u' ', 1, 1),
292
                 ('DIMENSION', u'0px', 1, 2),
293
                 ('S', u' ', 1, 5)],
294
        u' 1s ': [('S', u' ', 1, 1),
295
                 ('DIMENSION', u'1s', 1, 2),
296
                 ('S', u' ', 1, 4)],
297
        u'0.2EM': [('DIMENSION', u'0.2EM', 1, 1)],
298
        u'1p\\x': [('DIMENSION', u'1p\\x', 1, 1)],
299
        u'1PX': [('DIMENSION', u'1PX', 1, 1)],
300

    
301
        # NUMBER
302
        u' - 0 ': [('S', u' ', 1, 1),
303
                 ('CHAR', u'-', 1, 2),
304
                 ('S', u' ', 1, 3),
305
                 ('NUMBER', u'0', 1, 4),
306
                 ('S', u' ', 1, 5)],
307
        u' + 0 ': [('S', u' ', 1, 1),
308
                 ('CHAR', u'+', 1, 2),
309
                 ('S', u' ', 1, 3),
310
                 ('NUMBER', u'0', 1, 4),
311
                 ('S', u' ', 1, 5)],
312

    
313
        # PREFIXMATCH
314
        u' ^= ': [('S', u' ', 1, 1),
315
                  ('PREFIXMATCH', u'^=', 1, 2),
316
                  ('S', u' ', 1, 4)],
317
        u'^==': [('PREFIXMATCH', u'^=', 1, 1), ('CHAR', u'=', 1, 3)],
318

    
319
        # SUFFIXMATCH
320
        u' $= ': [('S', u' ', 1, 1),
321
                  ('SUFFIXMATCH', u'$=', 1, 2),
322
                  ('S', u' ', 1, 4)],
323
        u'$==': [('SUFFIXMATCH', u'$=', 1, 1), ('CHAR', u'=', 1, 3)],
324

    
325
        # SUBSTRINGMATCH
326
        u' *= ': [('S', u' ', 1, 1),
327
                  ('SUBSTRINGMATCH', u'*=', 1, 2),
328
                  ('S', u' ', 1, 4)],
329
        u'*==': [('SUBSTRINGMATCH', u'*=', 1, 1), ('CHAR', u'=', 1, 3)],
330

    
331
        # BOM only at start
332
#        u'\xFEFF ': [('BOM', u'\xfeFF', 1, 1),
333
#                  ('S', u' ', 1, 1)],
334
#        u' \xFEFF ': [('S', u' ', 1, 1),
335
#                  ('IDENT', u'\xfeFF', 1, 2),
336
#                  ('S', u' ', 1, 5)],
337
        u'\xfe\xff ': [('BOM', u'\xfe\xff', 1, 1),
338
                  ('S', u' ', 1, 1)],
339
        u' \xfe\xff ': [('S', u' ', 1, 1),
340
                  ('IDENT', u'\xfe\xff', 1, 2),
341
                  ('S', u' ', 1, 4)],
342
        u'\xef\xbb\xbf ': [('BOM', u'\xef\xbb\xbf', 1, 1),
343
                  ('S', u' ', 1, 1)],
344
        u' \xef\xbb\xbf ': [('S', u' ', 1, 1),
345
                  ('IDENT', u'\xef\xbb\xbf', 1, 2),
346
                  ('S', u' ', 1, 5)],        }
347

    
348
    tests2 = {
349
        # simple backslash escapes do not work for a-f (they are hex digits)!
350
        # IMPORT_SYM
351
        u' @import ': [('S', u' ', 1, 1),
352
                 ('IMPORT_SYM', u'@import', 1, 2),
353
                 ('S', u' ', 1, 9)],
354
        u'@IMPORT': [('IMPORT_SYM', u'@IMPORT', 1, 1)],
355
        u'@\\49\r\nMPORT': [('IMPORT_SYM', u'@\\49\r\nMPORT', 1, 1)],
356
        ur'@\i\m\p\o\r\t': [('IMPORT_SYM', ur'@\i\m\p\o\r\t', 1, 1)],
357
        ur'@\I\M\P\O\R\T': [('IMPORT_SYM', ur'@\I\M\P\O\R\T', 1, 1)],
358
        ur'@\49 \04d\0050\0004f\000052\54': [('IMPORT_SYM',
359
                                        ur'@\49 \04d\0050\0004f\000052\54',
360
                                        1, 1)],
361
        ur'@\69 \06d\0070\0006f\000072\74': [('IMPORT_SYM',
362
                                        ur'@\69 \06d\0070\0006f\000072\74',
363
                                        1, 1)],
364

    
365
        # PAGE_SYM
366
        u' @page ': [('S', u' ', 1, 1),
367
                 ('PAGE_SYM', u'@page', 1, 2),
368
                 ('S', u' ', 1, 7)],
369
        u'@PAGE': [('PAGE_SYM', u'@PAGE', 1, 1)],
370
        ur'@\pa\ge': [('PAGE_SYM', ur'@\pa\ge', 1, 1)],
371
        ur'@\PA\GE': [('PAGE_SYM', ur'@\PA\GE', 1, 1)],
372
        ur'@\50\41\47\45': [('PAGE_SYM', ur'@\50\41\47\45', 1, 1)],
373
        ur'@\70\61\67\65': [('PAGE_SYM', ur'@\70\61\67\65', 1, 1)],
374

    
375
        # MEDIA_SYM
376
        u' @media ': [('S', u' ', 1, 1),
377
                 ('MEDIA_SYM', u'@media', 1, 2),
378
                 ('S', u' ', 1, 8)],
379
        u'@MEDIA': [('MEDIA_SYM', u'@MEDIA', 1, 1)],
380
        ur'@\med\ia': [('MEDIA_SYM', ur'@\med\ia', 1, 1)],
381
        ur'@\MED\IA': [('MEDIA_SYM', ur'@\MED\IA', 1, 1)],
382
        u'@\\4d\n\\45\r\\44\t\\49\r\nA': [('MEDIA_SYM', u'@\\4d\n\\45\r\\44\t\\49\r\nA', 1, 1)],
383
        u'@\\4d\n\\45\r\\44\t\\49\r\\41\f': [('MEDIA_SYM',
384
                                        u'@\\4d\n\\45\r\\44\t\\49\r\\41\f',
385
                                        1, 1)],
386
        u'@\\6d\n\\65\r\\64\t\\69\r\\61\f': [('MEDIA_SYM',
387
                                        u'@\\6d\n\\65\r\\64\t\\69\r\\61\f',
388
                                        1, 1)],
389

    
390
        # FONT_FACE_SYM
391
        u' @font-face ': [('S', u' ', 1, 1),
392
                 ('FONT_FACE_SYM', u'@font-face', 1, 2),
393
                 ('S', u' ', 1, 12)],
394
        u'@FONT-FACE': [('FONT_FACE_SYM', u'@FONT-FACE', 1, 1)],
395
        ur'@f\o\n\t\-face': [('FONT_FACE_SYM', ur'@f\o\n\t\-face', 1, 1)],
396
        ur'@F\O\N\T\-FACE': [('FONT_FACE_SYM', ur'@F\O\N\T\-FACE', 1, 1)],
397
        # TODO: "-" as hex!
398
        ur'@\46\4f\4e\54\-\46\41\43\45': [('FONT_FACE_SYM',
399
            ur'@\46\4f\4e\54\-\46\41\43\45', 1, 1)],
400
        ur'@\66\6f\6e\74\-\66\61\63\65': [('FONT_FACE_SYM',
401
            ur'@\66\6f\6e\74\-\66\61\63\65', 1, 1)],
402

    
403
        # CHARSET_SYM only if "@charset "!
404
        u'@charset  ': [('CHARSET_SYM', u'@charset ', 1, 1),
405
                        ('S', u' ', 1, 10)],
406
        u' @charset  ': [('S', u' ', 1, 1),
407
                 ('CHARSET_SYM', u'@charset ', 1, 2), # not at start
408
                 ('S', u' ', 1, 11)],
409
        u'@charset': [('ATKEYWORD', u'@charset', 1, 1)], # no ending S
410
        u'@CHARSET ': [('ATKEYWORD', u'@CHARSET', 1, 1),# uppercase
411
                       ('S', u' ', 1, 9)],
412
        u'@cha\\rset ': [('ATKEYWORD', u'@cha\\rset', 1, 1), # not literal
413
                         ('S', u' ', 1, 10)],
414

    
415
        # NAMESPACE_SYM
416
        u' @namespace ': [('S', u' ', 1, 1),
417
                 ('NAMESPACE_SYM', u'@namespace', 1, 2),
418
                 ('S', u' ', 1, 12)],
419
        ur'@NAMESPACE': [('NAMESPACE_SYM', ur'@NAMESPACE', 1, 1)],
420
        ur'@\na\me\s\pace': [('NAMESPACE_SYM', ur'@\na\me\s\pace', 1, 1)],
421
        ur'@\NA\ME\S\PACE': [('NAMESPACE_SYM', ur'@\NA\ME\S\PACE', 1, 1)],
422
        ur'@\4e\41\4d\45\53\50\41\43\45': [('NAMESPACE_SYM',
423
            ur'@\4e\41\4d\45\53\50\41\43\45', 1, 1)],
424
        ur'@\6e\61\6d\65\73\70\61\63\65': [('NAMESPACE_SYM',
425
            ur'@\6e\61\6d\65\73\70\61\63\65', 1, 1)],
426

    
427
        # ATKEYWORD
428
        u' @unknown ': [('S', u' ', 1, 1),
429
                 ('ATKEYWORD', u'@unknown', 1, 2),
430
                 ('S', u' ', 1, 10)],
431

    
432
        # STRING
433
        # strings with linebreak in it
434
        u' "\\na"\na': [('S', u' ', 1, 1),
435
                   ('STRING', u'"\\na"', 1, 2),
436
                   ('S', u'\n', 1, 7),
437
                   ('IDENT', u'a', 2, 1)],
438
        u" '\\na'\na": [('S', u' ', 1, 1),
439
                   ('STRING', u"'\\na'", 1, 2),
440
                   ('S', u'\n', 1, 7),
441
                   ('IDENT', u'a', 2, 1)],
442
        u' "\\r\\n\\t\\n\\ra"a': [('S', u' ', 1, 1),
443
                   ('STRING', u'"\\r\\n\\t\\n\\ra"', 1, 2),
444
                   ('IDENT', u'a', 1, 15)],
445

    
446
        # IMPORTANT_SYM is not IDENT!!!
447
        u' !important ': [('S', u' ', 1, 1),
448
                ('CHAR', u'!', 1, 2),
449
                 ('IDENT', u'important', 1, 3),
450
                 ('S', u' ', 1, 12)],
451
        u'! /*1*/ important ': [
452
                ('CHAR', u'!', 1, 1),
453
                ('S', u' ', 1, 2),
454
                ('COMMENT', u'/*1*/', 1, 3),
455
                ('S', u' ', 1, 8),
456
                 ('IDENT', u'important', 1, 9),
457
                 ('S', u' ', 1, 18)],
458
        u'! important': [('CHAR', u'!', 1, 1),
459
                         ('S', u' ', 1, 2),
460
                         ('IDENT', u'important', 1, 3)],
461
        u'!\n\timportant': [('CHAR', u'!', 1, 1),
462
                            ('S', u'\n\t', 1, 2),
463
                            ('IDENT', u'important', 2, 2)],
464
        u'!IMPORTANT': [('CHAR', u'!', 1, 1),
465
                        ('IDENT', u'IMPORTANT', 1, 2)],
466
        ur'!\i\m\p\o\r\ta\n\t': [('CHAR', u'!', 1, 1),
467
                                 ('IDENT',
468
                                  ur'\i\m\p\o\r\ta\n\t', 1, 2)],
469
        ur'!\I\M\P\O\R\Ta\N\T': [('CHAR', u'!', 1, 1),
470
                                 ('IDENT',
471
                                  ur'\I\M\P\O\R\Ta\N\T', 1, 2)],
472
        ur'!\49\4d\50\4f\52\54\41\4e\54': [('CHAR', u'!', 1, 1),
473
                                           ('IDENT',
474
                                            ur'IMPORTANT',
475
                                            1, 2)],
476
        ur'!\69\6d\70\6f\72\74\61\6e\74': [('CHAR', u'!', 1, 1),
477
                                           ('IDENT',
478
                                            ur'important',
479
                                            1, 2)],
480
        }
481

    
482
    # CSS 2.1 token types overriding the generic CHAR entries in testsall
483
    tests2only = {
484
        # LBRACE
485
        u' { ': [('S', u' ', 1, 1),
486
                 ('LBRACE', u'{', 1, 2),
487
                 ('S', u' ', 1, 3)],
488
        # PLUS
489
        u' + ': [('S', u' ', 1, 1),
490
                 ('PLUS', u'+', 1, 2),
491
                 ('S', u' ', 1, 3)],
492
        # GREATER
493
        u' > ': [('S', u' ', 1, 1),
494
                 ('GREATER', u'>', 1, 2),
495
                 ('S', u' ', 1, 3)],
496
        # COMMA
497
        u' , ': [('S', u' ', 1, 1),
498
                 ('COMMA', u',', 1, 2),
499
                 ('S', u' ', 1, 3)],
500
        # class
501
        u' . ': [('S', u' ', 1, 1),
502
                 ('CLASS', u'.', 1, 2),
503
                 ('S', u' ', 1, 3)],
504
        }
505

    
506
    testsfullsheet = {
507
        # escape ends with explicit space but \r\n as single space
508
        u'\\65\r\nb': [('IDENT', u'eb', 1, 1)],
509

    
510
        # STRING
511
        ur'"\""': [('STRING', ur'"\""', 1, 1)],
512
        ur'"\" "': [('STRING', ur'"\" "', 1, 1)],
513
        u"""'\\''""": [('STRING', u"""'\\''""", 1, 1)],
514
        u'''"\\""''': [('STRING', u'''"\\""''', 1, 1)],
515
        u' "\na': [('S', u' ', 1, 1),
516
                   ('INVALID', u'"', 1, 2),
517
                   ('S', u'\n', 1, 3),
518
                   ('IDENT', u'a', 2, 1)],
519

    
520
        # strings with linebreak in it
521
        u' "\\na\na': [('S', u' ', 1, 1),
522
                   ('INVALID', u'"\\na', 1, 2),
523
                   ('S', u'\n', 1, 6),
524
                   ('IDENT', u'a', 2, 1)],
525
        u' "\\r\\n\\t\\n\\ra\na': [('S', u' ', 1, 1),
526
                   ('INVALID', u'"\\r\\n\\t\\n\\ra', 1, 2),
527
                   ('S', u'\n', 1, 14),
528
                   ('IDENT', u'a', 2, 1)],
529
        # URI
530
        u'ur\\l(a)': [('URI', u'ur\\l(a)', 1, 1)],
531
        u'url(a)': [('URI', u'url(a)', 1, 1)],
532
        u'\\55r\\4c(a)': [('URI', u'UrL(a)', 1, 1)],
533
        u'\\75r\\6c(a)': [('URI', u'url(a)', 1, 1)],
534
        u' url())': [('S', u' ', 1, 1),
535
                 ('URI', u'url()', 1, 2),
536
                 ('CHAR', u')', 1, 7)],
537
        u'url("x"))': [('URI', u'url("x")', 1, 1),
538
                       ('CHAR', u')', 1, 9)],
539
        u"url('x'))": [('URI', u"url('x')", 1, 1),
540
                       ('CHAR', u')', 1, 9)],
541
        }
542

    
543
    # tests if fullsheet=False is set on tokenizer
544
    testsfullsheetfalse = {
545
        # COMMENT incomplete
546
        u'/*': [('CHAR', u'/', 1, 1),
547
                ('CHAR', u'*', 1, 2)],
548

    
549
        # INVALID incomplete
550
        u' " ': [('S', u' ', 1, 1),
551
                 ('INVALID', u'" ', 1, 2)],
552
        u" 'abc\"with quote\" in it": [('S', u' ', 1, 1),
553
                 ('INVALID', u"'abc\"with quote\" in it", 1, 2)],
554

    
555
        # URI incomplete
556
        u'url(a': [('FUNCTION', u'url(', 1, 1),
557
                   ('IDENT', u'a', 1, 5)],
558
        u'url("a': [('FUNCTION', u'url(', 1, 1),
559
                   ('INVALID', u'"a', 1, 5)],
560
        u"url('a": [('FUNCTION', u'url(', 1, 1),
561
                   ('INVALID', u"'a", 1, 5)],
562
        u"UR\\l('a": [('FUNCTION', u'UR\\l(', 1, 1),
563
                   ('INVALID', u"'a", 1, 6)],
564
        }
565

    
566
    # tests if fullsheet=True is set on tokenizer
567
    testsfullsheettrue = {
568
        # COMMENT incomplete
569
        u'/*': [('COMMENT', u'/**/', 1, 1)],
570

    
571
        # INVALID incomplete => STRING
572
        u' " ': [('S', u' ', 1, 1),
573
                 ('STRING', u'" "', 1, 2)],
574
        u" 'abc\"with quote\" in it": [('S', u' ', 1, 1),
575
                 ('STRING', u"'abc\"with quote\" in it'", 1, 2)],
576

    
577
        # URI incomplete FUNC => URI
578
        u'url(a': [('URI', u'url(a)', 1, 1)],
579
        u'url( a': [('URI', u'url( a)', 1, 1)],
580
        u'url("a': [('URI', u'url("a")', 1, 1)],
581
        u'url( "a ': [('URI', u'url( "a ")', 1, 1)],
582
        u"url('a": [('URI', u"url('a')", 1, 1)],
583
        u'url("a"': [('URI', u'url("a")', 1, 1)],
584
        u"url('a'": [('URI', u"url('a')", 1, 1)],
585
        }
586

    
587
    def setUp(self):
        """Create a ``Tokenizer`` with default arguments for each test.

        The instance is stored as ``self.tokenizer``.
        """
        self.tokenizer = Tokenizer()
590

    
591
#    NOT USED
592
#    def test_push(self):
593
#        "Tokenizer.push()"
594
#        r = []
595
#        def do():
596
#            T = Tokenizer()
597
#            x = False
598
#            for t in T.tokenize('1 x 2 3'):
599
#                if not x and t[1] == 'x':
600
#                    T.push(t)
601
#                    x = True
602
#                r.append(t[1])
603
#            return ''.join(r)
604
#
605
#        # push reinserts token into token stream, so x is doubled
606
#        self.assertEqual('1 xx 2 3', do())
607

    
608
#    def test_linenumbers(self):
609
#        "Tokenizer line + col"
610
#        pass
611

    
612
    def test_tokenize(self):
        """cssutils Tokenizer().tokenize()

        Merges the ``testsall``, ``tests2``, ``tests3``, ``testsfullsheet``
        and ``testsfullsheetfalse`` tables (on equal keys a later table
        replaces an earlier one), tokenizes every key without ``fullsheet``
        and compares the produced tokens with the expected token tuples.
        """
        import cssutils.cssproductions
        tokenizer = Tokenizer(cssutils.cssproductions.MACROS,
                              cssutils.cssproductions.PRODUCTIONS)
        tests = {}
        for table in (self.testsall, self.tests2, self.tests3,
                      self.testsfullsheet, self.testsfullsheetfalse):
            tests.update(table)

        for css, expected_tokens in tests.items():
            # tokenize once so that content and count are checked against
            # the very same token list
            actual_tokens = list(tokenizer.tokenize(css))

            # check token format; zip() stops at the shorter sequence, so a
            # surplus token can not raise IndexError here but is reported
            # by the count assertion below instead
            for expected, actual in zip(expected_tokens, actual_tokens):
                self.assertEqual(expected, actual)

            # check if all same number of tokens
            self.assertEqual(len(actual_tokens), len(expected_tokens))
633

    
634
    def test_tokenizefullsheet(self):
        """cssutils Tokenizer().tokenize(fullsheet=True)

        Merges the ``testsall``, ``tests2``, ``tests3``, ``testsfullsheet``
        and ``testsfullsheettrue`` tables (on equal keys a later table
        replaces an earlier one) and tokenizes every key with
        ``fullsheet=True``.  Tokens beyond the expected list must be of
        type ``'EOF'``.
        """
        import cssutils.cssproductions
        tokenizer = Tokenizer(cssutils.cssproductions.MACROS,
                              cssutils.cssproductions.PRODUCTIONS)
        cases = {}
        for table in (self.testsall, self.tests2, self.tests3,
                      self.testsfullsheet, self.testsfullsheettrue):
            cases.update(table)

        for css in cases:
            wanted = cases[css]

            # check token format
            stream = tokenizer.tokenize(css, fullsheet=True)
            for position, token in enumerate(stream):
                if position < len(wanted):
                    self.assertEqual(wanted[position], token)
                else:
                    # EOF is added
                    self.assertEqual(token[0], 'EOF')

            # check if all same number of tokens
            produced = list(tokenizer.tokenize(css, fullsheet=True))
            # EOF is added so -1
            self.assertEqual(len(produced) - 1, len(wanted))
661

    
662

    
663
    # --------------
664

    
665
    def __old(self):
666

    
667
        testsOLD = {
668
            u'x x1 -x .-x #_x -': [(1, 1, tt.IDENT, u'x'),
669
               (1, 2, 'S', u' '),
670
               (1, 3, tt.IDENT, u'x1'),
671
               (1, 5, 'S', u' '),
672
               (1, 6, tt.IDENT, u'-x'),
673
               (1, 8, 'S', u' '),
674
               (1, 9, tt.CLASS, u'.'),
675
               (1, 10, tt.IDENT, u'-x'),
676
               (1, 12, 'S', u' '),
677
               (1, 13, tt.HASH, u'#_x'),
678
               (1, 16, 'S', u' '),
679
               (1, 17, 'DELIM', u'-')],
680

    
681
            # num
682
            u'1 1.1 -1 -1.1 .1 -.1 1.': [(1, 1, tt.NUMBER, u'1'),
683
               (1, 2, 'S', u' '), (1, 3, tt.NUMBER, u'1.1'),
684
               (1, 6, 'S', u' '), (1, 7, tt.NUMBER, u'-1'),
685
               (1, 9, 'S', u' '), (1, 10, tt.NUMBER, u'-1.1'),
686
               (1, 14, 'S', u' '), (1, 15, tt.NUMBER, u'0.1'),
687
               (1, 17, 'S', u' '), (1, 18, tt.NUMBER, u'-0.1'),
688
               (1, 21, 'S', u' '),
689
               (1, 22, tt.NUMBER, u'1'), (1, 23, tt.CLASS, u'.')
690
                                         ],
691
            # CSS3 pseudo
692
            u'::': [(1, 1, tt.PSEUDO_ELEMENT, u'::')],
693

    
694
            # SPECIALS
695
            u'*+>~{},': [(1, 1, tt.UNIVERSAL, u'*'),
696
               (1, 2, tt.PLUS, u'+'),
697
               (1, 3, tt.GREATER, u'>'),
698
               (1, 4, tt.TILDE, u'~'),
699
               (1, 5, tt.LBRACE, u'{'),
700
               (1, 6, tt.RBRACE, u'}'),
701
               (1, 7, tt.COMMA, u',')],
702

    
703
            # DELIM
704
            u'!%:&$|': [(1, 1, 'DELIM', u'!'),
705
               (1, 2, 'DELIM', u'%'),
706
               (1, 3, 'DELIM', u':'),
707
               (1, 4, 'DELIM', u'&'),
708
               (1, 5, 'DELIM', u'$'),
709
               (1, 6, 'DELIM', u'|')],
710

    
711

    
712
            # DIMENSION
713
            u'5em': [(1, 1, tt.DIMENSION, u'5em')],
714
            u' 5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'5em')],
715
            u'5em ': [(1, 1, tt.DIMENSION, u'5em'), (1, 4, 'S', u' ')],
716

    
717
            u'-5em': [(1, 1, tt.DIMENSION, u'-5em')],
718
            u' -5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'-5em')],
719
            u'-5em ': [(1, 1, tt.DIMENSION, u'-5em'), (1, 5, 'S', u' ')],
720

    
721
            u'.5em': [(1, 1, tt.DIMENSION, u'0.5em')],
722
            u' .5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'0.5em')],
723
            u'.5em ': [(1, 1, tt.DIMENSION, u'0.5em'), (1, 5, 'S', u' ')],
724

    
725
            u'-.5em': [(1, 1, tt.DIMENSION, u'-0.5em')],
726
            u' -.5em': [(1, 1, 'S', u' '), (1, 2, tt.DIMENSION, u'-0.5em')],
727
            u'-.5em ': [(1, 1, tt.DIMENSION, u'-0.5em'), (1, 6, 'S', u' ')],
728

    
729
            u'5em5_-': [(1, 1, tt.DIMENSION, u'5em5_-')],
730

    
731
            u'a a5 a5a 5 5a 5a5': [(1, 1, tt.IDENT, u'a'),
732
               (1, 2, 'S', u' '),
733
               (1, 3, tt.IDENT, u'a5'),
734
               (1, 5, 'S', u' '),
735
               (1, 6, tt.IDENT, u'a5a'),
736
               (1, 9, 'S', u' '),
737
               (1, 10, tt.NUMBER, u'5'),
738
               (1, 11, 'S', u' '),
739
               (1, 12, tt.DIMENSION, u'5a'),
740
               (1, 14, 'S', u' '),
741
               (1, 15, tt.DIMENSION, u'5a5')],
742

    
743
            # URI
744
            u'url()': [(1, 1, tt.URI, u'url()')],
745
            u'url();': [(1, 1, tt.URI, u'url()'), (1, 6, tt.SEMICOLON, ';')],
746
            u'url("x")': [(1, 1, tt.URI, u'url("x")')],
747
            u'url( "x")': [(1, 1, tt.URI, u'url("x")')],
748
            u'url("x" )': [(1, 1, tt.URI, u'url("x")')],
749
            u'url( "x" )': [(1, 1, tt.URI, u'url("x")')],
750
            u' url("x")': [
751
                (1, 1, 'S', u' '),
752
                (1, 2, tt.URI, u'url("x")')],
753
            u'url("x") ': [
754
                (1, 1, tt.URI, u'url("x")'),
755
                (1, 9, 'S', u' '),
756
                ],
757
            u'url(ab)': [(1, 1, tt.URI, u'url(ab)')],
758
            u'url($#/ab)': [(1, 1, tt.URI, u'url($#/ab)')],
759
            u'url(\1233/a/b)': [(1, 1, tt.URI, u'url(\1233/a/b)')],
760
            # not URI
761
            u'url("1""2")': [
762
                (1, 1, tt.FUNCTION, u'url('),
763
                (1, 5, tt.STRING, u'"1"'),
764
                (1, 8, tt.STRING, u'"2"'),
765
                (1, 11, tt.RPARANTHESIS, u')'),
766
                ],
767
            u'url(a"2")': [
768
                (1, 1, tt.FUNCTION, u'url('),
769
                (1, 5, tt.IDENT, u'a'),
770
                (1, 6, tt.STRING, u'"2"'),
771
                (1, 9, tt.RPARANTHESIS, u')'),
772
                ],
773
            u'url(a b)': [
774
                (1, 1, tt.FUNCTION, u'url('),
775
                (1, 5, tt.IDENT, u'a'),
776
                (1, 6, 'S', u' '),
777
                (1, 7, tt.IDENT, u'b'),
778
                (1, 8, tt.RPARANTHESIS, u')'),
779
                ],
780

    
781
            # FUNCTION
782
            u' counter("x")': [
783
               (1,1, 'S', u' '),
784
               (1, 2, tt.FUNCTION, u'counter('),
785
               (1, 10, tt.STRING, u'"x"'),
786
               (1, 13, tt.RPARANTHESIS, u')')],
787
            # HASH
788
            u'# #a #_a #-a #1': [
789
                (1, 1, 'DELIM', u'#'),
790
                (1, 2, 'S', u' '),
791
                (1, 3, tt.HASH, u'#a'),
792
                (1, 5, 'S', u' '),
793
                (1, 6, tt.HASH, u'#_a'),
794
                (1, 9, 'S', u' '),
795
                (1, 10, tt.HASH, u'#-a'),
796
                (1, 13, 'S', u' '),
797
                (1, 14, tt.HASH, u'#1')
798
                ],
799
            u'#1a1 ': [
800
                (1, 1, tt.HASH, u'#1a1'),
801
                (1, 5, 'S', u' '),
802
                ],
803
            u'#1a1\n': [
804
                (1, 1, tt.HASH, u'#1a1'),
805
                (1, 5, 'S', u'\n'),
806
                ],
807
            u'#1a1{': [
808
                (1, 1, tt.HASH, u'#1a1'),
809
                (1, 5, tt.LBRACE, u'{'),
810
                ],
811
            u'#1a1 {': [
812
                (1, 1, tt.HASH, u'#1a1'),
813
                (1, 5, 'S', u' '),
814
                (1, 6, tt.LBRACE, u'{'),
815
                ],
816
            u'#1a1\n{': [
817
                (1, 1, tt.HASH, u'#1a1'),
818
                (1, 5, 'S', u'\n'),
819
                (2, 1, tt.LBRACE, u'{'),
820
                ],
821
            u'#1a1\n {': [
822
                (1, 1, tt.HASH, u'#1a1'),
823
                (1, 5, 'S', u'\n '),
824
                (2, 2, tt.LBRACE, u'{'),
825
                ],
826
            u'#1a1 \n{': [
827
                (1, 1, tt.HASH, u'#1a1'),
828
                (1, 5, 'S', u' \n'),
829
                (2, 1, tt.LBRACE, u'{'),
830
                ],
831
            # STRINGS with NL
832
            u'"x\n': [(1,1, tt.INVALID, u'"x\n')],
833
            u'"x\r': [(1,1, tt.INVALID, u'"x\r')],
834
            u'"x\f': [(1,1, tt.INVALID, u'"x\f')],
835
            u'"x\n ': [
836
               (1,1, tt.INVALID, u'"x\n'),
837
               (2,1, 'S', u' ')
838
               ]
839

    
840
            }
841

    
842
        tests = {
843
            u'/*a': xml.dom.SyntaxErr,
844
            u'"a': xml.dom.SyntaxErr,
845
            u"'a": xml.dom.SyntaxErr,
846
            u"\\0 a": xml.dom.SyntaxErr,
847
            u"\\00": xml.dom.SyntaxErr,
848
            u"\\000": xml.dom.SyntaxErr,
849
            u"\\0000": xml.dom.SyntaxErr,
850
            u"\\00000": xml.dom.SyntaxErr,
851
            u"\\000000": xml.dom.SyntaxErr,
852
            u"\\0000001": xml.dom.SyntaxErr
853
            }
854
#        self.tokenizer.log.raiseExceptions = True #!!
855
#        for css, exception in tests.items():
856
#            self.assertRaises(exception, self.tokenizer.tokenize, css)
857

    
858

    
859
if __name__ == '__main__':
    # Script entry point: only when this file is executed directly (rather
    # than imported) hand control to unittest's command-line runner, which
    # loads the TestCase classes from the running module and executes them.
    from unittest import main as run_tests
    run_tests()