Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / util.py @ 475

History | View | Annotate | Download (30 KB)

1
"""base classes and helper functions for css and stylesheets packages
2
"""
3
__all__ = []
4
__docformat__ = 'restructuredtext'
5
__version__ = '$Id$'
6

    
7
from helper import normalize
8
from itertools import ifilter, chain
9
import cssutils
10
import codec
11
import codecs
12
import errorhandler
13
import tokenize2
14
import types
15
import xml.dom
16

    
17
try:
    # Prefer the ``_fetchgae`` fetcher when that module is importable
    # (presumably the Google App Engine variant -- confirm).
    from _fetchgae import _defaultFetcher
except ImportError:
    # Otherwise fall back to the regular fetcher implementation.
    from _fetch import _defaultFetcher

# Module-level error handler, used by ``_readUrl`` to report decode problems.
log = errorhandler.ErrorHandler()
23

    
24
class _BaseClass(object):
    """
    Base class for Base, Base2 and _NewBase.

    **Base and Base2 will be removed in the future!**
    """
    # error handler shared by all inheriting classes
    _log = errorhandler.ErrorHandler()
    # token productions of the tokenizer
    _prods = tokenize2.CSSProductions

    def _checkReadonly(self):
        """
        Raise xml.dom.NoModificationAllowedErr if rule/... is readonly.

        An object counts as readonly if it has a true ``_readonly``
        attribute. Returns False if the object is writable.
        """
        if hasattr(self, '_readonly') and self._readonly:
            raise xml.dom.NoModificationAllowedErr(
                u'%s is readonly.' % self.__class__)
        return False

    def _valuestr(self, t):
        """
        Return string value of t (t may be a string, a list of token tuples
        or a single tuple in format (type, value, line, col).
        Mainly used to get a string value of t for error messages.

        Any false value of t (None, empty string, empty list) results in
        an empty string.
        """
        if not t:
            return u''
        if isinstance(t, basestring):
            return t
        # concatenate the second element of every item in t
        return u''.join([x[1] for x in t])
53

    
54

    
55
class _NewBase(_BaseClass):
    """
    New base class for classes using ProdParser.

    **Currently CSSValue and related ones only.**
    """
    def __init__(self):
        # start with an empty (default readonly) sequence
        self._seq = Seq()

    @property
    def seq(self):
        "Internal readonly attribute, **DO NOT USE**!"
        return self._seq

    def _setSeq(self, newseq):
        """Mark ``newseq`` readonly and use it as the new ``seq``."""
        newseq._readonly = True
        self._seq = newseq

    def _clearSeq(self):
        """Remove all items from the current ``seq``."""
        self._seq.clear()

    def _tempSeq(self, readonly=False):
        """Return a new Seq (writable by default) to set ``seq`` later."""
        return Seq(readonly=readonly)
78

    
79

    
80
class _NewListBase(_NewBase):
    """
    (EXPERIMENTAL)
    A base class used for list classes like stylesheets.MediaList

    Adds list like behaviour working on the ``value`` of the items in the
    inheriting class' ``seq``:

    - item in x => bool
    - len(x) => integer
    - get, set and del x[i]
    - for item in x
    - append(item)

    ``__setitem__`` and ``append`` must be overwritten in the inheriting
    class.
    """
    def __init__(self):
        self._seq = Seq()

    def __contains__(self, item):
        "True if ``item`` equals the value of any item in seq"
        return any(item == entry.value for entry in self._seq)

    def __delitem__(self, index):
        "remove the seq item at ``index``"
        del self._seq[index]

    def __getitem__(self, index):
        "return the value of the seq item at ``index``"
        entry = self._seq[index]
        return entry.value

    def __iter__(self):
        "generator yielding the value of each seq item"
        for entry in self._seq:
            yield entry.value

    def __len__(self):
        "number of items in seq"
        return len(self._seq)

    def __setitem__(self, index, item):
        "not implemented here, inheriting classes must provide it"
        raise NotImplementedError

    def append(self, item):
        "not implemented here, inheriting classes must provide it"
        raise NotImplementedError
126

    
127

    
128
class Base(_BaseClass):
    """
    **Superceded by _NewBase**

    **Superceded by Base2 which is used for new seq handling class.**

    Base class for most CSS and StyleSheets classes

    Contains helper methods for inheriting classes helping parsing

    ``_normalize`` is static as used by Preferences.
    """
    # tokenizer instance shared by all instances, used by ``_tokenize2``
    __tokenizer2 = tokenize2.Tokenizer()

    # for more on shorthand properties see
    # http://www.dustindiaz.com/css-shorthand/
    # format: shorthand: [(propname, mandatorycheck?)*]
    _SHORTHANDPROPERTIES = {
            u'background': [],
            #u'background-position': [], # list of 2 values!
            u'border': [],
            u'border-left': [],
            u'border-right': [],
            u'border-top': [],
            u'border-bottom': [],
            #u'border-color': [], # list or single but same values
            #u'border-style': [], # list or single but same values
            #u'border-width': [], # list or single but same values
            u'cue': [],
            u'font': [],
            u'list-style': [],
            #u'margin': [], # list or single but same values
            u'outline': [],
            #u'padding': [], # list or single but same values
            u'pause': []
            }

    @staticmethod
    def _normalize(x):
        r"""
        normalizes x, namely:

        - remove any \ before non unicode sequences (0-9a-zA-Z) so for
          x=="c\olor\" return "color" (unicode escape sequences should have
          been resolved by the tokenizer already)
        - lowercase

        Simply delegates to ``helper.normalize``.
        """
        return normalize(x)

    def _splitNamespacesOff(self, text_namespaces_tuple):
        """
        returns tuple (text, namespaces) where namespaces is a
        ``_SimpleNamespaces`` object.

        If ``text_namespaces_tuple`` is a tuple its first item is used as
        text and its second item to initialize the namespaces, else the
        given value itself is the text and the namespaces are empty.

        used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
        CSSStyleSheet
        """
        if isinstance(text_namespaces_tuple, tuple):
            return text_namespaces_tuple[0], _SimpleNamespaces(self._log,
                                                    text_namespaces_tuple[1])
        else:
            return text_namespaces_tuple, _SimpleNamespaces(log=self._log)

    def _tokenize2(self, textortokens):
        """
        returns tokens of textortokens which may already be tokens in which
        case simply returns input

        - a false value (None, empty string or list) returns None
        - a string is tokenized with the shared tokenizer
        - a tuple is taken as a single token and returned in a list
        - anything else is assumed to be tokens already and returned as
          an iterator
        """
        if not textortokens:
            return None
        elif isinstance(textortokens, basestring):
            # needs to be tokenized
            return self.__tokenizer2.tokenize(
                 textortokens)
        elif isinstance(textortokens, tuple):
            # a single token (like a comment)
            return [textortokens]
        else:
            # already tokenized but return an iterator
            return iter(textortokens)

    def _nexttoken(self, tokenizer, default=None):
        """
        returns next token in generator tokenizer or the default value

        The default is also returned if ``tokenizer`` is no iterator at all
        (e.g. None or a list).
        """
        try:
            # builtin next() works for Python 2 (``.next``) and
            # Python 3 (``.__next__``) iterators alike
            return next(tokenizer)
        except (StopIteration, AttributeError, TypeError):
            # exhausted or not an iterator
            return default

    def _type(self, token):
        "returns type of Tokenizer token (its first item) or None"
        if token:
            return token[0]
        else:
            return None

    def _tokenvalue(self, token, normalize=False):
        """
        returns value of Tokenizer token (its second item) or None,
        the value is normalized first if ``normalize`` is true
        """
        if token and normalize:
            return Base._normalize(token[1])
        elif token:
            return token[1]
        else:
            return None

    def _stringtokenvalue(self, token):
        r"""
        for STRING returns the actual content without surrounding "" or ''
        and without respective escapes, e.g.::

             "with \" char" => with " char

        Returns None if no token is given.
        """
        if token:
            value = token[1]
            # value[0] is the quote char used, unescape it and then
            # strip the surrounding quotes
            return value.replace('\\' + value[0], value[0])[1: - 1]
        else:
            return None

    def _uritokenvalue(self, token):
        r"""
        for URI returns the actual content without surrounding url()
        or url(""), url('') and without respective escapes, e.g.::

             url("\"") => "

        Returns None if no token is given.
        """
        if token:
            # strip leading ``url(`` and trailing ``)``
            value = token[1][4: - 1].strip()
            if value and (value[0] in '\'"') and (value[0] == value[ - 1]):
                # a string "..." or '...'
                value = value.replace('\\' + value[0], value[0])[1: - 1]
            return value
        else:
            return None

    def _tokensupto2(self,
                     tokenizer,
                     starttoken=None,
                     blockstartonly=False, # {
                     blockendonly=False, # }
                     mediaendonly=False,
                     importmediaqueryendonly=False, # ; or STRING
                     mediaqueryendonly=False, # { or STRING
                     semicolon=False, # ;
                     propertynameendonly=False, # :
                     propertyvalueendonly=False, # ! ; }
                     propertypriorityendonly=False, # ; }
                     selectorattendonly=False, # ]
                     funcendonly=False, # )
                     listseponly=False, # ,
                     separateEnd=False # returns (resulttokens, endtoken)
                     ):
        """
        returns a list of tokens (including ``starttoken`` if given) up to
        and including the end token or an EOF token

        end is defined by parameters, might be ; } ) or other, the first
        true ``...only`` parameter in the order of the signature wins

        default looks for ending "}" and ";"

        An end token only counts if all {}, [] and () opened so far are
        closed again.

        If ``separateEnd`` is true a tuple (tokens without the last one,
        last token) is returned instead, or ([], None) if there are no
        tokens at all.
        """
        ends = u';}'
        endtypes = ()
        brace = bracket = parant = 0 # {}, [], ()

        if blockstartonly: # {
            ends = u'{'
            brace = - 1 # set to 0 with first {
        elif blockendonly: # }
            ends = u'}'
            brace = 1
        elif mediaendonly: # }
            ends = u'}'
            brace = 1 # rules } and mediarules }
        elif importmediaqueryendonly:
            # end of mediaquery which may be ; or STRING
            ends = u';'
            endtypes = ('STRING',)
        elif mediaqueryendonly:
            # end of mediaquery which may be { or STRING
            # special case, see below
            ends = u'{'
            brace = - 1 # set to 0 with first {
            endtypes = ('STRING',)
        elif semicolon:
            ends = u';'
        elif propertynameendonly: # : and ; in case of an error
            ends = u':;'
        elif propertyvalueendonly: # ; or !important
            ends = u';!'
        elif propertypriorityendonly: # ;
            ends = u';'
        elif selectorattendonly: # ]
            ends = u']'
            if starttoken and self._tokenvalue(starttoken) == u'[':
                bracket = 1
        elif funcendonly: # )
            ends = u')'
            parant = 1
        elif listseponly: # ,
            ends = u','

        resulttokens = []
        if starttoken:
            resulttokens.append(starttoken)
            # the starttoken itself may open a nesting level
            val = starttoken[1]
            if u'[' == val:
                bracket += 1
            elif u'{' == val:
                brace += 1
            elif u'(' == val:
                parant += 1

        if tokenizer:
            for token in tokenizer:
                typ, val, line, col = token
                if 'EOF' == typ:
                    resulttokens.append(token)
                    break

                # keep track of the nesting levels
                if u'{' == val:
                    brace += 1
                elif u'}' == val:
                    brace -= 1
                elif u'[' == val:
                    bracket += 1
                elif u']' == val:
                    bracket -= 1
                # function( or single (
                elif u'(' == val or \
                     Base._prods.FUNCTION == typ:
                    parant += 1
                elif u')' == val:
                    parant -= 1

                resulttokens.append(token)

                if (brace == bracket == parant == 0) and (
                    val in ends or typ in endtypes):
                    break
                elif mediaqueryendonly and brace == - 1 and (
                     bracket == parant == 0) and typ in endtypes:
                    # mediaqueryendonly with STRING
                    break
        if separateEnd:
            # TODO: use this method as generator, then this makes sense
            if resulttokens:
                return resulttokens[: - 1], resulttokens[ - 1]
            else:
                return resulttokens, None
        else:
            return resulttokens

    def _adddefaultproductions(self, productions, new=None):
        """
        adds default productions if not already present, used by
        _parse only

        each production should return the next expected token
        normaly a name like "uri" or "EOF"
        some have no expectation like S or COMMENT, so simply return
        the current value of ``expected``

        Returns a new dict of the defaults for ATKEYWORD, COMMENT, S and
        EOF updated with the given ``productions``. ``new`` is used by the
        default ATKEYWORD production which sets ``new['wellformed']``.
        """
        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule)
                return expected
            else:
                new['wellformed'] = False
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default implementation for COMMENT token adds CSSCommentRule"
            seq.append(cssutils.css.CSSComment([token]))
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default implementation for S token, does nothing"
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        p = {'ATKEYWORD': ATKEYWORD,
             'COMMENT': COMMENT,
             'S': S,
             'EOF': EOF # only available if fullsheet
             }
        p.update(productions)
        return p

    def _parse(self, expected, seq, tokenizer, productions, default=None,
               new=None, initialtoken=None):
        """
        puts parsed tokens in seq by calling a production with
            (expected, seq, token, tokenizer)

        expected
            a name what token or value is expected next, e.g. 'uri'
        seq
            to add rules etc to
        tokenizer
            iterator of tokens, also given to each production so it may
            consume further tokens itself
        productions
            callbacks {tokentype: callback}
        default
            default callback if tokentype not in productions
        new
            used to init default productions
        initialtoken
            will be used together with tokenizer running 1st this token
            and then all tokens in tokenizer

        returns (wellformed, expected) which the last prod might have set
        """
        wellformed = True

        if initialtoken:
            # run initialtoken first and then all tokens of tokenizer
            fulltokenizer = chain([initialtoken], tokenizer)
        else:
            fulltokenizer = tokenizer

        if fulltokenizer:
            prods = self._adddefaultproductions(productions, new)
            for token in fulltokenizer:
                p = prods.get(token[0], default)
                if p:
                    # productions get the original tokenizer as
                    # initialtoken has been consumed already
                    expected = p(expected, seq, token, tokenizer)
                else:
                    wellformed = False
                    self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
        return wellformed, expected
469

    
470

    
471
class Base2(Base, _NewBase):
    """
    **Superceded by _NewBase.**

    Base class for new seq handling.
    """
    def __init__(self):
        self._seq = Seq()

    def _adddefaultproductions(self, productions, new=None):
        """
        Return the default productions for ATKEYWORD, COMMENT, S and EOF
        updated with the given ``productions``, used by _parse only.

        Each production returns the next expected token, normally a name
        like "uri" or "EOF". Productions without an own expectation (S or
        COMMENT) simply return the ``expected`` value they were given.

        The defaults here add items to a ``Seq`` (value plus type, line and
        col where available) and report anything found after EOF was
        expected by setting ``new['wellformed']`` to False and logging an
        error.
        """
        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected == 'EOF':
                new['wellformed'] = False
                self._log.error(u'Expected EOF.', token=token)
                return expected
            # TODO: parentStyleSheet=self
            unknown = cssutils.css.CSSUnknownRule()
            unknown.cssText = self._tokensupto2(tokenizer, token)
            if unknown.wellformed:
                seq.append(unknown, cssutils.css.CSSRule.UNKNOWN_RULE,
                           line=token[2], col=token[3])
            return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default impl, adds CSSComment, an error if EOF was expected"
            if expected == 'EOF':
                new['wellformed'] = False
                self._log.error(u'Expected EOF but found comment.', token=token)
            comment = cssutils.css.CSSComment([token])
            seq.append(comment, 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, adds nothing, an error if EOF was expected"
            if expected == 'EOF':
                new['wellformed'] = False
                self._log.error(u'Expected EOF but found whitespace.', token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default impl, EOF simply expects EOF"
            return 'EOF'

        merged = dict(ATKEYWORD=ATKEYWORD,
                      COMMENT=COMMENT,
                      S=S,
                      EOF=EOF) # EOF only available if fullsheet
        # given productions take precedence over the defaults
        merged.update(productions)
        return merged
531

    
532

    
533
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing
    """
    def __init__(self, readonly=True):
        """
        Start with an empty list of items.

        readonly
            if true ``append``, ``appendItem``, ``replace`` and
            ``appendToVal`` raise AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __repr__(self):
        "returns a repr listing the repr of each contained item"
        return u'cssutils.%s.%s([\n    %s], readonly=%r)' % (self.__module__,
                                          self.__class__.__name__,
            u',\n    '.join([u'%r' % item for item in self._seq]
            ), self._readonly)

    def __str__(self):
        """
        returns a short description, for each item showing its value if
        this is a string, the second element if the value is a (token)
        tuple or the repr of the item otherwise
        """
        vals = []
        for v in self:
            if isinstance(v.value, basestring):
                vals.append(v.value)
            elif isinstance(v.value, tuple):
                # the *value* may be a tuple, the item itself never is
                vals.append(v.value[1])
            else:
                vals.append(repr(v))

        return "<cssutils.%s.%s object length=%r items=%r readonly=%r at 0x%x>" % (
                self.__module__, self.__class__.__name__, len(self),
                vals, self._readonly, id(self))

    def __delitem__(self, i):
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, itemtuple):
        """
        Replace item at ``i`` with a new Item built from ``itemtuple``
        which must be a sequence (val, typ, line, col).
        """
        # explicit unpacking, tuple parameters are no valid syntax anymore
        # in Python 3
        val, typ, line, col = itemtuple
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ=None, line=None, col=None):
        """
        If not readonly add ``val`` if it is an Item already or else a
        new Item(val, typ, line, col)
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            if isinstance(val, Item):
                self._seq.append(val)
            else:
                self._seq.append(Item(val, typ, line, col))

    def appendItem(self, item):
        "if not readonly add item which must be an Item"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(item)

    def clear(self):
        "remove all items, NOTE: works even if readonly"
        del self._seq[:]

    def insert(self, index, val, typ, line=None, col=None):
        "Insert new Item() at index # even if readony!? TODO!"
        self._seq.insert(index, Item(val, typ, line, col))

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace Item at index (default: the last one)
        with a new Item(val, typ, line, col)

        The old item is replaced completely, none of its attributes are
        kept.
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def rstrip(self):
        "trims S items from end of Seq"
        while self._seq and self._seq[ - 1].type == tokenize2.CSSProductions.S:
            # TODO: removed S before CSSComment /**/ /**/
            del self._seq[ - 1]

    def appendToVal(self, val=None, index= - 1):
        """
        if not readonly append ``val`` to the value of the Item at index
        (default: the last one), type, line and col of the Item are kept
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            old = self._seq[index]
            # Items are immutable so build a new one
            self._seq[index] = Item(old.value + val, old.type,
                                    old.line, old.col)
635

    
636

    
637
class Item(object):
    """
    A single entry of a seq list (successor to tuple items in old seq).

    All attributes are set once on creation and are readonly afterwards:

    type
        a sematic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line
        optional line as given on creation, defaults to None
    col
        optional column as given on creation, defaults to None
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value = value
        self.__type = type
        self.__line = line
        self.__col = col

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        details = (self.__module__, self.__class__.__name__,
                   self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % details
666

    
667

    
668
class ListSeq(object):
    """
    (EXPERIMENTAL)
    A base class used for list classes like cssutils.css.SelectorList or
    stylesheets.MediaList

    Adds list like behaviour working directly on the plain list ``seq``:

    - item in x => bool
    - len(x) => integer
    - get, set and del x[i]
    - for item in x
    - append(item)

    ``__setitem__`` and ``append`` must be overwritten in the inheriting
    class.
    """
    def __init__(self):
        # a simple list is sufficient here, no ``Seq`` needed
        self.seq = []

    def __contains__(self, item):
        "True if ``item`` is in seq"
        return item in self.seq

    def __delitem__(self, index):
        "remove the entry at ``index``"
        del self.seq[index]

    def __getitem__(self, index):
        "return the entry at ``index``"
        return self.seq[index]

    def __iter__(self):
        "generator yielding each entry of seq"
        for entry in self.seq:
            yield entry

    def __len__(self):
        "number of entries in seq"
        return len(self.seq)

    def __setitem__(self, index, item):
        "not implemented here, inheriting classes must provide it"
        raise NotImplementedError

    def append(self, item):
        "not implemented here, inheriting classes must provide it"
        raise NotImplementedError
712

    
713

    
714
class _Namespaces(object):
715
    """
716
    A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
717
    Works on effective namespaces, so e.g. if::
718

719
        @namespace p1 "uri";
720
        @namespace p2 "uri";
721

722
    only the second rule is effective and kept.
723

724
    namespaces
725
        a dictionary {prefix: namespaceURI} containing the effective namespaces
726
        only. These are the latest set in the CSSStyleSheet.
727
    parentStyleSheet
728
        the parent CSSStyleSheet
729
    """
730
    def __init__(self, parentStyleSheet, log=None, *args):
731
        "no initial values are set, only the relevant sheet is"
732
        self.parentStyleSheet = parentStyleSheet
733
        self._log = log
734

    
735
    def __repr__(self):
736
        return "%r" % self.namespaces
737

    
738
    def __contains__(self, prefix):
739
        return prefix in self.namespaces
740

    
741
    def __delitem__(self, prefix):
742
        """deletes CSSNamespaceRule(s) with rule.prefix == prefix
743

744
        prefix '' and None are handled the same
745
        """
746
        if not prefix:
747
            prefix = u''
748
        delrule = self.__findrule(prefix)
749
        for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE,
750
                            self.parentStyleSheet.cssRules)):
751
            if rule == delrule:
752
                self.parentStyleSheet.deleteRule(i)
753
                return
754

    
755
        self._log.error('Prefix %s not found.' % prefix,
756
                        error=xml.dom.NamespaceErr)
757

    
758
    def __getitem__(self, prefix):
759
        try:
760
            return self.namespaces[prefix]
761
        except KeyError, e:
762
            self._log.error('Prefix %s not found.' % prefix,
763
                            error=xml.dom.NamespaceErr)
764

    
765
    def __iter__(self):
766
        return self.namespaces.__iter__()
767

    
768
    def __len__(self):
769
        return len(self.namespaces)
770

    
771
    def __setitem__(self, prefix, namespaceURI):
772
        "replaces prefix or sets new rule, may raise NoModificationAllowedErr"
773
        if not prefix:
774
            prefix = u'' # None or ''
775
        rule = self.__findrule(prefix)
776
        if not rule:
777
            self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule(
778
                                                    prefix=prefix,
779
                                                    namespaceURI=namespaceURI),
780
                                  inOrder=True)
781
        else:
782
            if prefix in self.namespaces:
783
                rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr
784
            if namespaceURI in self.namespaces.values():
785
                rule.prefix = prefix
786

    
787
    def __findrule(self, prefix):
788
        # returns namespace rule where prefix == key
789
        for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE,
790
                            reversed(self.parentStyleSheet.cssRules)):
791
            if rule.prefix == prefix:
792
                return rule
793

    
794
    @property
795
    def namespaces(self):
796
        """
797
        A property holding only effective @namespace rules in
798
        self.parentStyleSheets.
799
        """
800
        namespaces = {}
801
        for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE,
802
                            reversed(self.parentStyleSheet.cssRules)):
803
            if rule.namespaceURI not in namespaces.values():
804
                namespaces[rule.prefix] = rule.namespaceURI
805
        return namespaces
806

    
807
    def get(self, prefix, default):
808
        return self.namespaces.get(prefix, default)
809

    
810
    def items(self):
811
        return self.namespaces.items()
812

    
813
    def keys(self):
814
        return self.namespaces.keys()
815

    
816
    def values(self):
817
        return self.namespaces.values()
818

    
819
    def prefixForNamespaceURI(self, namespaceURI):
820
        """
821
        returns effective prefix for given namespaceURI or raises IndexError
822
        if this cannot be found"""
823
        for prefix, uri in self.namespaces.items():
824
            if uri == namespaceURI:
825
                return prefix
826
        raise IndexError(u'NamespaceURI %s not found.' % namespaceURI)
827

    
828
    def __str__(self):
829
        return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % (
830
                self.__class__.__name__, str(self.parentStyleSheet), id(self))
831

    
832

    
833
class _SimpleNamespaces(_Namespaces):
    """
    Namespaces for objects like Selector which are not connected to a
    CSSStyleSheet (yet). Prefixes and namespaceURIs are kept in a plain
    dictionary instead of @namespace rules of a sheet.
    """
    def __init__(self, log=None, *args):
        """
        log
            error handler given on to the base class
        args
            anything ``dict()`` accepts to set the initial namespaces
        """
        super(_SimpleNamespaces, self).__init__(parentStyleSheet=None, log=log)
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        "simply set namespaceURI for prefix, no checks are done"
        self.__namespaces[prefix] = namespaceURI

    @property
    def namespaces(self):
        u'Dict Wrapper for self.sheets @namespace rules.'
        return self.__namespaces

    def __str__(self):
        clsname = self.__class__.__name__
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
                clsname, self.namespaces, id(self))

    def __repr__(self):
        clsname = self.__class__.__name__
        return u"cssutils.util.%s(%r)" % (clsname, self.namespaces)
856

    
857

    
858
def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
859
    """
860
    Read cssText from url and decode it using all relevant methods (HTTP
861
    header, BOM, @charset). Returns
862

863
    - encoding used to decode text (which is needed to set encoding of
864
      stylesheet properly)
865
    - type of encoding (how it was retrieved, see list below)
866
    - decodedCssText
867

868
    ``fetcher``
869
        see cssutils.CSSParser.setFetcher for details
870
    ``overrideEncoding``
871
        If given this encoding is used and all other encoding information is
872
        ignored (HTTP, BOM etc)
873
    ``parentEncoding``
874
        Encoding of parent stylesheet (while e.g. reading @import references
875
        sheets) or document if available.
876

877
    Priority or encoding information
878
    --------------------------------
879
    **cssutils only**: 0. overrideEncoding
880

881
    1. An HTTP "charset" parameter in a "Content-Type" field (or similar
882
       parameters in other protocols)
883
    2. BOM and/or @charset (see below)
884
    3. <link charset=""> or other metadata from the linking mechanism (if any)
885
    4. charset of referring style sheet or document (if any)
886
    5. Assume UTF-8
887

888
    """
889
    enctype = None
890

    
891
    if not fetcher:
892
        fetcher = _defaultFetcher
893

    
894
    r = fetcher(url)
895
    if r and len(r) == 2 and r[1] is not None:
896
        httpEncoding, content = r
897

    
898
        if overrideEncoding:
899
            enctype = 0 # 0. override encoding
900
            encoding = overrideEncoding
901
        elif httpEncoding:
902
            enctype = 1 # 1. HTTP
903
            encoding = httpEncoding
904
        else:
905
            # BOM or @charset
906
            if isinstance(content, unicode):
907
                contentEncoding, explicit = codec.detectencoding_unicode(content)
908
            else:
909
                contentEncoding, explicit = codec.detectencoding_str(content)
910
            
911
            if explicit:
912
                enctype = 2 # 2. BOM/@charset: explicitly
913
                encoding = contentEncoding
914
                
915
            elif parentEncoding:
916
                enctype = 4 # 4. parent stylesheet or document
917
                # may also be None in which case 5. is used in next step anyway
918
                encoding = parentEncoding
919
                
920
            else:
921
                enctype = 5 # 5. assume UTF-8
922
                encoding = 'utf-8'
923

    
924
        if isinstance(content, unicode):
925
            decodedCssText = content
926
        else:
927
            try:
928
                # encoding may still be wrong if encoding *is lying*!
929
                try:
930
                    decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0]
931
                except AttributeError, ae:
932
                    # at least in GAE
933
                    decodedCssText = content.decode(encoding if encoding else 'utf-8')
934
                    
935
            except UnicodeDecodeError, e:
936
                log.warn(e, neverraise=True)
937
                decodedCssText = None
938

    
939
        return encoding, enctype, decodedCssText
940
    else:
941
        return None, None, None