Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / script.py @ 475

History | View | Annotate | Download (12.7 KB)

1
"""classes and functions used by cssutils scripts
2
"""
3
__all__ = ['CSSCapture', 'csscombine']
4
__docformat__ = 'restructuredtext'
5
__version__ = '$Id: parse.py 1323 2008-07-06 18:13:57Z cthedot $'
6

    
7
import HTMLParser
8
import codecs
9
import cssutils
10
import errno
11
import logging
12
import os
13
import sys
14
import urllib2
15
import urlparse
16

    
17
try:
18
    import cssutils.encutils as encutils
19
except ImportError:
20
    try:
21
        import encutils
22
    except ImportError:
23
        sys.exit("You need encutils from http://cthedot.de/encutils/")
24

    
25
# types of sheets in HTML
# Each entry collected in CSSCaptureHTMLParser.sheets starts with one of
# these markers; CSSCapture._findStyleSheets dispatches on it.
26
LINK = 0 # <link rel="stylesheet" type="text/css" href="..." [@title="..." @media="..."]/>
27
STYLE = 1 # <style type="text/css" [@title="..."]>...</style>
28

    
29
class CSSCaptureHTMLParser(HTMLParser.HTMLParser):
    """CSSCapture helper: Parse given data for link and style elements.

    Every stylesheet found while feeding HTML is appended to ``sheets``:

    - ``(LINK, atts)`` for ``<link type="text/css" ...>``
    - ``(STYLE, [atts, cssText])`` for ``<style type="text/css">...</style>``

    ``atts`` is a dict of the element's attributes with lower-cased names.
    """
    curtag = u''

    def __init__(self):
        # HTMLParser is an old-style class in Python 2, so no super() here
        HTMLParser.HTMLParser.__init__(self)
        # must be per instance: a class-level list would be shared by (and
        # keep growing across) all parser instances
        self.sheets = [] # (type, atts) or (type, [atts, cssText])

    def _loweratts(self, atts):
        """Return ``atts`` (a list of (name, value) pairs) as a dict.

        Only the attribute names are lower-cased. Values keep their case as
        e.g. ``href`` and ``title`` are case-sensitive. Attributes without
        a value (reported as ``None`` by HTMLParser) become ``u''``.
        """
        return dict([(a.lower(), v or u'') for a, v in atts])

    def _iscss(self, atts):
        """Return True if the ``type`` attribute is ``text/css`` (any case)."""
        return u'text/css' == atts.get(u'type', u'').lower()

    def handle_starttag(self, tag, atts):
        """Register ``<link>`` and ``<style>`` elements of type text/css."""
        if tag == u'link':
            atts = self._loweratts(atts)
            if self._iscss(atts):
                self.sheets.append((LINK, atts))
        elif tag == u'style':
            # also get content of style
            atts = self._loweratts(atts)
            if self._iscss(atts):
                self.sheets.append((STYLE, [atts, u'']))
                self.curtag = tag
        else:
            # close as only interesting <style> cannot contain any elements
            self.curtag = u''

    def handle_data(self, data):
        """Collect text content of the currently open ``<style>`` element."""
        if self.curtag == u'style':
            # content may be delivered in several chunks (e.g. text, comment,
            # text), so append instead of overwriting what was collected
            self.sheets[-1][1][1] += data

    def handle_comment(self, data):
        """Treat comment content like normal data."""
        # style might have comment content, treat same as data
        self.handle_data(data)

    def handle_endtag(self, tag):
        """Stop collecting content on any end tag."""
        # close as style cannot contain any elements
        self.curtag = u''
63

    
64

    
65
class CSSCapture(object):
66
    """
67
    Retrieve all CSS stylesheets including embedded for a given URL.
68
    Optional setting of User-Agent used for retrieval possible
69
    to handle browser sniffing servers.
70

71
    raises urllib2.HTTPError
72
    """
73
    def __init__(self, ua=None, log=None, defaultloglevel=logging.INFO):
74
        """
75
        initialize a new Capture object
76

77
        ua
78
            init User-Agent to use for requests
79
        log
80
            supply a log object which is used instead of the default
81
            log which writes to sys.stderr
82
        defaultloglevel
83
            constant of logging package which defines the level of the
84
            default log if no explicit log given
85
        """
86
        self._ua = ua
87

    
88
        if log:
89
            self._log = log
90
        else:
91
            self._log = logging.getLogger('CSSCapture')
92
            hdlr = logging.StreamHandler(sys.stderr)
93
            formatter = logging.Formatter('%(message)s')
94
            hdlr.setFormatter(formatter)
95
            self._log.addHandler(hdlr)
96
            self._log.setLevel(defaultloglevel)
97
            self._log.debug(u'Using default log')
98

    
99
        self._htmlparser = CSSCaptureHTMLParser()
100
        self._cssparser = cssutils.CSSParser(log = self._log)
101

    
102
    def _doRequest(self, url):
103
        """Do an HTTP request
104

105
        Return (url, rawcontent)
106
            url might have been changed by server due to redirects etc
107
        """
108
        self._log.debug(u'    CSSCapture._doRequest\n        * URL: %s' % url)
109

    
110
        req = urllib2.Request(url)
111
        if self._ua:
112
            req.add_header('User-agent', self._ua)
113
            self._log.info('        * Using User-Agent: %s', self._ua)
114

    
115
        try:
116
            res = urllib2.urlopen(req)
117
        except urllib2.HTTPError, e:
118
            self._log.critical('    %s\n%s %s\n%s' % (
119
                e.geturl(), e.code, e.msg, e.headers))
120
            return None, None
121

    
122
        # get real url
123
        if url != res.geturl():
124
            url = res.geturl()
125
            self._log.info('        URL retrieved: %s', url)
126

    
127
        return url, res
128

    
129
    def _createStyleSheet(self, href=None,
130
                          media=None,
131
                          parentStyleSheet=None,
132
                          title=u'',
133
                          cssText=None,
134
                          encoding=None):
135
        """
136
        Return CSSStyleSheet read from href or if cssText is given use that.
137

138
        encoding
139
            used if inline style found, same as self.docencoding
140
        """
141
        if cssText is None:
142
            encoding, enctype, cssText = cssutils.util._readUrl(href, parentEncoding=self.docencoding)
143
            encoding = None # already decoded???
144

    
145
        sheet = self._cssparser.parseString(cssText, href=href, media=media, title=title,
146
                                            encoding=encoding)
147

    
148
        if not sheet:
149
            return None
150

    
151
        else:
152
            self._log.info(u'    %s\n' % sheet)
153
            self._nonparsed[sheet] = cssText
154
            return sheet
155

    
156
    def _findStyleSheets(self, docurl, doctext):
157
        """
158
        parse text for stylesheets
159
        fills stylesheetlist with all found StyleSheets
160

161
        docurl
162
            to build a full url of found StyleSheets @href
163
        doctext
164
            to parse
165
        """
166
        # TODO: ownerNode should be set to the <link> node
167
        self._htmlparser.feed(doctext)
168

    
169
        for typ, data in self._htmlparser.sheets:
170
            sheet = None
171

    
172
            if LINK == typ:
173
                self._log.info(u'+ PROCESSING <link> %r' % data)
174

    
175
                atts = data
176
                href = urlparse.urljoin(docurl, atts.get(u'href', None))
177
                sheet = self._createStyleSheet(href=href,
178
                                               media=atts.get(u'media', None),
179
                                               title=atts.get(u'title', None))
180
            elif STYLE == typ:
181
                self._log.info(u'+ PROCESSING <style> %r' % data)
182

    
183
                atts, cssText = data
184
                sheet = self._createStyleSheet(cssText=cssText,
185
                                               href = docurl,
186
                                               media=atts.get(u'media', None),
187
                                               title=atts.get(u'title', None),
188
                                               encoding=self.docencoding)
189
                if sheet:
190
                    sheet._href = None # inline have no href!
191
                print sheet.cssText
192

    
193
            if sheet:
194
                self.stylesheetlist.append(sheet)
195
                self._doImports(sheet, base=docurl)
196

    
197

    
198
    def _doImports(self, parentStyleSheet, base=None):
199
        """
200
        handle all @import CSS stylesheet recursively
201
        found CSS stylesheets are appended to stylesheetlist
202
        """
203
        # TODO: only if not parsed these have to be read extra!
204

    
205
        for rule in parentStyleSheet.cssRules:
206
            if rule.type == rule.IMPORT_RULE:
207
                self._log.info(u'+ PROCESSING @import:')
208
                self._log.debug(u'    IN: %s\n' % parentStyleSheet.href)
209
                sheet = rule.styleSheet
210
                href = urlparse.urljoin(base, rule.href)
211
                if sheet:
212
                    self._log.info(u'    %s\n' % sheet)
213
                    self.stylesheetlist.append(sheet)
214
                    self._doImports(sheet, base=href)
215

    
216
    def capture(self, url):
217
        """
218
        Capture all stylesheets at given URL's HTML document.
219
        Any HTTPError is raised to caller.
220

221
        url
222
            to capture CSS from
223

224
        Returns ``cssutils.stylesheets.StyleSheetList``.
225
        """
226
        self._log.info(u'\nCapturing CSS from URL:\n    %s\n', url)
227
        self._nonparsed = {}
228
        self.stylesheetlist = cssutils.stylesheets.StyleSheetList()
229

    
230
        # used to save inline styles
231
        scheme, loc, path, query, fragment = urlparse.urlsplit(url)
232
        self._filename = os.path.basename(path)
233

    
234
        # get url content
235
        url, res = self._doRequest(url)
236
        if not res:
237
            sys.exit(1)
238

    
239
        rawdoc = res.read()
240

    
241
        self.docencoding = encutils.getEncodingInfo(
242
            res, rawdoc, log=self._log).encoding
243
        self._log.info(u'\nUsing Encoding: %s\n', self.docencoding)
244

    
245
        doctext = rawdoc.decode(self.docencoding)
246

    
247
        # fill list of stylesheets and list of raw css
248
        self._findStyleSheets(url, doctext)
249

    
250
        return self.stylesheetlist
251

    
252
    def saveto(self, dir, saveraw=False, minified=False):
253
        """
254
        saves css in "dir" in the same layout as on the server
255
        internal stylesheets are saved as "dir/__INLINE_STYLE__.html.css"
256

257
        dir
258
            directory to save files to
259
        saveparsed
260
            save literal CSS from server or save the parsed CSS
261
        minified
262
            save minified CSS
263

264
        Both parsed and minified (which is also parsed of course) will
265
        loose information which cssutils is unable to understand or where
266
        it is simple buggy. You might to first save the raw version before
267
        parsing of even minifying it.
268
        """
269
        msg = 'parsed'
270
        if saveraw:
271
            msg = 'raw'
272
        if minified:
273
            cssutils.ser.prefs.useMinified()
274
            msg = 'minified'
275

    
276
        inlines = 0
277
        for i, sheet in enumerate(self.stylesheetlist):
278
            url = sheet.href
279
            if not url:
280
                inlines += 1
281
                url = u'%s_INLINE_%s.css' % (self._filename, inlines)
282

    
283
            # build savepath
284
            scheme, loc, path, query, fragment = urlparse.urlsplit(url)
285
            # no absolute path
286
            if path and path.startswith('/'):
287
                path = path[1:]
288
            path = os.path.normpath(path)
289
            path, fn = os.path.split(path)
290
            savepath = os.path.join(dir, path)
291
            savefn = os.path.join(savepath, fn)
292
            try:
293
                os.makedirs(savepath)
294
            except OSError, e:
295
                if e.errno != errno.EEXIST:
296
                    raise e
297
                self._log.debug(u'Path "%s" already exists.', savepath)
298

    
299
            self._log.info(u'SAVING %s, %s %r' % (i+1, msg, savefn))
300

    
301
            sf = open(savefn, 'wb')
302
            if saveraw:
303
                cssText = self._nonparsed[sheet]
304
                uf = codecs.getwriter('css')(sf)
305
                uf.write(cssText)
306
            else:
307
                sf.write(sheet.cssText)
308
            sf.close()
309

    
310
def csscombine(path=None, url=None, cssText=None, href=None,
               sourceencoding=None, targetencoding=None,
               minify=True, resolveVariables=True):
    """Combine sheets referred to by @import rules in given CSS proxy sheet
    into a single new sheet.

    Ends the program via ``sys.exit`` if neither `path`, `url` nor
    `cssText` is given.

    :returns: combined cssText, normal or minified
    :Parameters:
        `path` or `url` or `cssText` + `href`
            path or URL to a CSSStyleSheet or a cssText of a sheet which imports
            other sheets which are then combined into one sheet.
            `cssText` normally needs `href` to be able to resolve relative
            imports.
        `sourceencoding` = None
            explicit encoding of the source proxysheet
        `targetencoding`
            encoding of the combined stylesheet
        `minify` = True
            defines if the combined sheet should be minified, in this case
            comments are not parsed at all!
        `resolveVariables` = True
            defines if variables in combined sheet should be resolved
    """
    # find out which source is actually used (same precedence as the
    # parsing below) so the log names it instead of always showing `url`
    if path and not cssText:
        source = path
    elif url:
        source = url
    elif cssText:
        source = href
    else:
        sys.exit('Path or URL must be given')

    cssutils.log.info(u'Combining files from %r' % source,
                      neverraise=True)
    if sourceencoding is not None:
        cssutils.log.info(u'Using source encoding %r' % sourceencoding,
                          neverraise=True)

    parser = cssutils.CSSParser(parseComments=not minify)

    if path and not cssText:
        src = parser.parseFile(path, encoding=sourceencoding)
    elif url:
        src = parser.parseUrl(url, encoding=sourceencoding)
    else:
        src = parser.parseString(cssText, href=href, encoding=sourceencoding)

    result = cssutils.resolveImports(src)
    result.encoding = targetencoding
    cssutils.log.info(u'Using target encoding: %r' % targetencoding, neverraise=True)

    # serialize with a temporary serializer; the previous global one is
    # restored even if serializing fails
    oldser = cssutils.ser
    cssutils.setSerializer(cssutils.serialize.CSSSerializer())
    try:
        if minify:
            cssutils.ser.prefs.useMinified()
        cssutils.ser.prefs.resolveVariables = resolveVariables
        cssText = result.cssText
    finally:
        cssutils.setSerializer(oldser)

    return cssText