gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / script.py @ 475
History | View | Annotate | Download (12.7 KB)
1 |
"""classes and functions used by cssutils scripts
|
---|---|
2 |
"""
|
3 |
# Names exported by ``from ... import *``; both are defined further down in
# this module.
__all__ = ['CSSCapture', 'csscombine']
# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext'
# Version control keyword string.
# NOTE(review): the keyword names ``parse.py`` although this file is
# ``script.py`` -- presumably copied from another module; confirm.
__version__ = '$Id: parse.py 1323 2008-07-06 18:13:57Z cthedot $'
|
6 |
|
7 |
import HTMLParser |
8 |
import codecs |
9 |
import cssutils |
10 |
import errno |
11 |
import logging |
12 |
import os |
13 |
import sys |
14 |
import urllib2 |
15 |
import urlparse |
16 |
|
17 |
# Locate the ``encutils`` helper module: prefer the copy available as
# ``cssutils.encutils``, fall back to a top-level ``encutils`` module and
# terminate the interpreter (at import time!) if neither can be imported.
try:
    import cssutils.encutils as encutils
except ImportError:
    try:
        import encutils
    except ImportError:
        sys.exit("You need encutils from http://cthedot.de/encutils/")
|
24 |
|
25 |
# types of sheets in HTML
# These constants tag the entries collected in ``CSSCaptureHTMLParser.sheets``
# and are dispatched on in ``CSSCapture._findStyleSheets``.
LINK = 0 # <link rel="stylesheet" type="text/css" href="..." [@title="..." @media="..."]/>
STYLE = 1 # <style type="text/css" [@title="..."]>...</style>
28 |
|
29 |
class CSSCaptureHTMLParser(HTMLParser.HTMLParser):
    """CSSCapture helper: parse given data for link and style elements.

    After ``feed()`` the stylesheets found so far are available in
    ``sheets`` (in document order). Each entry is either

    ``(LINK, atts)``
        for a ``<link type="text/css">`` element, or
    ``(STYLE, [atts, cssText])``
        for a ``<style type="text/css">`` element, ``cssText`` being the
        collected content of the element.

    ``atts`` is a dict of the element's attributes with lowercased names.
    Attribute values are kept exactly as found in the document (URLs in
    ``href`` and the ``title`` are case sensitive).
    """
    # name of the element whose character data is currently collected,
    # u'' if none
    curtag = u''

    def __init__(self):
        # HTMLParser is an old-style class in Python 2, so no super() here
        HTMLParser.HTMLParser.__init__(self)
        # Per-instance list: a class level list would be shared (and keep
        # growing) between all parser instances.
        self.sheets = [] # (type, [atts, cssText])
        self.curtag = u''

    def _loweratts(self, atts):
        """Return ``atts`` (sequence of ``(name, value)``) as a dict with
        lowercased names. Values are not modified; a valueless attribute
        keeps its value ``None``."""
        return dict([(name.lower(), value) for name, value in atts])

    def _iscss(self, atts):
        """Return True if the ``type`` attribute in dict ``atts`` is
        ``text/css`` (compared case-insensitively)."""
        return u'text/css' == (atts.get(u'type', u'') or u'').lower()

    def handle_starttag(self, tag, atts):
        if tag == u'link':
            atts = self._loweratts(atts)
            if self._iscss(atts):
                self.sheets.append((LINK, atts))
        elif tag == u'style':
            # also get content of style
            atts = self._loweratts(atts)
            if self._iscss(atts):
                self.sheets.append((STYLE, [atts, u'']))
                self.curtag = tag
        else:
            # close as only interesting <style> cannot contain any elements
            self.curtag = u''

    def handle_data(self, data):
        if self.curtag == u'style':
            # The content of a single <style> element may be reported in
            # several pieces (and as comment content, see handle_comment),
            # so collect all pieces instead of keeping only the last one.
            entry = self.sheets[-1][1]
            if entry[1]:
                entry[1] += data
            else:
                entry[1] = data

    def handle_comment(self, data):
        # style might have comment content, treat same as data
        self.handle_data(data)

    def handle_endtag(self, tag):
        # close as style cannot contain any elements
        self.curtag = u''
63 |
|
64 |
|
65 |
class CSSCapture(object):
    """
    Retrieve all CSS stylesheets including embedded for a given URL.
    Optional setting of User-Agent used for retrieval possible
    to handle browser sniffing servers.

    Typical use is ``capture(url)`` followed by ``saveto(dir)``.

    An ``urllib2.HTTPError`` of the request for the HTML document is not
    raised to the caller: it is logged and ``capture`` then calls
    ``sys.exit(1)``.
    """
    def __init__(self, ua=None, log=None, defaultloglevel=logging.INFO):
        """
        initialize a new Capture object

        ua
            init User-Agent to use for requests
        log
            supply a log object which is used instead of the default
            log which writes to sys.stderr
        defaultloglevel
            constant of logging package which defines the level of the
            default log if no explicit log given
        """
        self._ua = ua

        if log:
            self._log = log
        else:
            self._log = logging.getLogger('CSSCapture')
            # getLogger returns the same logger object for every instance,
            # so only attach a handler once; otherwise each new CSSCapture
            # would duplicate every log line.
            if not self._log.handlers:
                hdlr = logging.StreamHandler(sys.stderr)
                formatter = logging.Formatter('%(message)s')
                hdlr.setFormatter(formatter)
                self._log.addHandler(hdlr)
            self._log.setLevel(defaultloglevel)
            self._log.debug(u'Using default log')

        self._htmlparser = CSSCaptureHTMLParser()
        self._cssparser = cssutils.CSSParser(log = self._log)

        # State filled by capture(); initialized here so that saveto()
        # called before capture() simply has nothing to save.
        self.docencoding = None
        self._filename = u''
        self._nonparsed = {}
        self.stylesheetlist = cssutils.stylesheets.StyleSheetList()

    def _doRequest(self, url):
        """Do an HTTP request

        Return (url, response), response being the still unread file-like
        object returned by ``urllib2.urlopen``.
        url might have been changed by server due to redirects etc

        An ``urllib2.HTTPError`` is logged and (None, None) is returned
        instead. Other exceptions are not handled here.
        """
        self._log.debug(u' CSSCapture._doRequest\n * URL: %s' % url)

        req = urllib2.Request(url)
        if self._ua:
            req.add_header('User-agent', self._ua)
            self._log.info(' * Using User-Agent: %s', self._ua)

        try:
            res = urllib2.urlopen(req)
        except urllib2.HTTPError:
            # sys.exc_info() instead of "except X, e" / "except X as e" so
            # the statement is valid for every Python version
            e = sys.exc_info()[1]
            self._log.critical(' %s\n%s %s\n%s' % (
                e.geturl(), e.code, e.msg, e.headers))
            return None, None

        # get real url
        if url != res.geturl():
            url = res.geturl()
            self._log.info(' URL retrieved: %s', url)

        return url, res

    def _createStyleSheet(self, href=None,
                          media=None,
                          parentStyleSheet=None,
                          title=u'',
                          cssText=None,
                          encoding=None):
        """
        Return CSSStyleSheet read from href or if cssText is given use that.
        Return None if the sheet could not be read or parsed.

        href
            URL of the sheet, also used to read it if no cssText is given
        media, title
            handed to the CSS parser
        parentStyleSheet
            currently not used
        cssText
            text of an inline sheet, if None the sheet is read from href
        encoding
            used if inline style found, same as self.docencoding
        """
        if cssText is None:
            encoding, enctype, cssText = cssutils.util._readUrl(
                href, parentEncoding=self.docencoding)
            encoding = None # already decoded???
            if cssText is None:
                # nothing could be read, so there is nothing to parse
                self._log.info(u' Could not read %r\n' % href)
                return None

        sheet = self._cssparser.parseString(cssText, href=href, media=media,
                                            title=title, encoding=encoding)

        if not sheet:
            return None

        self._log.info(u' %s\n' % sheet)
        # keep the literal text for saveto(saveraw=True)
        self._nonparsed[sheet] = cssText
        return sheet

    def _findStyleSheets(self, docurl, doctext):
        """
        parse text for stylesheets
        fills stylesheetlist with all found StyleSheets

        docurl
            to build a full url of found StyleSheets @href
        doctext
            to parse
        """
        # TODO: ownerNode should be set to the <link> node
        parser = self._htmlparser
        # The parser object is reused for every capture() call: start from
        # a clean state so that sheets found in an earlier document are not
        # processed again.
        parser.reset()
        parser.sheets = []
        parser.curtag = u''
        parser.feed(doctext)

        for typ, data in parser.sheets:
            sheet = None

            if LINK == typ:
                self._log.info(u'+ PROCESSING <link> %r' % data)

                atts = data
                href = atts.get(u'href', None)
                if not href:
                    # urljoin would return docurl itself and the HTML
                    # document would be fetched and parsed as CSS
                    self._log.info(u' <link> without href ignored\n')
                    continue
                href = urlparse.urljoin(docurl, href)
                sheet = self._createStyleSheet(href=href,
                                               media=atts.get(u'media', None),
                                               title=atts.get(u'title', None))
            elif STYLE == typ:
                self._log.info(u'+ PROCESSING <style> %r' % data)

                atts, cssText = data
                sheet = self._createStyleSheet(cssText=cssText,
                                               href = docurl,
                                               media=atts.get(u'media', None),
                                               title=atts.get(u'title', None),
                                               encoding=self.docencoding)
                if sheet:
                    sheet._href = None # inline have no href!
                    # was an unconditional print to stdout
                    self._log.debug(u' %r\n' % sheet.cssText)

            if sheet:
                self.stylesheetlist.append(sheet)
                self._doImports(sheet, base=docurl)

    def _doImports(self, parentStyleSheet, base=None):
        """
        handle all @import CSS stylesheet recursively
        found CSS stylesheets are appended to stylesheetlist

        parentStyleSheet
            sheet whose @import rules are followed
        base
            URL the href of an @import rule is resolved against
        """
        # TODO: only if not parsed these have to be read extra!

        for rule in parentStyleSheet.cssRules:
            if rule.type == rule.IMPORT_RULE:
                self._log.info(u'+ PROCESSING @import:')
                self._log.debug(u' IN: %s\n' % parentStyleSheet.href)
                sheet = rule.styleSheet
                href = urlparse.urljoin(base, rule.href)
                if sheet:
                    self._log.info(u' %s\n' % sheet)
                    self.stylesheetlist.append(sheet)
                    self._doImports(sheet, base=href)

    def capture(self, url):
        """
        Capture all stylesheets at given URL's HTML document.

        An ``urllib2.HTTPError`` of the request for the document itself is
        logged by ``_doRequest`` and this method then calls ``sys.exit(1)``.

        url
            to capture CSS from

        Returns ``cssutils.stylesheets.StyleSheetList``.
        """
        self._log.info(u'\nCapturing CSS from URL:\n %s\n', url)
        self._nonparsed = {}
        self.stylesheetlist = cssutils.stylesheets.StyleSheetList()

        # used to save inline styles
        scheme, loc, path, query, fragment = urlparse.urlsplit(url)
        self._filename = os.path.basename(path)

        # get url content
        url, res = self._doRequest(url)
        if not res:
            sys.exit(1)

        try:
            rawdoc = res.read()

            self.docencoding = encutils.getEncodingInfo(
                res, rawdoc, log=self._log).encoding
        finally:
            # do not leave the connection open, even if reading fails
            res.close()
        self._log.info(u'\nUsing Encoding: %s\n', self.docencoding)

        doctext = rawdoc.decode(self.docencoding)

        # fill list of stylesheets and list of raw css
        self._findStyleSheets(url, doctext)

        return self.stylesheetlist

    def saveto(self, dir, saveraw=False, minified=False):
        """
        saves css in "dir" in the same layout as on the server
        internal stylesheets are saved as "dir/<name>_INLINE_<n>.css",
        <name> being the file name of the captured document and <n> the
        running number of the inline sheet

        dir
            directory to save files to
        saveraw
            save literal CSS from server instead of the parsed CSS.
            For sheets no literal text is known for (e.g. sheets found
            via @import) the parsed CSS is saved instead.
        minified
            save minified CSS

        Both parsed and minified (which is also parsed of course) will
        lose information which cssutils is unable to understand or where
        it is simply buggy. You might want to first save the raw version
        before parsing or even minifying it.
        """
        msg = 'parsed'
        if saveraw:
            msg = 'raw'
        if minified:
            # NOTE: changes the preferences of the global serializer and
            # does not restore them afterwards
            cssutils.ser.prefs.useMinified()
            msg = 'minified'

        inlines = 0
        for i, sheet in enumerate(self.stylesheetlist):
            url = sheet.href
            if not url:
                inlines += 1
                url = u'%s_INLINE_%s.css' % (self._filename, inlines)

            # build savepath
            scheme, loc, path, query, fragment = urlparse.urlsplit(url)
            # no absolute path
            if path and path.startswith('/'):
                path = path[1:]
            path = os.path.normpath(path)
            path, fn = os.path.split(path)
            savepath = os.path.join(dir, path)
            savefn = os.path.join(savepath, fn)
            try:
                os.makedirs(savepath)
            except OSError:
                if sys.exc_info()[1].errno != errno.EEXIST:
                    # bare raise keeps the original traceback
                    raise
                self._log.debug(u'Path "%s" already exists.', savepath)

            self._log.info(u'SAVING %s, %s %r' % (i+1, msg, savefn))

            rawText = None
            if saveraw:
                # only sheets read by _createStyleSheet have literal text
                rawText = self._nonparsed.get(sheet)
                if rawText is None:
                    self._log.info(
                        u' No raw CSS available, saving parsed CSS instead')

            sf = open(savefn, 'wb')
            try:
                if rawText is not None:
                    uf = codecs.getwriter('css')(sf)
                    uf.write(rawText)
                else:
                    sf.write(sheet.cssText)
            finally:
                # close the file even if serializing or writing fails
                sf.close()
309 |
|
310 |
def csscombine(path=None, url=None, cssText=None, href=None,
               sourceencoding=None, targetencoding=None,
               minify=True, resolveVariables=True):
    """Combine sheets referred to by @import rules in given CSS proxy sheet
    into a single new sheet.

    :returns: combined cssText, normal or minified
    :Parameters:
        `path` or `url` or `cssText` + `href`
            path or URL to a CSSStyleSheet or a cssText of a sheet which imports
            other sheets which are then combined into one sheet.
            `cssText` normally needs `href` to be able to resolve relative
            imports.
            `path` is only used if no `cssText` is given. If none of the
            three is given ``sys.exit`` is called.
        `sourceencoding` = None
            explicit encoding of the source proxysheet, handed to the parser
        `targetencoding`
            encoding of the combined stylesheet
        `minify` = True
            defines if the combined sheet should be minified, in this case
            comments are not parsed at all!
        `resolveVariables` = True
            defines if variables in combined sheet should be resolved
    """
    # Report the source which is actually going to be read (same order of
    # precedence as the parsing below), not always ``url``.
    if path and not cssText:
        source = path
    elif url:
        source = url
    else:
        source = href
    cssutils.log.info(u'Combining files from %r' % source,
                      neverraise=True)
    if sourceencoding is not None:
        cssutils.log.info(u'Using source encoding %r' % sourceencoding,
                          neverraise=True)

    parser = cssutils.CSSParser(parseComments=not minify)

    if path and not cssText:
        src = parser.parseFile(path, encoding=sourceencoding)
    elif url:
        src = parser.parseUrl(url, encoding=sourceencoding)
    elif cssText:
        src = parser.parseString(cssText, href=href, encoding=sourceencoding)
    else:
        sys.exit('Path or URL must be given')

    result = cssutils.resolveImports(src)
    result.encoding = targetencoding
    cssutils.log.info(u'Using target encoding: %r' % targetencoding, neverraise=True)

    # Serialize with a fresh serializer so the preferences set here do not
    # leak into the global one; always put the previous serializer back,
    # also if serializing fails.
    oldser = cssutils.ser
    cssutils.setSerializer(cssutils.serialize.CSSSerializer())
    try:
        if minify:
            cssutils.ser.prefs.useMinified()
        cssutils.ser.prefs.resolveVariables = resolveVariables
        cssText = result.cssText
    finally:
        cssutils.setSerializer(oldser)

    return cssText
|