Statistics
| Revision:

root / tags / v1_0_2_Build_904 / extensions / extScripting / scripts / jython / Lib / urllib.py @ 10724

History | View | Annotate | Download (46.5 KB)

1
"""Open an arbitrary URL.
2

3
See the following document for more info on URLs:
4
"Names and Addresses, URIs, URLs, URNs, URCs", at
5
http://www.w3.org/pub/WWW/Addressing/Overview.html
6

7
See also the HTTP spec (from which the error codes are derived):
8
"HTTP - Hypertext Transfer Protocol", at
9
http://www.w3.org/pub/WWW/Protocols/
10

11
Related standards and specs:
12
- RFC1808: the "relative URL" spec. (authoritative status)
13
- RFC1738 - the "URL standard". (authoritative status)
14
- RFC1630 - the "URI spec". (informational status)
15

16
The object returned by URLopener().open(file) will differ per
17
protocol.  All you know is that it has methods read(), readline(),
18
readlines(), fileno(), close() and info().  The read*(), fileno()
19
and close() methods work like those of open files.
20
The info() method returns a mimetools.Message object which can be
21
used to query various info about the object, if available.
22
(mimetools.Message objects are queried with the getheader() method.)
23
"""
24

    
25
import string
26
import socket
27
import os
28
import sys
29
import types
30

    
31
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
32
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
33
           "urlencode", "url2pathname", "pathname2url", "splittag",
34
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
35
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
36
           "splitnport", "splitquery", "splitattr", "splitvalue",
37
           "splitgophertype", "getproxies"]
38

    
39
__version__ = '1.15'    # XXX This version is not always updated :-(
40

    
41
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
42

    
43
# Helper for non-unix systems
44
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # No platform-specific module: fall back to plain (un)quoting.
    def pathname2url(pathname):
        """Convert a local path to the path part of a URL by quoting it."""
        return quote(pathname)

    def url2pathname(pathname):
        """Convert the path part of a URL to a local path by unquoting it."""
        return unquote(pathname)
55

    
56
# This really consists of two pieces:
57
# (1) a class which handles opening of all sorts of URLs
58
#     (plus assorted utilities etc.)
59
# (2) a set of functions for parsing URLs
60
# XXX Should these be separated out into different modules?
61

    
62

    
63
# Shortcut for basic usage
64
_urlopener = None
def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object

    Uses the shared module-level opener, creating a FancyURLopener on
    first use.  data is only forwarded to the opener when it is not None.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    opener = _urlopener
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
74
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    Creates a FancyURLopener on first use and returns whatever its
    retrieve() method returns for the same arguments.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    opener = _urlopener
    return opener.retrieve(url, filename, reporthook, data)
79
def urlcleanup():
    """Call cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if opener:
        opener.cleanup()
82

    
83

    
84
ftpcache = {}
85
class URLopener:
86
    """Class to open URLs.
87
    This is a class rather than just a subroutine because we may need
88
    more than one set of global protocol-specific options.
89
    Note -- this is a base class for those who don't want the
90
    automatic handling of errors type 302 (relocated) and 401
91
    (authorization needed)."""
92

    
93
    __tempfiles = None
94

    
95
    version = "Python-urllib/%s" % __version__
96

    
97
    # Constructor
98
    def __init__(self, proxies=None, **x509):
99
        if proxies is None:
100
            proxies = getproxies()
101
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
102
        self.proxies = proxies
103
        self.key_file = x509.get('key_file')
104
        self.cert_file = x509.get('cert_file')
105
        self.addheaders = [('User-agent', self.version)]
106
        self.__tempfiles = []
107
        self.__unlink = os.unlink # See cleanup()
108
        self.tempcache = None
109
        # Undocumented feature: if you assign {} to tempcache,
110
        # it is used to cache files retrieved with
111
        # self.retrieve().  This is not enabled by default
112
        # since it does not work for changing documents (and I
113
        # haven't got the logic to check expiration headers
114
        # yet).
115
        self.ftpcache = ftpcache
116
        # Undocumented feature: you can use a different
117
        # ftp cache by assigning to the .ftpcache member;
118
        # in case you want logically independent URL openers
119
        # XXX This is not threadsafe.  Bah.
120

    
121
    def __del__(self):
122
        self.close()
123

    
124
    def close(self):
125
        self.cleanup()
126

    
127
    def cleanup(self):
128
        # This code sometimes runs when the rest of this module
129
        # has already been deleted, so it can't use any globals
130
        # or import anything.
131
        if self.__tempfiles:
132
            for file in self.__tempfiles:
133
                try:
134
                    self.__unlink(file)
135
                except:
136
                    pass
137
            del self.__tempfiles[:]
138
        if self.tempcache:
139
            self.tempcache.clear()
140

    
141
    def addheader(self, *args):
142
        """Add a header to be used by the HTTP interface only
143
        e.g. u.addheader('Accept', 'sound/basic')"""
144
        self.addheaders.append(args)
145

    
146
    # External interface
147
    def open(self, fullurl, data=None):
148
        """Use URLopener().open(file) instead of open(file, 'r')."""
149
        fullurl = unwrap(toBytes(fullurl))
150
        if self.tempcache and self.tempcache.has_key(fullurl):
151
            filename, headers = self.tempcache[fullurl]
152
            fp = open(filename, 'rb')
153
            return addinfourl(fp, headers, fullurl)
154
        urltype, url = splittype(fullurl)
155
        if not urltype:
156
            urltype = 'file'
157
        if self.proxies.has_key(urltype):
158
            proxy = self.proxies[urltype]
159
            urltype, proxyhost = splittype(proxy)
160
            host, selector = splithost(proxyhost)
161
            url = (host, fullurl) # Signal special case to open_*()
162
        else:
163
            proxy = None
164
        name = 'open_' + urltype
165
        self.type = urltype
166
        if '-' in name:
167
            # replace - with _
168
            name = '_'.join(name.split('-'))
169
        if not hasattr(self, name):
170
            if proxy:
171
                return self.open_unknown_proxy(proxy, fullurl, data)
172
            else:
173
                return self.open_unknown(fullurl, data)
174
        try:
175
            if data is None:
176
                return getattr(self, name)(url)
177
            else:
178
                return getattr(self, name)(url, data)
179
        except socket.error, msg:
180
            raise IOError, ('socket error', msg), sys.exc_info()[2]
181

    
182
    def open_unknown(self, fullurl, data=None):
183
        """Overridable interface to open unknown URL type."""
184
        type, url = splittype(fullurl)
185
        raise IOError, ('url error', 'unknown url type', type)
186

    
187
    def open_unknown_proxy(self, proxy, fullurl, data=None):
188
        """Overridable interface to open unknown URL type."""
189
        type, url = splittype(fullurl)
190
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
191

    
192
    # External interface
193
    def retrieve(self, url, filename=None, reporthook=None, data=None):
194
        """retrieve(url) returns (filename, None) for a local object
195
        or (tempfilename, headers) for a remote object."""
196
        url = unwrap(toBytes(url))
197
        if self.tempcache and self.tempcache.has_key(url):
198
            return self.tempcache[url]
199
        type, url1 = splittype(url)
200
        if not filename and (not type or type == 'file'):
201
            try:
202
                fp = self.open_local_file(url1)
203
                hdrs = fp.info()
204
                del fp
205
                return url2pathname(splithost(url1)[1]), hdrs
206
            except IOError, msg:
207
                pass
208
        fp = self.open(url, data)
209
        headers = fp.info()
210
        if not filename:
211
            import tempfile
212
            garbage, path = splittype(url)
213
            garbage, path = splithost(path or "")
214
            path, garbage = splitquery(path or "")
215
            path, garbage = splitattr(path or "")
216
            suffix = os.path.splitext(path)[1]
217
            filename = tempfile.mktemp(suffix)
218
            self.__tempfiles.append(filename)
219
        result = filename, headers
220
        if self.tempcache is not None:
221
            self.tempcache[url] = result
222
        tfp = open(filename, 'wb')
223
        bs = 1024*8
224
        size = -1
225
        blocknum = 1
226
        if reporthook:
227
            if headers.has_key("content-length"):
228
                size = int(headers["Content-Length"])
229
            reporthook(0, bs, size)
230
        block = fp.read(bs)
231
        if reporthook:
232
            reporthook(1, bs, size)
233
        while block:
234
            tfp.write(block)
235
            block = fp.read(bs)
236
            blocknum = blocknum + 1
237
            if reporthook:
238
                reporthook(blocknum, bs, size)
239
        fp.close()
240
        tfp.close()
241
        del fp
242
        del tfp
243
        return result
244

    
245
    # Each method named open_<type> knows how to open that type of URL
246

    
247
    def open_http(self, url, data=None):
248
        """Use HTTP protocol."""
249
        import httplib
250
        user_passwd = None
251
        if type(url) is types.StringType:
252
            host, selector = splithost(url)
253
            if host:
254
                user_passwd, host = splituser(host)
255
                host = unquote(host)
256
            realhost = host
257
        else:
258
            host, selector = url
259
            urltype, rest = splittype(selector)
260
            url = rest
261
            user_passwd = None
262
            if urltype.lower() != 'http':
263
                realhost = None
264
            else:
265
                realhost, rest = splithost(rest)
266
                if realhost:
267
                    user_passwd, realhost = splituser(realhost)
268
                if user_passwd:
269
                    selector = "%s://%s%s" % (urltype, realhost, rest)
270
            #print "proxy via http:", host, selector
271
        if not host: raise IOError, ('http error', 'no host given')
272
        if user_passwd:
273
            import base64
274
            auth = base64.encodestring(user_passwd).strip()
275
        else:
276
            auth = None
277
        h = httplib.HTTP(host)
278
        if data is not None:
279
            h.putrequest('POST', selector)
280
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
281
            h.putheader('Content-length', '%d' % len(data))
282
        else:
283
            h.putrequest('GET', selector)
284
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
285
        if realhost: h.putheader('Host', realhost)
286
        for args in self.addheaders: apply(h.putheader, args)
287
        h.endheaders()
288
        if data is not None:
289
            h.send(data)
290
        errcode, errmsg, headers = h.getreply()
291
        fp = h.getfile()
292
        if errcode == 200:
293
            return addinfourl(fp, headers, "http:" + url)
294
        else:
295
            if data is None:
296
                return self.http_error(url, fp, errcode, errmsg, headers)
297
            else:
298
                return self.http_error(url, fp, errcode, errmsg, headers, data)
299

    
300
    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
301
        """Handle http errors.
302
        Derived class can override this, or provide specific handlers
303
        named http_error_DDD where DDD is the 3-digit error code."""
304
        # First check if there's a specific handler for this error
305
        name = 'http_error_%d' % errcode
306
        if hasattr(self, name):
307
            method = getattr(self, name)
308
            if data is None:
309
                result = method(url, fp, errcode, errmsg, headers)
310
            else:
311
                result = method(url, fp, errcode, errmsg, headers, data)
312
            if result: return result
313
        return self.http_error_default(url, fp, errcode, errmsg, headers)
314

    
315
    def http_error_default(self, url, fp, errcode, errmsg, headers):
316
        """Default error handler: close the connection and raise IOError."""
317
        void = fp.read()
318
        fp.close()
319
        raise IOError, ('http error', errcode, errmsg, headers)
320

    
321
    if hasattr(socket, "ssl"):
322
        def open_https(self, url, data=None):
323
            """Use HTTPS protocol."""
324
            import httplib
325
            user_passwd = None
326
            if type(url) is types.StringType:
327
                host, selector = splithost(url)
328
                if host:
329
                    user_passwd, host = splituser(host)
330
                    host = unquote(host)
331
                realhost = host
332
            else:
333
                host, selector = url
334
                urltype, rest = splittype(selector)
335
                url = rest
336
                user_passwd = None
337
                if urltype.lower() != 'https':
338
                    realhost = None
339
                else:
340
                    realhost, rest = splithost(rest)
341
                    if realhost:
342
                        user_passwd, realhost = splituser(realhost)
343
                    if user_passwd:
344
                        selector = "%s://%s%s" % (urltype, realhost, rest)
345
                #print "proxy via https:", host, selector
346
            if not host: raise IOError, ('https error', 'no host given')
347
            if user_passwd:
348
                import base64
349
                auth = base64.encodestring(user_passwd).strip()
350
            else:
351
                auth = None
352
            h = httplib.HTTPS(host, 0,
353
                              key_file=self.key_file,
354
                              cert_file=self.cert_file)
355
            if data is not None:
356
                h.putrequest('POST', selector)
357
                h.putheader('Content-type',
358
                            'application/x-www-form-urlencoded')
359
                h.putheader('Content-length', '%d' % len(data))
360
            else:
361
                h.putrequest('GET', selector)
362
            if auth: h.putheader('Authorization: Basic %s' % auth)
363
            if realhost: h.putheader('Host', realhost)
364
            for args in self.addheaders: apply(h.putheader, args)
365
            h.endheaders()
366
            if data is not None:
367
                h.send(data)
368
            errcode, errmsg, headers = h.getreply()
369
            fp = h.getfile()
370
            if errcode == 200:
371
                return addinfourl(fp, headers, url)
372
            else:
373
                if data is None:
374
                    return self.http_error(url, fp, errcode, errmsg, headers)
375
                else:
376
                    return self.http_error(url, fp, errcode, errmsg, headers,
377
                                           data)
378

    
379
    def open_gopher(self, url):
380
        """Use Gopher protocol."""
381
        import gopherlib
382
        host, selector = splithost(url)
383
        if not host: raise IOError, ('gopher error', 'no host given')
384
        host = unquote(host)
385
        type, selector = splitgophertype(selector)
386
        selector, query = splitquery(selector)
387
        selector = unquote(selector)
388
        if query:
389
            query = unquote(query)
390
            fp = gopherlib.send_query(selector, query, host)
391
        else:
392
            fp = gopherlib.send_selector(selector, host)
393
        return addinfourl(fp, noheaders(), "gopher:" + url)
394

    
395
    def open_file(self, url):
396
        """Use local file or FTP depending on form of URL."""
397
        if url[:2] == '//' and url[2:3] != '/':
398
            return self.open_ftp(url)
399
        else:
400
            return self.open_local_file(url)
401

    
402
    def open_local_file(self, url):
403
        """Use local file."""
404
        import mimetypes, mimetools, StringIO
405
        mtype = mimetypes.guess_type(url)[0]
406
        headers = mimetools.Message(StringIO.StringIO(
407
            'Content-Type: %s\n' % (mtype or 'text/plain')))
408
        host, file = splithost(url)
409
        if not host:
410
            urlfile = file
411
            if file[:1] == '/':
412
                urlfile = 'file://' + file
413
            return addinfourl(open(url2pathname(file), 'rb'),
414
                              headers, urlfile)
415
        host, port = splitport(host)
416
        if not port \
417
           and socket.gethostbyname(host) in (localhost(), thishost()):
418
            urlfile = file
419
            if file[:1] == '/':
420
                urlfile = 'file://' + file
421
            return addinfourl(open(url2pathname(file), 'rb'),
422
                              headers, urlfile)
423
        raise IOError, ('local file error', 'not on local host')
424

    
425
    def open_ftp(self, url):
426
        """Use FTP protocol."""
427
        host, path = splithost(url)
428
        if not host: raise IOError, ('ftp error', 'no host given')
429
        host, port = splitport(host)
430
        user, host = splituser(host)
431
        if user: user, passwd = splitpasswd(user)
432
        else: passwd = None
433
        host = unquote(host)
434
        user = unquote(user or '')
435
        passwd = unquote(passwd or '')
436
        host = socket.gethostbyname(host)
437
        if not port:
438
            import ftplib
439
            port = ftplib.FTP_PORT
440
        else:
441
            port = int(port)
442
        path, attrs = splitattr(path)
443
        path = unquote(path)
444
        dirs = path.split('/')
445
        dirs, file = dirs[:-1], dirs[-1]
446
        if dirs and not dirs[0]: dirs = dirs[1:]
447
        if dirs and not dirs[0]: dirs[0] = '/'
448
        key = user, host, port, '/'.join(dirs)
449
        # XXX thread unsafe!
450
        if len(self.ftpcache) > MAXFTPCACHE:
451
            # Prune the cache, rather arbitrarily
452
            for k in self.ftpcache.keys():
453
                if k != key:
454
                    v = self.ftpcache[k]
455
                    del self.ftpcache[k]
456
                    v.close()
457
        try:
458
            if not self.ftpcache.has_key(key):
459
                self.ftpcache[key] = \
460
                    ftpwrapper(user, passwd, host, port, dirs)
461
            if not file: type = 'D'
462
            else: type = 'I'
463
            for attr in attrs:
464
                attr, value = splitvalue(attr)
465
                if attr.lower() == 'type' and \
466
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
467
                    type = value.upper()
468
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
469
            if retrlen is not None and retrlen >= 0:
470
                import mimetools, StringIO
471
                headers = mimetools.Message(StringIO.StringIO(
472
                    'Content-Length: %d\n' % retrlen))
473
            else:
474
                headers = noheaders()
475
            return addinfourl(fp, headers, "ftp:" + url)
476
        except ftperrors(), msg:
477
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
478

    
479
    def open_data(self, url, data=None):
480
        """Use "data" URL."""
481
        # ignore POSTed data
482
        #
483
        # syntax of data URLs:
484
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
485
        # mediatype := [ type "/" subtype ] *( ";" parameter )
486
        # data      := *urlchar
487
        # parameter := attribute "=" value
488
        import StringIO, mimetools, time
489
        try:
490
            [type, data] = url.split(',', 1)
491
        except ValueError:
492
            raise IOError, ('data error', 'bad data URL')
493
        if not type:
494
            type = 'text/plain;charset=US-ASCII'
495
        semi = type.rfind(';')
496
        if semi >= 0 and '=' not in type[semi:]:
497
            encoding = type[semi+1:]
498
            type = type[:semi]
499
        else:
500
            encoding = ''
501
        msg = []
502
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
503
                                            time.gmtime(time.time())))
504
        msg.append('Content-type: %s' % type)
505
        if encoding == 'base64':
506
            import base64
507
            data = base64.decodestring(data)
508
        else:
509
            data = unquote(data)
510
        msg.append('Content-length: %d' % len(data))
511
        msg.append('')
512
        msg.append(data)
513
        msg = '\n'.join(msg)
514
        f = StringIO.StringIO(msg)
515
        headers = mimetools.Message(f, 0)
516
        f.fileno = None     # needed for addinfourl
517
        return addinfourl(f, headers, url)
518

    
519

    
520
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as URLopener.__init__().

        Keyword arguments (e.g. key_file, cert_file) are passed through
        to the base class as well as positional ones.
        """
        apply(URLopener.__init__, (self,) + args, kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect, giving up with a synthetic 500 error once
        self.maxtries consecutive redirects have been seen.
        """
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the 'location' (or 'uri') header.

        Returns None when neither header is present.
        """
        if headers.has_key('location'):
            newurl = headers['location']
        elif headers.has_key('uri'):
            newurl = headers['uri']
        else:
            return
        fp.read()       # drain the response body before closing
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        See this URL for a description of the basic authentication scheme:
        http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
        # URLopener.http_error_default() always raises IOError, so each
        # of the calls below ends this method.
        if not headers.has_key('www-authenticate'):
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an HTTP request with user:passwd embedded in the URL.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; it is
        # passed as clear_cache so stale cached credentials are dropped.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an HTTPS request with user:passwd embedded in the URL.

        Returns None when no credentials are obtained.
        """
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = '//' + host + selector
        return self.open_https(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host, prompting if needed.

        Results are cached per realm and lower-cased host; a true
        clear_cache discards the cached entry and prompts again.
        """
        key = realm + '@' + host.lower()
        if self.auth_cache.has_key(key):
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
638

    
639

    
640
# Utility functions
641

    
642
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once and the result kept in _localhost.
    """
    global _localhost
    if _localhost:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
649

    
650
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is performed once and the result kept in _thishost.
    """
    global _thishost
    if _thishost:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
657

    
658
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors value is kept
    in _ftperrors.
    """
    global _ftperrors
    if _ftperrors:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
666

    
667
_noheaders = None
668
def noheaders():
669
    """Return an empty mimetools.Message object."""
670
    global _noheaders
671
    if not _noheaders:
672
        import mimetools
673
        import StringIO
674
        _noheaders = mimetools.Message(StringIO.StringIO(), 0)
675
        _noheaders.fp.close()   # Recycle file descriptor
676
    return _noheaders
677

    
678

    
679
# Utility classes
680

    
681
class ftpwrapper:
682
    """Class used by open_ftp() for cache of open FTP connections."""
683

    
684
    def __init__(self, user, passwd, host, port, dirs):
685
        self.user = user
686
        self.passwd = passwd
687
        self.host = host
688
        self.port = port
689
        self.dirs = dirs
690
        self.init()
691

    
692
    def init(self):
693
        import ftplib
694
        self.busy = 0
695
        self.ftp = ftplib.FTP()
696
        self.ftp.connect(self.host, self.port)
697
        self.ftp.login(self.user, self.passwd)
698
        for dir in self.dirs:
699
            self.ftp.cwd(dir)
700

    
701
    def retrfile(self, file, type):
702
        import ftplib
703
        self.endtransfer()
704
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
705
        else: cmd = 'TYPE ' + type; isdir = 0
706
        try:
707
            self.ftp.voidcmd(cmd)
708
        except ftplib.all_errors:
709
            self.init()
710
            self.ftp.voidcmd(cmd)
711
        conn = None
712
        if file and not isdir:
713
            # Use nlst to see if the file exists at all
714
            try:
715
                self.ftp.nlst(file)
716
            except ftplib.error_perm, reason:
717
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
718
            # Restore the transfer mode!
719
            self.ftp.voidcmd(cmd)
720
            # Try to retrieve as a file
721
            try:
722
                cmd = 'RETR ' + file
723
                conn = self.ftp.ntransfercmd(cmd)
724
            except ftplib.error_perm, reason:
725
                if str(reason)[:3] != '550':
726
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
727
        if not conn:
728
            # Set transfer mode to ASCII!
729
            self.ftp.voidcmd('TYPE A')
730
            # Try a directory listing
731
            if file: cmd = 'LIST ' + file
732
            else: cmd = 'LIST'
733
            conn = self.ftp.ntransfercmd(cmd)
734
        self.busy = 1
735
        # Pass back both a suitably decorated object and a retrieval length
736
        return (addclosehook(conn[0].makefile('rb'),
737
                             self.endtransfer), conn[1])
738
    def endtransfer(self):
739
        if not self.busy:
740
            return
741
        self.busy = 0
742
        try:
743
            self.ftp.voidresp()
744
        except ftperrors():
745
            pass
746

    
747
    def close(self):
748
        self.endtransfer()
749
        try:
750
            self.ftp.close()
751
        except ftperrors():
752
            pass
753

    
754
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp, exposing its read methods directly on this object."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        # readlines and fileno are only mirrored when fp provides them.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, repr(id(self)), repr(self.fp))
        return '<%s at %s whose fp = %s>' % details

    def close(self):
        """Close the wrapped file and drop every reference to it."""
        self.read = self.readline = None
        self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
775

    
776
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Remember the hook and the arguments to call it with."""
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Close the wrapped file, then run the hook at most once."""
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            # forget the hook so a later close() does not run it again
            self.closehook = None
            self.hookargs = None
790

    
791
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and keep headers for info() to return."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
800

    
801
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and keep headers and url for the accessors below."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url passed to the constructor."""
        return self.url
814

    
815

    
816
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    A url that already carries a type is returned unchanged.  Otherwise
    the type, and when needed the host and the directory part of the
    path, are inherited from base.
    """
    scheme, rest = splittype(url)
    if scheme:
        # url is complete (it contains a type)
        return url
    netloc, rest = splithost(rest)
    scheme, basepath = splittype(base)      # inherit type from base
    if netloc:
        # url names its own host, so only the type is inherited
        if scheme:
            return scheme + '://' + netloc + rest
        # no type to inherit: url started with // and is returned as is
        return url
    netloc, basepath = splithost(basepath)  # inherit host
    # the tag and query of the base take no part in the join
    basepath = splittag(basepath)[0]
    basepath = splitquery(basepath)[0]
    if rest[:1] != '/':
        # non-absolute path name
        if rest[:1] in ('#', '?'):
            # url is just a tag or query: attach it to the whole basepath
            cut = len(basepath)
        else:
            # otherwise the last component of basepath is replaced
            cut = basepath.rfind('/')
        if cut >= 0:
            basepath = basepath[:cut + 1]
        elif netloc:
            # basepath not absolute but host present: make absolute
            basepath = '/'
        else:
            # keep non-absolute
            basepath = ''
        # Interpret ../ (important because of symlinks)
        while basepath and rest[:3] == '../':
            rest = rest[3:]
            cut = basepath[:-1].rfind('/')
            if cut > 0:
                basepath = basepath[:cut + 1]
            elif cut == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        rest = basepath + rest
    if netloc and rest and rest[0] != '/':
        rest = '/' + rest
    if scheme and netloc:
        return scheme + '://' + netloc + rest
    if scheme:
        return scheme + ':' + rest
    if netloc:
        return '//' + netloc + rest # don't know what this means
    return rest
872

    
873

    
874
# Utilities to parse URLs (most of these return None for missing parts):
875
# unwrap('<URL:type://host/path>') --> 'type://host/path'
876
# splittype('type:opaquestring') --> 'type', 'opaquestring'
877
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
878
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
879
# splitpasswd('user:passwd') -> 'user', 'passwd'
880
# splitport('host:port') --> 'host', 'port'
881
# splitquery('/path?query') --> '/path', 'query'
882
# splittag('/path#tag') --> '/path', 'tag'
883
# splitattr('/path;attr1=value1;attr2=value2;...') ->
884
#   '/path', ['attr1=value1', 'attr2=value2', ...]
885
# splitvalue('attr=value') --> 'attr', 'value'
886
# splitgophertype('/Xselector') --> 'X', 'selector'
887
# unquote('abc%20def') -> 'abc def'
888
# quote('abc def') -> 'abc%20def'
889

    
890
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode input is encoded to ASCII; anything else is returned
    unchanged.  Raises UnicodeError when the URL cannot be encoded.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if type(url) is not type(u""):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
901

    
902
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of angle brackets and a leading
    'URL:' marker are removed, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
909

    
910
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when url does not
    start with a type.
    """
    global _typeprog
    if _typeprog is None:
        # compiled on first use and cached in the module global
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # the match ends right after the ':' that terminates the type
    return found.group(1).lower(), url[found.end():]
923

    
924
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # compiled on first use and cached in the module global
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.groups()
935

    
936
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    When an '@' is present both parts are passed through unquote() and
    returned as a two-element list; otherwise (None, host) is returned.
    """
    global _userprog
    if _userprog is None:
        # compiled on first use and cached in the module global
        import re
        _userprog = re.compile('^([^@]*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.groups()]
947

    
948
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'; passwd is None without one.
    """
    global _passwdprog
    if _passwdprog is None:
        # compiled on first use and cached in the module global
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.groups()
959

    
960
# splitport('host:port') --> 'host', 'port'
961
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  It is None, and host is
    returned whole, unless host ends in ':' followed by digits.
    """
    global _portprog
    if _portprog is None:
        # compiled on first use and cached in the module global
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.groups()
972

    
973
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # compiled on first use and cached in the module global
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.groups()
    nport = None
    if port:
        # an empty port stays None, exactly like an unparsable one
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
994

    
995
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'; query is None without one.
    """
    global _queryprog
    if _queryprog is None:
        # compiled on first use and cached in the module global
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.groups()
1006

    
1007
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'; tag is None without one.
    """
    global _tagprog
    if _tagprog is None:
        # compiled on first use and cached in the module global
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.groups()
1018

    
1019
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    path = pieces[0]
    # everything after the first ';' is the attribute list
    del pieces[0]
    return path, pieces
1024

    
1025
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='; value is None without one.
    """
    global _valueprog
    if _valueprog is None:
        # compiled on first use and cached in the module global
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.groups()
1036

    
1037
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is None unless selector starts with '/' followed by at
    least one more character.
    """
    if not selector.startswith('/') or len(selector) < 2:
        return None, selector
    return selector[1], selector[2:]
1042

    
1043
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  A '%' followed by anything else is
    kept unchanged.
    """
    # int(x, 16) also accepts a sign or surrounding whitespace, so the
    # two characters are checked explicitly; otherwise '%+f' or '% 1'
    # would be decoded although they are not valid escapes.
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    res = [pieces[0]]
    for item in pieces[1:]:
        code = item[:2]
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            res.append(chr(int(code, 16)) + item[2:])
        else:
            res.append('%' + item)
    return "".join(res)
1061

    
1062
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # every '+' stands for a space; the rest is handled by unquote()
    return unquote(s.replace('+', ' '))
1068

    
1069
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1070
               'abcdefghijklmnopqrstuvwxyz'
1071
               '0123456789' '_.-')
1072

    
1073
_fast_safe_test = always_safe + '/'
1074
_fast_safe = None
1075

    
1076
def _fast_quote(s):
    """Quote s, leaving only the characters of _fast_safe_test unescaped.

    The lookup table _fast_safe is built on the first call.
    """
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = {}
        for ch in _fast_safe_test:
            _fast_safe[ch] = ch
    is_safe = _fast_safe.has_key
    out = []
    for ch in s:
        if is_safe(ch):
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)
1088

    
1089
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in safe, like those in always_safe, are left unescaped.
    """
    unescaped = always_safe + safe
    if unescaped == _fast_safe_test:
        # default safe set: use the table-driven implementation
        return _fast_quote(s)
    out = []
    for ch in s:
        if ch in unescaped:
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)
1119

    
1120
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # quote the words separately so the joining '+' is not escaped
    return '+'.join([quote(word, safe) for word in s.split(' ')])
1129

    
1130
def urlencode(query,doseq=0):
1131
    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1132

1133
    If any values in the query arg are sequences and doseq is true, each
1134
    sequence element is converted to a separate parameter.
1135

1136
    If the query arg is a sequence of two-element tuples, the order of the
1137
    parameters in the output will match the order of parameters in the
1138
    input.
1139
    """
1140

    
1141
    if hasattr(query,"items"):
1142
        # mapping objects
1143
        query = query.items()
1144
    else:
1145
        # it's a bother at times that strings and string-like objects are
1146
        # sequences...
1147
        try:
1148
            # non-sequence items should not work with len()
1149
            x = len(query)
1150
            # non-empty strings will fail this
1151
            if len(query) and type(query[0]) != types.TupleType:
1152
                raise TypeError
1153
            # zero-length sequences of all types will get here and succeed,
1154
            # but that's a minor nit - since the original implementation
1155
            # allowed empty dicts that type of behavior probably should be
1156
            # preserved for consistency
1157
        except TypeError:
1158
            ty,va,tb = sys.exc_info()
1159
            raise TypeError, "not a valid non-string sequence or mapping object", tb
1160

    
1161
    l = []
1162
    if not doseq:
1163
        # preserve old behavior
1164
        for k, v in query:
1165
            k = quote_plus(str(k))
1166
            v = quote_plus(str(v))
1167
            l.append(k + '=' + v)
1168
    else:
1169
        for k, v in query:
1170
            k = quote_plus(str(k))
1171
            if type(v) == types.StringType:
1172
                v = quote_plus(v)
1173
                l.append(k + '=' + v)
1174
            elif type(v) == types.UnicodeType:
1175
                # is there a reasonable way to convert to ASCII?
1176
                # encode generates a string, but "replace" or "ignore"
1177
                # lose information and "strict" can raise UnicodeError
1178
                v = quote_plus(v.encode("ASCII","replace"))
1179
                l.append(k + '=' + v)
1180
            else:
1181
                try:
1182
                    # is this a sufficient test for sequence-ness?
1183
                    x = len(v)
1184
                except TypeError:
1185
                    # not a sequence
1186
                    v = quote_plus(str(v))
1187
                    l.append(k + '=' + v)
1188
                else:
1189
                    # loop over the sequence
1190
                    for elt in v:
1191
                        l.append(k + '=' + quote_plus(str(elt)))
1192
    return '&'.join(l)
1193

    
1194
# Proxy handling
1195
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively and variables with
    an empty value are skipped.
    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        lowered = name.lower()
        if lowered.endswith(suffix):
            proxies[lowered[:-len(suffix)]] = value
    return proxies
1210

    
1211
if os.name == 'mac':
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        An empty dictionary is returned when the ic module cannot be
        imported or Internet Config cannot be opened.
        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if config.has_key('UseHTTPProxy') and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty (or partially filled) dictionary is returned when the
        registry cannot be read or holds a value in an unexpected format.
        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            proxies[protocol] = '%s://%s' % (protocol, address)
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key even when a value is missing or
                # malformed; otherwise the handle would leak on every
                # error path.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()
else:
    # By default use environment variables
    getproxies = getproxies_environment
1295

    
1296

    
1297
# Test and time quote() and unquote()
1298
def test1():
1299
    import time
1300
    s = ''
1301
    for i in range(256): s = s + chr(i)
1302
    s = s*4
1303
    t0 = time.time()
1304
    qs = quote(s)
1305
    uqs = unquote(qs)
1306
    t1 = time.time()
1307
    if uqs != s:
1308
        print 'Wrong!'
1309
    print `s`
1310
    print `qs`
1311
    print `uqs`
1312
    print round(t1 - t0, 3), 'sec'
1313

    
1314

    
1315
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print one progress line."""
    progress = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(progress)
1319

    
1320
# Test program
1321
def test(args=[]):
1322
    if not args:
1323
        args = [
1324
            '/etc/passwd',
1325
            'file:/etc/passwd',
1326
            'file://localhost/etc/passwd',
1327
            'ftp://ftp.python.org/etc/passwd',
1328
##          'gopher://gopher.micro.umn.edu/1/',
1329
            'http://www.python.org/index.html',
1330
            ]
1331
        if hasattr(URLopener, "open_https"):
1332
            args.append('https://synergy.as.cmu.edu/~geek/')
1333
    try:
1334
        for url in args:
1335
            print '-'*10, url, '-'*10
1336
            fn, h = urlretrieve(url, None, reporthook)
1337
            print fn
1338
            if h:
1339
                print '======'
1340
                for k in h.keys(): print k + ':', h[k]
1341
                print '======'
1342
            fp = open(fn, 'rb')
1343
            data = fp.read()
1344
            del fp
1345
            if '\r' in data:
1346
                table = string.maketrans("", "")
1347
                data = data.translate(table, "\r")
1348
            print data
1349
            fn, h = None, None
1350
        print '-'*40
1351
    finally:
1352
        urlcleanup()
1353

    
1354
def main():
1355
    import getopt, sys
1356
    try:
1357
        opts, args = getopt.getopt(sys.argv[1:], "th")
1358
    except getopt.error, msg:
1359
        print msg
1360
        print "Use -h for help"
1361
        return
1362
    t = 0
1363
    for o, a in opts:
1364
        if o == '-t':
1365
            t = t + 1
1366
        if o == '-h':
1367
            print "Usage: python urllib.py [-t] [url ...]"
1368
            print "-t runs self-test;",
1369
            print "otherwise, contents of urls are printed"
1370
            return
1371
    if t:
1372
        if t > 1:
1373
            test1()
1374
        test(args)
1375
    else:
1376
        if not args:
1377
            print "Use -h for help"
1378
        for url in args:
1379
            print urlopen(url).read(),
1380

    
1381
# Run test program when run as a script
1382
if __name__ == '__main__':
    # Only when executed directly; importing the module does not call main()
    main()