Source code for utah.url

# Ubuntu Testing Automation Harness
# Copyright 2012 Canonical Ltd.

# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
# by the Free Software Foundation.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranties of
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
# PURPOSE.  See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

r"""This module provides the classes/functions needed to:

 - Check that a URL is valid and readable
 - Use a url type in an `argparse.ArgumentParser` object

"""


import os
import urllib
import urllib2
import tempfile
import logging
from urlparse import urlparse
from argparse import ArgumentTypeError

import bzrlib.builtins
import bzrlib.plugin
import bzrlib.errors

from utah.cleanup import cleanup
from utah.exceptions import UTAHException
from utah.timeout import timeout
from utah.retry import retry


# Inspired by: http://stackoverflow.com/a/2070916/183066
class HeadRequest(urllib2.Request):
    """Request that is sent with the HTTP HEAD method rather than GET.

    .. seealso:: :class:`URLChecker`

    """

    def get_method(self):
        """Tell urllib2 which HTTP method to use for this request.

        :returns: Always the string ``'HEAD'``
        :rtype: str

        """
        return 'HEAD'
class URLChecker(urllib.URLopener):
    """An opener that checks a URL is valid and readable.

    To use it, create an object instance and call the `open` method passing
    the url to be checked as argument.

    """

    def open_http(self, url):
        """Check if http URL exists and is readable.

        The check is performed by sending an HTTP HEAD request, waiting for
        the response and checking that the code is 200 OK.

        If a redirect response is received, the URL will still be reported as
        working fine, but the underlying implementation will use an HTTP GET
        method instead, so it won't be as efficient as in the standard case.

        :param url: The HTTP URL to be checked, as handed over by `open`
                    (i.e. without the leading ``http:``)
        :type url: `basestring`
        :returns: The url passed as argument when it's valid and readable.
        :rtype: `basestring`
        :raises URLNotFound:
            When there's a problem opening the URL or isn't found.

        :Example:

        >>> from utah.url import URLChecker
        >>> opener = URLChecker()
        >>> opener.open('http://www.ubuntu.com')
        'http://www.ubuntu.com'

        .. note::
            This method is called by the `open` method when the URL protocol
            is http, so it's not expected to be called directly.

        .. seealso::
            :meth:`open_https`, :meth:`open_local_file`, :class:`URLNotFound`

        """
        return self._check_remote('http', url)

    def open_https(self, url):
        """Check if https URL exists and is readable.

        Same check as :meth:`open_http`. Without this override, https URLs
        would be handled by the stock `urllib.URLopener` implementation,
        which neither returns the URL string nor raises
        :class:`URLNotFound`.

        :param url: The HTTPS URL to be checked, as handed over by `open`
                    (i.e. without the leading ``https:``)
        :type url: `basestring`
        :returns: The url passed as argument when it's valid and readable.
        :rtype: `basestring`
        :raises URLNotFound:
            When there's a problem opening the URL or isn't found.

        .. note::
            This method is called by the `open` method when the URL protocol
            is https, so it's not expected to be called directly.

        .. seealso:: :meth:`open_http`, :class:`URLNotFound`

        """
        return self._check_remote('https', url)

    def _check_remote(self, scheme, url):
        """Send a HEAD request to a remote URL and require a 200 answer.

        :param scheme: URL scheme (``'http'`` or ``'https'``) that `open`
                       stripped from the URL
        :type scheme: `basestring`
        :param url: Scheme-less URL as received by the ``open_*`` methods
        :returns: The complete URL that was checked
        :rtype: `basestring`
        :raises URLNotFound:
            When the request fails or the response code isn't 200.

        """
        if isinstance(url, tuple):
            # When a proxy is configured for the scheme,
            # urllib.URLopener.open passes a (proxy_host, full_url) pair
            # instead of a string. urllib2 takes care of proxies on its
            # own, so only the complete URL is needed here.
            _proxy_host, full_url = url
        else:
            full_url = scheme + ':' + url

        # Going through urllib2 is redundant, since it relies on urllib
        # under the hood, but it keeps the code easy to read.
        # Note that in the case of a redirect, a GET request will be sent
        # instead of a HEAD one since that's how the
        # urllib2.HTTPRedirectHandler implementation works.
        try:
            response = urllib2.urlopen(HeadRequest(full_url))
        except urllib2.URLError:
            raise URLNotFound(full_url)

        # Only the status code matters; always release the connection.
        try:
            status = response.getcode()
        finally:
            response.close()

        if status != 200:
            raise URLNotFound(full_url)

        return full_url

    def open_local_file(self, url):
        """Check if local file exists.

        :param url: The file URL to be checked
        :type url: `basestring`
        :returns: The path to the file if it was found and readable.

            .. note::
                The returned value is a path, not a URL, so it can be used to
                open the file the same way as any other files.

        :rtype: `basestring`
        :raises URLNotFound: when the path to the file doesn't exist.
        :raises URLNotReadable:
            when the user doesn't have read permissions to open the file.

        :Example:

        >>> import tempfile
        >>> with tempfile.NamedTemporaryFile() as f:  # doctest: +ELLIPSIS
        ...     opener = URLChecker()
        ...     opener.open(f.name)
        '/tmp/tmp...'

        .. note::
            This method is called by the `open` method when the URL protocol
            is file, so it's not expected to be called directly.

        .. seealso::
            :meth:`open_http`, :class:`URLNotFound`, :class:`URLNotReadable`

        """
        # Based on urllib.URLopener.open_local_file implementation
        _host, filename = urllib.splithost(url)
        path = os.path.abspath(urllib.url2pathname(filename))

        if not os.path.exists(path):
            raise URLNotFound(path)

        if not os.access(path, os.R_OK):
            raise URLNotReadable(path)

        return path
def url_argument(url):
    """Validate a URL given to an `argparse.ArgumentParser` object.

    Meant to be used as the ``type`` of a parser argument.

    :param url: URL as passed to the parser object.

        .. note::
            A launchpad URL is accepted as well. In that case, the file the
            URL points to is downloaded the same way `bzr export` would do
            it, and the path to the downloaded file is returned.

    :type url: `basestring`
    :returns:
        URL or path to local file (`None` when `url` is the empty string)
    :rtype: `basestring`
    :raises argparse.ArgumentTypeError:
        when the URL is invalid or unreadable. In any case, the error message
        will provide information to be displayed by the
        `argparse.ArgumentParser` object in the command line.

    :Example:

    >>> from utah.url import url_argument
    >>> import argparse
    >>> parser = argparse.ArgumentParser()
    >>> parser.add_argument('url',
    ...                     type=url_argument)  # doctest: +ELLIPSIS
    _StoreAction(... dest='url', ...)
    >>> parser.parse_args(['http://www.ubuntu.com'])
    Namespace(url='http://www.ubuntu.com')
    >>> parser.parse_args(['lp:utah/setup.py'])  # doctest: +ELLIPSIS
    Namespace(url='/tmp/utah_...')

    .. seealso:: :class:`URLChecker`

    """
    if url == '':
        return None

    scheme = urlparse(url).scheme

    # Plain paths, file and http(s) URLs are just checked, not downloaded
    if scheme in ('', 'file', 'http', 'https'):
        try:
            return URLChecker().open(url)
        except URLNotFound:
            raise ArgumentTypeError('URL not found: {}'.format(url))
        except URLNotReadable:
            raise ArgumentTypeError('URL not readable: {}'.format(url))

    if scheme not in ('lp', 'bzr+ssh'):
        raise ArgumentTypeError(
            'Unknown url scheme ({!r}) for url: {}'.format(scheme, url))

    # From here on the URL refers to a file in a bazaar branch

    # Ignore bazaar logging messages
    bzr_logger = logging.getLogger('bzr')
    bzr_logger.addHandler(logging.NullHandler())

    # Enable launchpad URLs in bazaar
    bzrlib.plugin.load_plugins()

    export_cmd = bzrlib.builtins.cmd_export()
    assert export_cmd is not None

    # The export directory is registered for removal right after creation
    export_dir = tempfile.mkdtemp(prefix='utah_')
    cleanup.add_path(export_dir)

    def bzr_export_retriable():
        """Run bzr export, flagging http errors as retriable.

        Workaround for the launchpad problems with http URLs
        that show up every now and then.

        """
        try:
            export_cmd.run(export_dir, url)
        except bzrlib.errors.InvalidHttpResponse as exception:
            raise UTAHException(exception.path, exception.msg, retry=True)

    try:
        # Retry bzr export on http errors for 60 seconds
        timeout(60, retry, bzr_export_retriable,
                logmethod=bzr_logger.debug)
    except bzrlib.errors.BzrError as exception:
        raise ArgumentTypeError(
            'Bazaar export error: {}'.format(exception))

    local_path = os.path.join(export_dir, os.path.basename(url))

    if not os.path.islink(local_path):
        if not os.path.isfile(local_path):
            raise ArgumentTypeError(
                "URL doesn't point to a file in a bazaar branch: {}"
                .format(url))
        return local_path

    # The exported entry is a symbolic link: export the file it points to
    # as well and put a hard link to it in place of the symbolic link.
    link_target = os.readlink(local_path)
    if link_target.startswith('/'):
        raise ArgumentTypeError(
            'URL points to a link to an absolute path in a bazaar branch: {}'
            .format(url))

    # The link is relative to the directory the original URL lives in
    target_url = os.path.join(os.path.dirname(url), link_target)
    target_path = url_argument(target_url)
    os.remove(local_path)
    os.link(target_path, local_path)

    return local_path
class URLNotFound(UTAHException):
    """Error signalling that a URL couldn't be found.

    :Example:

    >>> opener = URLChecker()
    >>> opener.open('http://localhost/invalid_url')
    Traceback (most recent call last):
      ...
    URLNotFound: http://localhost/invalid_url

    >>> opener.open('file:///tmp/invalid_url')
    Traceback (most recent call last):
      ...
    URLNotFound: /tmp/invalid_url

    .. seealso:: :class:`URLChecker`

    """
class URLNotReadable(UTAHException):
    """Error signalling that a URL exists but can't be read.

    :Example:

    >>> import os
    >>> with tempfile.NamedTemporaryFile() as f:  # doctest: +ELLIPSIS
    ...     os.chmod(f.name, 0000)
    ...     opener = URLChecker()
    ...     opener.open(f.name)
    Traceback (most recent call last):
      ...
    URLNotReadable: /tmp/tmp...

    .. seealso:: :class:`URLChecker`

    """
Read the Docs v: latest
Versions
latest
Downloads
PDF
HTML
Epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.