Source code for utah.url
# Ubuntu Testing Automation Harness
# Copyright 2012 Canonical Ltd.
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
# by the Free Software Foundation.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranties of
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
r"""This module provides the classes/functions needed to:
- Check that a URL is valid and readable
- Use a url type in an `argparse.ArgumentParser` object
"""
import os
import urllib
import urllib2
import tempfile
import logging
from urlparse import urlparse
from argparse import ArgumentTypeError
import bzrlib.builtins
import bzrlib.plugin
import bzrlib.errors
from utah.cleanup import cleanup
from utah.exceptions import UTAHException
from utah.timeout import timeout
from utah.retry import retry
# Inspired by: http://stackoverflow.com/a/2070916/183066
class HeadRequest(urllib2.Request):
    """A request that sends the HEAD method instead of GET.

    The only difference with the base request class is the value
    returned by :meth:`get_method`.

    .. seealso:: :class:`URLChecker`

    """

    def get_method(self):
        """Return the HTTP method used to request the URL.

        :returns: 'HEAD'
        :rtype: str

        """
        return 'HEAD'
class URLChecker(urllib.URLopener):
    """An opener that checks a URL is valid and readable.

    To use it, create an object instance and call the `open`
    method passing the url to be checked as argument.

    The `open` method dispatches to the ``open_<scheme>`` method that
    matches the URL protocol, so http, https and local file URLs are
    handled by the methods defined in this class.

    """

    def open_http(self, url):
        """Check if http URL exists and is readable.

        The check is performed by sending an HTTP HEAD
        request, waiting for the response and checking that
        the code is 200 OK.

        If a redirect response is received, the URL will still
        be reported as working fine, but the underlying
        implementation will use an HTTP GET method instead, so
        it won't be as efficient as in the standard case.

        :param url: The HTTP URL to be checked (without the scheme prefix)
        :type url: `basestring`
        :returns: The url passed as argument when it's valid and readable.
        :rtype: `basestring`
        :raises URLNotFound:
            When there's a problem opening the URL or isn't found.

        :Example:

        >>> from utah.url import URLChecker
        >>> opener = URLChecker()
        >>> opener.open('http://www.ubuntu.com')
        'http://www.ubuntu.com'

        .. note::
            This method is called by the `open` method when the URL protocol
            is http, so it's not expected to be called directly.

        .. seealso:: :meth:`open_https`, :meth:`open_local_file`,
            :class:`URLNotFound`

        """
        return self._check_remote('http', url)

    def open_https(self, url):
        """Check if https URL exists and is readable.

        Same check as :meth:`open_http`, but for the https protocol.

        Without this override the https handler inherited from the base
        opener would be used, which doesn't return the URL and doesn't
        raise :class:`URLNotFound` on failure.

        :param url: The HTTPS URL to be checked (without the scheme prefix)
        :type url: `basestring`
        :returns: The url passed as argument when it's valid and readable.
        :rtype: `basestring`
        :raises URLNotFound:
            When there's a problem opening the URL or isn't found.

        .. note::
            This method is called by the `open` method when the URL protocol
            is https, so it's not expected to be called directly.

        .. seealso:: :meth:`open_http`, :class:`URLNotFound`

        """
        return self._check_remote('https', url)

    def _check_remote(self, scheme, url):
        """Send a HEAD request and check that the response code is 200.

        :param scheme: URL scheme to prepend ('http' or 'https')
        :type scheme: `basestring`
        :param url: URL without the scheme prefix, as received from `open`
        :type url: `basestring`
        :returns: The full URL (scheme included) when the check succeeds.
        :rtype: `basestring`
        :raises URLNotFound:
            When the URL can't be opened or the response code isn't 200.

        """
        # This is redundant because urllib2 will call urllib
        # under the hood, but makes code easy to read.
        # Note that in the case of a redirect,
        # a GET request will be sent instead of a HEAD one
        # since that's how the urllib2.HTTPRedirectHandler
        # implementation works
        full_url = '{}:{}'.format(scheme, url)
        try:
            response = urllib2.urlopen(HeadRequest(full_url))
        except urllib2.URLError:
            raise URLNotFound(full_url)

        # Only the status code is needed, so release the connection
        # as soon as it has been read.
        try:
            status_code = response.getcode()
        finally:
            response.close()

        if status_code != 200:
            raise URLNotFound(full_url)
        return full_url

    def open_local_file(self, url):
        """Check if local file exists.

        :param url: The file URL to be checked
        :type url: `basestring`
        :returns: The path to the file if it was found and readable.

            .. note::
                The returned value is a path, not a URL, so it
                can be used to open the file the same way as
                any other files.

        :rtype: `basestring`
        :raises URLNotFound:
            when the path to the file doesn't exist.
        :raises URLNotReadable:
            when the user doesn't have read permissions to open the file.

        :Example:

        >>> import tempfile
        >>> with tempfile.NamedTemporaryFile() as f:  # doctest: +ELLIPSIS
        ...     opener = URLChecker()
        ...     opener.open(f.name)
        '/tmp/tmp...'

        .. note::
            This method is called by the `open` method when the URL protocol
            is file, so it's not expected to be called directly.

        .. seealso::
            :meth:`open_http`, :class:`URLNotFound`, :class:`URLNotReadable`

        """
        # Based on urllib.URLopener.open_local_file implementation
        _host, filename = urllib.splithost(url)
        path = os.path.abspath(urllib.url2pathname(filename))

        if not os.path.exists(path):
            raise URLNotFound(path)
        if not os.access(path, os.R_OK):
            raise URLNotReadable(path)
        return path
def url_argument(url):
    """URL argument to be used in an `argparse.ArgumentParser` object.

    :param url: URL as passed to the parser object.

        .. note::
            The URL passed as argument can be a launchpad URL. In that case,
            the file pointed by the URL will be downloaded as when using `bzr
            export` and the returned value is the path to the downloaded
            file.

    :type url: `basestring`
    :returns:
        URL or path to local file (`None` when the URL is an empty string)
    :rtype: `basestring`
    :raises argparse.ArgumentTypeError:
        when the URL is invalid or unreadable. In any case, the error message
        will provide information to be displayed by the
        `argparse.ArgumentParser` object in the command line.

    :Example:

    >>> from utah.url import url_argument
    >>> import argparse
    >>> parser = argparse.ArgumentParser()
    >>> parser.add_argument('url', type=url_argument)  # doctest: +ELLIPSIS
    _StoreAction(... dest='url', ...)
    >>> parser.parse_args(['http://www.ubuntu.com'])
    Namespace(url='http://www.ubuntu.com')
    >>> parser.parse_args(['lp:utah/setup.py'])  # doctest: +ELLIPSIS
    Namespace(url='/tmp/utah_...')

    .. seealso:: :class:`URLChecker`

    """
    if url == '':
        return None

    scheme = urlparse(url).scheme

    # Plain URLs and local paths only need to be checked, not downloaded
    if scheme in ('', 'file', 'http', 'https'):
        url_checker = URLChecker()
        try:
            return url_checker.open(url)
        except URLNotFound:
            raise ArgumentTypeError('URL not found: {}'.format(url))
        except URLNotReadable:
            raise ArgumentTypeError('URL not readable: {}'.format(url))

    if scheme not in ('lp', 'bzr+ssh'):
        raise ArgumentTypeError('Unknown url scheme ({!r}) for url: {}'
                                .format(scheme, url))

    # Ignore bazaar logging messages.
    # The handler is added only once: this function may be called many
    # times (it even calls itself to resolve links), and adding a new
    # handler on every call would make the logger's handler list grow.
    bzr_logger = logging.getLogger('bzr')
    if not any(isinstance(handler, logging.NullHandler)
               for handler in bzr_logger.handlers):
        bzr_logger.addHandler(logging.NullHandler())

    bzrlib.plugin.load_plugins()  # Enable launchpad URLs in bazaar
    cmd = bzrlib.builtins.cmd_export()
    assert cmd is not None

    # Files are exported to a temporary directory that is registered
    # in the cleanup object
    tmp_dir = tempfile.mkdtemp(prefix='utah_')
    cleanup.add_path(tmp_dir)

    def bzr_export_retriable():
        """bzr export a URL retrying on http errors

        This is a workaround to launchpad problems with http URLs that
        happen from time to time

        """
        try:
            cmd.run(tmp_dir, url)
        except bzrlib.errors.InvalidHttpResponse as exception:
            raise UTAHException(exception.path, exception.msg, retry=True)

    try:
        # Retry bzr export on http errors for 60 seconds
        timeout(60, retry, bzr_export_retriable,
                logmethod=bzr_logger.debug)
    except bzrlib.errors.BzrError as exception:
        raise ArgumentTypeError('Bazaar export error: {}'
                                .format(exception))

    full_url = os.path.join(tmp_dir, os.path.basename(url))

    if os.path.islink(full_url):
        # The exported file is a link: export the file it points to
        # and replace the link with a hard link to that file
        link_path = os.readlink(full_url)
        if link_path.startswith('/'):
            raise ArgumentTypeError('URL points to a link to an absolute '
                                    'path in a bazaar branch: {}'
                                    .format(url))
        # Relative links are resolved against the directory of the
        # original URL inside the branch
        dirname = os.path.dirname(url)
        link_url = os.path.join(dirname, link_path)
        target_path = url_argument(link_url)
        os.remove(full_url)
        os.link(target_path, full_url)
    elif not os.path.isfile(full_url):
        raise ArgumentTypeError("URL doesn't point to a file "
                                'in a bazaar branch: {}'
                                .format(url))

    return full_url
class URLNotFound(UTAHException):
    """Exception raised when a URL isn't found.

    :Example:

    >>> opener = URLChecker()
    >>> opener.open('http://localhost/invalid_url')
    Traceback (most recent call last):
      ...
    URLNotFound: http://localhost/invalid_url

    >>> opener.open('file:///tmp/invalid_url')
    Traceback (most recent call last):
      ...
    URLNotFound: /tmp/invalid_url

    .. seealso:: :class:`URLChecker`

    """
class URLNotReadable(UTAHException):
    """Exception raised when a URL isn't readable.

    :Example:

    >>> import os
    >>> import tempfile
    >>> with tempfile.NamedTemporaryFile() as f:  # doctest: +ELLIPSIS
    ...     os.chmod(f.name, 0000)
    ...     opener = URLChecker()
    ...     opener.open(f.name)
    Traceback (most recent call last):
      ...
    URLNotReadable: /tmp/tmp...

    .. seealso:: :class:`URLChecker`

    """