diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index fb190b9..e352350 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -1,5 +1,5 @@ import requests -from requests.exceptions import RequestException +from requests.exceptions import HTTPError, RequestException import datetime from urllib3.contrib import pyopenssl @@ -43,8 +43,10 @@ class CKANHarvester(HarvesterBase): try: http_request = requests.get(url, headers=headers) + except HTTPError as e: + raise ContentFetchError('HTTP error: %s %s' % (e.response.status_code, e.request.url)) except RequestException as e: - raise ContentFetchError('HTTP error: %s' % e.code) + raise ContentFetchError('Request error: %s' % e) except Exception as e: raise ContentFetchError('HTTP general exception: %s' % e) return http_request.text diff --git a/ckanext/harvest/tests/harvesters/test_ckanharvester.py b/ckanext/harvest/tests/harvesters/test_ckanharvester.py index d46fece..aa91fb3 100644 --- a/ckanext/harvest/tests/harvesters/test_ckanharvester.py +++ b/ckanext/harvest/tests/harvesters/test_ckanharvester.py @@ -2,8 +2,8 @@ import copy from nose.tools import assert_equal, assert_raises, assert_in import json -from mock import patch, MagicMock - +from mock import patch, MagicMock, Mock +from requests.exceptions import HTTPError, RequestException try: from ckan.tests.helpers import reset_db, call_action @@ -14,6 +14,7 @@ except ImportError: from ckan import model from ckan.plugins import toolkit +from ckanext.harvest.harvesters.ckanharvester import ContentFetchError from ckanext.harvest.tests.factories import (HarvestSourceObj, HarvestJobObj, HarvestObjectObj) from ckanext.harvest.tests.lib import run_harvest @@ -338,3 +339,40 @@ class TestCkanHarvester(object): config=json.dumps(config)) assert_in('default_extras must be a dictionary', str(harvest_context.exception)) + + 
+    @patch('ckanext.harvest.harvesters.ckanharvester.pyopenssl.inject_into_urllib3')
+    @patch('ckanext.harvest.harvesters.ckanharvester.CKANHarvester.config')
+    @patch('ckanext.harvest.harvesters.ckanharvester.requests.get',
+           side_effect=RequestException('Test exception'))
+    def test_get_content_handles_request_exception(
+        self, mock_requests_get, mock_config, mock_pyopenssl_inject
+    ):
+        """A RequestException from requests.get becomes a ContentFetchError.
+
+        The patched ``requests.get`` raises on every call, so no network
+        traffic happens; the pyopenssl injection is patched out as well.
+        """
+        mock_config.return_value = {}
+
+        harvester = CKANHarvester()
+
+        with assert_raises(ContentFetchError) as context:
+            harvester._get_content("http://test.example.gov.uk")
+
+        # assert_equal (rather than a bare assert) reports both values on
+        # failure and matches the other nose-style assertions in this module.
+        assert_equal(str(context.exception), 'Request error: Test exception')
+
+    class MockHTTPError(HTTPError):
+        """HTTPError carrying a mocked 404 response and a mocked request."""
+
+        def __init__(self):
+            # Call the base initializer directly: zero-argument super() is
+            # Python 3 only, and this class name is not reachable from method
+            # scope because it is defined inside the test class body.
+            # RequestException.__init__ stores both keyword arguments on the
+            # instance as ``response`` and ``request``.
+            HTTPError.__init__(
+                self,
+                response=Mock(status_code=404),
+                request=Mock(url="http://test.example.gov.uk"),
+            )
+
+    @patch('ckanext.harvest.harvesters.ckanharvester.pyopenssl.inject_into_urllib3')
+    @patch('ckanext.harvest.harvesters.ckanharvester.CKANHarvester.config')
+    @patch('ckanext.harvest.harvesters.ckanharvester.requests.get',
+           side_effect=MockHTTPError())
+    def test_get_content_handles_http_error(
+        self, mock_requests_get, mock_config, mock_pyopenssl_inject
+    ):
+        """An HTTPError becomes a ContentFetchError with status code and URL.
+
+        NOTE(review): the HTTPError is injected through the mocked
+        ``requests.get``. In the requests library this exception is raised by
+        ``Response.raise_for_status()``; confirm that ``_get_content`` can
+        actually reach this branch against a real server.
+        """
+        mock_config.return_value = {}
+
+        harvester = CKANHarvester()
+
+        with assert_raises(ContentFetchError) as context:
+            harvester._get_content("http://test.example.gov.uk")
+
+        assert_equal(str(context.exception),
+                     'HTTP error: 404 http://test.example.gov.uk')