diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 93f9c49..9099b38 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -45,7 +45,10 @@ class CKANHarvester(HarvesterBase): try: http_response = urllib2.urlopen(http_request) except urllib2.HTTPError, e: - raise ContentFetchError('HTTP error: %s' % e.code) + if e.getcode() == 404: + raise ContentNotFoundError('HTTP error: %s' % e.code) + else: + raise ContentFetchError('HTTP error: %s' % e.code) except urllib2.URLError, e: raise ContentFetchError('URL error: %s' % e.reason) except httplib.HTTPException, e: @@ -205,11 +208,15 @@ class CKANHarvester(HarvesterBase): revision = json.loads(content) package_ids = revision['packages'] else: - log.info('No packages have been updated on the remote CKAN instance since the last harvest job') + log.info('No revisions since last harvest %s', + last_time) return [] + except ContentNotFoundError, e: + log.info('No revisions since last harvest %s', last_time) + return [] except ContentFetchError, e: - # Any error at all indicates that revision filtering is not + # Any other error indicates that revision filtering is not # working for whatever reason, so fallback to just getting # all the packages, which is expensive but reliable. log.info('CKAN instance %s does not suport revision ' @@ -468,5 +475,8 @@ class CKANHarvester(HarvesterBase): class ContentFetchError(Exception): pass +class ContentNotFoundError(ContentFetchError): + pass + class RemoteResourceError(Exception): pass