Merge pull request #198 from ckan/catch-exceptions
Catch exceptions from urllib2.urlopen more comprehensively
This commit is contained in:
commit
121e8bd918
|
@ -1,4 +1,5 @@
|
||||||
import urllib2
|
import urllib2
|
||||||
|
import httplib
|
||||||
|
|
||||||
from ckan.lib.base import c
|
from ckan.lib.base import c
|
||||||
from ckan import model
|
from ckan import model
|
||||||
|
@ -35,21 +36,23 @@ class CKANHarvester(HarvesterBase):
|
||||||
return '/api/%d/search' % self.api_version
|
return '/api/%d/search' % self.api_version
|
||||||
|
|
||||||
def _get_content(self, url):
|
def _get_content(self, url):
|
||||||
http_request = urllib2.Request(
|
http_request = urllib2.Request(url=url)
|
||||||
url = url,
|
|
||||||
)
|
|
||||||
|
|
||||||
api_key = self.config.get('api_key',None)
|
api_key = self.config.get('api_key')
|
||||||
if api_key:
|
if api_key:
|
||||||
http_request.add_header('Authorization',api_key)
|
http_request.add_header('Authorization', api_key)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
http_response = urllib2.urlopen(http_request)
|
http_response = urllib2.urlopen(http_request)
|
||||||
|
except urllib2.HTTPError, e:
|
||||||
|
if e.getcode() == 404:
|
||||||
|
raise ContentNotFoundError('HTTP error: %s' % e.code)
|
||||||
|
else:
|
||||||
|
raise ContentFetchError('HTTP error: %s' % e.code)
|
||||||
except urllib2.URLError, e:
|
except urllib2.URLError, e:
|
||||||
raise ContentFetchError(
|
raise ContentFetchError('URL error: %s' % e.reason)
|
||||||
'Could not fetch url: %s, error: %s' %
|
except httplib.HTTPException, e:
|
||||||
(url, str(e))
|
raise ContentFetchError('HTTP Exception: %s' % e)
|
||||||
)
|
|
||||||
return http_response.read()
|
return http_response.read()
|
||||||
|
|
||||||
def _get_group(self, base_url, group_name):
|
def _get_group(self, base_url, group_name):
|
||||||
|
@ -205,16 +208,21 @@ class CKANHarvester(HarvesterBase):
|
||||||
revision = json.loads(content)
|
revision = json.loads(content)
|
||||||
package_ids = revision['packages']
|
package_ids = revision['packages']
|
||||||
else:
|
else:
|
||||||
log.info('No packages have been updated on the remote CKAN instance since the last harvest job')
|
log.info('No revisions since last harvest %s',
|
||||||
|
last_time)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
except urllib2.HTTPError,e:
|
except ContentNotFoundError, e:
|
||||||
if e.getcode() == 400:
|
log.info('No revisions since last harvest %s', last_time)
|
||||||
log.info('CKAN instance %s does not suport revision filtering' % base_url)
|
return []
|
||||||
|
except ContentFetchError, e:
|
||||||
|
# Any other error indicates that revision filtering is not
|
||||||
|
# working for whatever reason, so fallback to just getting
|
||||||
|
# all the packages, which is expensive but reliable.
|
||||||
|
log.info('CKAN instance %s does not suport revision '
|
||||||
|
'filtering: %s',
|
||||||
|
base_url, e)
|
||||||
get_all_packages = True
|
get_all_packages = True
|
||||||
else:
|
|
||||||
self._save_gather_error('Unable to get content for URL: %s: %s' % (url, str(e)),harvest_job)
|
|
||||||
return None
|
|
||||||
|
|
||||||
if get_all_packages:
|
if get_all_packages:
|
||||||
# Request all remote packages
|
# Request all remote packages
|
||||||
|
@ -467,5 +475,8 @@ class CKANHarvester(HarvesterBase):
|
||||||
class ContentFetchError(Exception):
|
class ContentFetchError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class ContentNotFoundError(ContentFetchError):
|
||||||
|
pass
|
||||||
|
|
||||||
class RemoteResourceError(Exception):
|
class RemoteResourceError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Reference in New Issue