From 6b6458f2eadd4eb022f5dd5d76993e9abbd39e7b Mon Sep 17 00:00:00 2001 From: Andrea Grandi Date: Mon, 29 Oct 2018 11:50:39 +0000 Subject: [PATCH 1/4] Use requests lib instead of urllib2 --- ckanext/harvest/harvesters/ckanharvester.py | 32 ++++++++++++--------- pip-requirements.txt | 7 +++++ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 30a9cbf..9374c69 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -1,8 +1,11 @@ -import urllib -import urllib2 +import requests +from requests.exceptions import HTTPError +from requests.exceptions import InvalidURL + import httplib import datetime import socket +from urllib3.contrib import pyopenssl from ckan import model from ckan.logic import ValidationError, NotFound, get_action @@ -11,12 +14,11 @@ from ckan.lib.munge import munge_name from ckan.plugins import toolkit from ckanext.harvest.model import HarvestObject +from base import HarvesterBase import logging log = logging.getLogger(__name__) -from base import HarvesterBase - class CKANHarvester(HarvesterBase): ''' @@ -34,28 +36,30 @@ class CKANHarvester(HarvesterBase): return '%s/package_search' % self._get_action_api_offset() def _get_content(self, url): - http_request = urllib2.Request(url=url) + headers = {} api_key = self.config.get('api_key') if api_key: - http_request.add_header('Authorization', api_key) + headers['Authorization'] = api_key + + pyopenssl.inject_into_urllib3() try: - http_response = urllib2.urlopen(http_request) - except urllib2.HTTPError, e: - if e.getcode() == 404: + http_request = requests.get(url, headers=headers) + except HTTPError as e: + if e.response.status_code == 404: raise ContentNotFoundError('HTTP error: %s' % e.code) else: raise ContentFetchError('HTTP error: %s' % e.code) - except urllib2.URLError, e: + except InvalidURL as e: raise ContentFetchError('URL error: %s' % e.reason) - except httplib.HTTPException, e: + except httplib.HTTPException as e: raise ContentFetchError('HTTP Exception: %s' % e) - except socket.error, e: + except socket.error as e: raise ContentFetchError('HTTP socket error: %s' % e) - except Exception, e: + except Exception as e: raise ContentFetchError('HTTP general exception: %s' % e) - return http_response.read() + return http_request.text def _get_group(self, base_url, group): url = base_url + self._get_action_api_offset() + '/group_show?id=' + \ diff --git a/pip-requirements.txt b/pip-requirements.txt index 5790afd..91d2d32 100644 --- a/pip-requirements.txt +++ b/pip-requirements.txt @@ -1,2 +1,9 @@ pika==0.9.8 redis==2.10.1 +requests==2.20.0 +idna==2.7 +ndg-httpsclient==0.5.1 +pyasn1==0.4.4 +pyOpenSSL==18.0.0 +urllib3==1.23 +enum34==1.1.6 From 6f82f19049a9746b01a58fca9a140610bd608ad6 Mon Sep 17 00:00:00 2001 From: Andrea Grandi Date: Tue, 30 Oct 2018 09:52:38 +0000 Subject: [PATCH 2/4] Simplify exception handling --- ckanext/harvest/harvesters/ckanharvester.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 9374c69..364b862 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -1,16 +1,12 @@ import requests -from requests.exceptions import HTTPError -from requests.exceptions import InvalidURL +from requests.exceptions import RequestException -import httplib import datetime -import socket from urllib3.contrib import pyopenssl from ckan import model from ckan.logic import ValidationError, NotFound, get_action from ckan.lib.helpers import json -from ckan.lib.munge import munge_name from ckan.plugins import toolkit from ckanext.harvest.model import HarvestObject @@ -46,17 +42,8 @@ class CKANHarvester(HarvesterBase): try: http_request = requests.get(url, headers=headers) - except HTTPError as e: - if e.response.status_code == 404: - raise ContentNotFoundError('HTTP error: %s' % e.code) - else: - raise ContentFetchError('HTTP error: %s' % e.code) - except InvalidURL as e: - raise ContentFetchError('URL error: %s' % e.reason) - except httplib.HTTPException as e: - raise ContentFetchError('HTTP Exception: %s' % e) - except socket.error as e: - raise ContentFetchError('HTTP socket error: %s' % e) + except RequestException as e: + raise ContentFetchError('HTTP error: %s' % e.code) except Exception as e: raise ContentFetchError('HTTP general exception: %s' % e) return http_request.text From 685843e58318cbc967f15ddeb939387262eb7d94 Mon Sep 17 00:00:00 2001 From: Andrea Grandi Date: Thu, 1 Nov 2018 14:00:38 +0000 Subject: [PATCH 3/4] Restore missing import --- ckanext/harvest/harvesters/ckanharvester.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 364b862..694c151 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -3,6 +3,7 @@ from requests.exceptions import RequestException import datetime from urllib3.contrib import pyopenssl +import urllib from ckan import model from ckan.logic import ValidationError, NotFound, get_action From 5b84067790bd1ff156b9ac533b104aeb1bdd1918 Mon Sep 17 00:00:00 2001 From: Andrea Grandi Date: Mon, 5 Nov 2018 10:35:28 +0000 Subject: [PATCH 4/4] Remove not needed dependencies --- pip-requirements.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pip-requirements.txt b/pip-requirements.txt index 91d2d32..37dcd18 100644 --- a/pip-requirements.txt +++ b/pip-requirements.txt @@ -1,9 +1,4 @@ pika==0.9.8 redis==2.10.1 requests==2.20.0 -idna==2.7 -ndg-httpsclient==0.5.1 -pyasn1==0.4.4 pyOpenSSL==18.0.0 -urllib3==1.23 -enum34==1.1.6