From 1b458b17720ab4dfe688f16445463e9fb33bef48 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 28 Apr 2014 12:48:09 +0100 Subject: [PATCH] [#96] Handle encoding errors on harvest object endpoint When parsing the harvest object content to see if it is an XML file, etree.fromstring would fail if there are unicode encoding errors. --- ckanext/harvest/controllers/view.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ckanext/harvest/controllers/view.py b/ckanext/harvest/controllers/view.py index b3d0f94..b21d291 100644 --- a/ckanext/harvest/controllers/view.py +++ b/ckanext/harvest/controllers/view.py @@ -103,8 +103,10 @@ class ViewController(BaseController): content = obj['extras']['original_document'] else: abort(404,_('No content found')) - - etree.fromstring(re.sub('<\?xml(.*)\?>','',content)) + try: + etree.fromstring(re.sub('<\?xml(.*)\?>','',content)) + except UnicodeEncodeError: + etree.fromstring(re.sub('<\?xml(.*)\?>','',content.encode('utf-8'))) response.content_type = 'application/xml; charset=utf-8' if not '\n' + content