[#96] Handle encoding errors on harvest object endpoint
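When parsing the harvest object content to check whether it is an XML file, etree.fromstring would fail with a UnicodeEncodeError if the content contained characters that could not be encoded. The parse is now retried on the UTF-8 encoded content when that happens.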
This commit is contained in:
parent
040984d4ec
commit
1b458b1772
|
@ -103,8 +103,10 @@ class ViewController(BaseController):
|
|||
content = obj['extras']['original_document']
|
||||
else:
|
||||
abort(404,_('No content found'))
|
||||
|
||||
try:
|
||||
etree.fromstring(re.sub('<\?xml(.*)\?>','',content))
|
||||
except UnicodeEncodeError:
|
||||
etree.fromstring(re.sub('<\?xml(.*)\?>','',content.encode('utf-8')))
|
||||
response.content_type = 'application/xml; charset=utf-8'
|
||||
if not '<?xml' in content.split('\n')[0]:
|
||||
content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content
|
||||
|
|
Loading…
Reference in New Issue