2012-08-09 14:38:17 +02:00
|
|
|
import re
|
2013-05-24 19:12:02 +02:00
|
|
|
import xml.etree.ElementTree as etree
|
2018-10-26 13:38:09 +02:00
|
|
|
|
2014-02-10 19:44:46 +01:00
|
|
|
# Pick the exception type raised on malformed XML. ElementTree exposes
# ``ParseError`` on Python 2.7; on Python 2.6 the attribute is missing and
# the expat error is used instead.
if hasattr(etree, 'ParseError'):
    # Python 2.7
    xml_parser_exception = etree.ParseError
else:
    # Python 2.6
    from xml.parsers import expat

    xml_parser_exception = expat.ExpatError
|
|
|
|
|
2011-03-25 18:01:26 +01:00
|
|
|
from pylons.i18n import _
|
|
|
|
|
2012-02-29 11:59:02 +01:00
|
|
|
from ckan import model
|
|
|
|
|
2013-03-06 17:54:33 +01:00
|
|
|
import ckan.plugins as p
|
2011-03-25 18:01:26 +01:00
|
|
|
import ckan.lib.helpers as h, json
|
2018-10-26 13:38:09 +02:00
|
|
|
from ckan.lib.base import BaseController, c, request, response, render, abort
|
2011-03-22 18:33:58 +01:00
|
|
|
|
2015-11-27 12:57:40 +01:00
|
|
|
from ckanext.harvest.logic import HarvestJobExists, HarvestSourceInactiveError
|
2013-01-23 18:33:44 +01:00
|
|
|
from ckanext.harvest.plugin import DATASET_TYPE_NAME
|
2012-03-07 16:04:50 +01:00
|
|
|
|
2011-05-13 15:17:58 +02:00
|
|
|
import logging
|
2018-10-26 13:38:09 +02:00
|
|
|
|
2011-05-13 15:17:58 +02:00
|
|
|
log = logging.getLogger(__name__)
|
2011-03-09 19:56:55 +01:00
|
|
|
|
2018-10-26 13:38:09 +02:00
|
|
|
|
2011-03-09 19:56:55 +01:00
|
|
|
class ViewController(BaseController):
|
|
|
|
|
2013-01-28 17:32:53 +01:00
|
|
|
not_auth_message = p.toolkit._('Not authorized to see this page')
|
2011-03-25 18:01:26 +01:00
|
|
|
|
2012-03-06 17:01:43 +01:00
|
|
|
def __before__(self, action, **params):
    """Delegate to the parent ``__before__`` and set ``c.dataset_type``.

    ``action`` and ``params`` are forwarded unchanged to the parent class.
    Afterwards ``c.dataset_type`` is set to ``DATASET_TYPE_NAME``.
    """
    parent = super(ViewController, self)
    parent.__before__(action, **params)

    c.dataset_type = DATASET_TYPE_NAME
|
|
|
|
|
2018-10-26 13:38:09 +02:00
|
|
|
def delete(self, id):
    """Run ``harvest_source_delete`` for a source and go to its admin page.

    The ``clear`` request parameter (``true`` or ``1``, case-insensitive)
    is passed to the action as ``context['clear_source']`` and also selects
    which success message is flashed.

    Aborts with 404 if the source does not exist and with 401 if the user
    is not authorized.
    """
    try:
        ctx = {'model': model, 'user': c.user}
        clear_param = request.params.get('clear', '').lower()
        ctx['clear_source'] = clear_param in (u'true', u'1')

        delete_source = p.toolkit.get_action('harvest_source_delete')
        delete_source(ctx, {'id': id})

        # Read the flag back from the context, as the action received it.
        flash_text = (
            _('Harvesting source successfully cleared')
            if ctx['clear_source']
            else _('Harvesting source successfully inactivated')
        )
        h.flash_success(flash_text)

        admin_url = h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id)
        h.redirect_to(admin_url)
    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
|
2011-04-05 14:39:23 +02:00
|
|
|
|
2013-03-06 17:33:46 +01:00
|
|
|
def refresh(self, id):
    """Create a harvest job for a source and redirect to its admin page.

    Calls ``harvest_job_create`` with ``run=True``. The outcome is reported
    through a flash message: success, inactive source, job already
    scheduled, or a generic error. A missing source aborts with 404 and an
    unauthorized user with 401. In every non-aborting case the user is
    redirected to the source admin page.
    """
    try:
        ctx = {'model': model, 'user': c.user, 'session': model.Session}
        create_job = p.toolkit.get_action('harvest_job_create')
        create_job(ctx, {'source_id': id, 'run': True})
        h.flash_success(
            _('Harvest will start shortly. Refresh this page for updates.')
        )
    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except HarvestSourceInactiveError:
        inactive_text = _(
            'Cannot create new harvest jobs on inactive '
            'sources. First, please change the source status '
            'to "active".'
        )
        h.flash_error(inactive_text)
    except HarvestJobExists:
        exists_text = _(
            'A harvest job has already been scheduled for '
            'this source'
        )
        h.flash_notice(exists_text)
    except Exception as e:
        # Any other failure is shown to the user rather than propagated.
        h.flash_error('An error occurred: [%s]' % str(e))

    admin_url = h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id)
    h.redirect_to(admin_url)
|
2011-09-08 11:27:36 +02:00
|
|
|
|
2013-05-16 18:51:48 +02:00
|
|
|
def clear(self, id):
    """Run ``harvest_source_clear`` for a source and go to its admin page.

    Flashes a success message when the action completes. A missing source
    aborts with 404 and an unauthorized user with 401; any other error is
    flashed to the user. Unless aborted, the user is redirected to the
    source admin page.
    """
    try:
        ctx = {'model': model, 'user': c.user, 'session': model.Session}
        clear_source = p.toolkit.get_action('harvest_source_clear')
        clear_source(ctx, {'id': id})
        h.flash_success(_('Harvest source cleared'))
    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except Exception as e:
        h.flash_error('An error occurred: [%s]' % str(e))

    admin_url = h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=id)
    h.redirect_to(admin_url)
|
2013-05-16 18:51:48 +02:00
|
|
|
|
2014-04-30 18:45:07 +02:00
|
|
|
def show_object(self, id, ref_type='object'):
    """Return the stored content of a harvest object as the response body.

    The object is fetched with the ``harvest_object_show`` action, keyed by
    ``id`` when ``ref_type`` is ``'object'`` and by ``dataset_id`` when it
    is ``'dataset'``. The content is taken from ``obj['content']`` or, if
    that is empty, from ``obj['extras']['original_document']``.

    The response content type is set to XML if the content parses as XML,
    to JSON if it parses as JSON, and is left untouched otherwise. XML
    content without a declaration on its first line gets one prepended.

    :param id: id of the harvest object or of the dataset, per ``ref_type``
    :param ref_type: ``'object'`` (default) or ``'dataset'``
    :returns: the content encoded as UTF-8

    Aborts with 404 when the object or its content cannot be found (an
    unrecognized ``ref_type`` is treated as "no content"), with 401 when
    the user is not authorized, and with 500 on any other error.
    """
    # Non-greedy so that, on single-line documents, only the declaration /
    # processing instruction itself is stripped before the trial parse.
    xml_decl = r'<\?xml(.*?)\?>'

    content = None
    body = None
    try:
        context = {'model': model, 'user': c.user}

        obj = None
        if ref_type == 'object':
            obj = p.toolkit.get_action('harvest_object_show')(context, {'id': id})
        elif ref_type == 'dataset':
            obj = p.toolkit.get_action('harvest_object_show')(
                context, {'dataset_id': id}
            )

        if obj is not None:
            if obj['content']:
                content = obj['content']
            elif 'original_document' in obj['extras']:
                content = obj['extras']['original_document']

        if content is not None:
            # Check content type. It will probably be either XML or JSON
            try:
                try:
                    etree.fromstring(re.sub(xml_decl, '', content))
                except UnicodeEncodeError:
                    etree.fromstring(
                        re.sub(xml_decl, '', content.encode('utf-8'))
                    )

                response.content_type = 'application/xml; charset=utf-8'
                if '<?xml' not in content.split('\n')[0]:
                    content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content

            except xml_parser_exception:
                try:
                    # Parse the content actually selected above; it may come
                    # from the extras rather than from obj['content'].
                    json.loads(content)
                    response.content_type = 'application/json; charset=utf-8'
                except ValueError:
                    # Just return whatever it is
                    pass

            # Content-Length must describe the bytes that are sent, not the
            # number of characters in the unencoded text.
            body = content.encode('utf-8')
            response.headers['Content-Length'] = len(body)
    except p.toolkit.ObjectNotFound as e:
        abort(404, _(str(e)))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except Exception as e:
        msg = 'An error occurred: [%s]' % str(e)
        abort(500, msg)

    if content is None:
        # Done outside the ``try``: abort() signals the error by raising,
        # and the broad handler above would otherwise report it as a 500.
        abort(404, _('No content found'))

    return body
|
2013-01-28 17:32:53 +01:00
|
|
|
|
|
|
|
def _get_source_for_job(self, source_id):
    """Fetch a harvest source dict via the ``harvest_source_show`` action.

    Aborts with 404 if the source is not found, with 401 if the user is not
    authorized and with 500 on any other error.

    :param source_id: passed to the action as ``id``
    :returns: the value returned by ``harvest_source_show``
    """
    try:
        ctx = {'model': model, 'user': c.user}
        show_source = p.toolkit.get_action('harvest_source_show')
        source_dict = show_source(ctx, {'id': source_id})
    except p.toolkit.ObjectNotFound:
        abort(404, p.toolkit._('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except Exception as e:
        abort(500, 'An error occurred: [%s]' % str(e))

    return source_dict
|
|
|
|
|
2013-01-23 18:33:44 +01:00
|
|
|
def show_job(self, id, source_dict=False, is_last=False):
    """Render the page for a single harvest job.

    Loads the job (``harvest_job_show``) and its report
    (``harvest_job_report``). If ``source_dict`` is falsy, the source is
    loaded with ``harvest_source_show`` using the job's ``source_id``.

    :param id: id of the harvest job
    :param source_dict: already-loaded source dict, or a falsy value
    :param is_last: passed to the template as ``is_last_job``
    :returns: the rendered ``source/job/read.html`` template

    Aborts with 404 if a lookup raises ObjectNotFound, with 401 if the
    user is not authorized and with 500 on any other error.
    """
    try:
        ctx = {'model': model, 'user': c.user}
        get_action = p.toolkit.get_action

        job = get_action('harvest_job_show')(ctx, {'id': id})
        job_report = get_action('harvest_job_report')(ctx, {'id': id})

        if not source_dict:
            source_dict = get_action('harvest_source_show')(
                ctx, {'id': job['source_id']}
            )

        template_vars = {
            'harvest_source': source_dict,
            'job': job,
            'job_report': job_report,
            'is_last_job': is_last,
        }
        return render('source/job/read.html', extra_vars=template_vars)

    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest job not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except Exception as e:
        abort(500, 'An error occurred: [%s]' % str(e))
|
2013-01-23 18:33:44 +01:00
|
|
|
|
2013-02-08 13:15:14 +01:00
|
|
|
def about(self, id):
    """Render the "about" page of a harvest source.

    Loads the source with ``harvest_source_show`` and renders
    ``source/about.html`` with it as ``harvest_source``. Aborts with 404 if
    the source is not found and with 401 if the user is not authorized.
    """
    try:
        ctx = {'model': model, 'user': c.user}
        show_source = p.toolkit.get_action('harvest_source_show')
        harvest_source = show_source(ctx, {'id': id})

        template_vars = {'harvest_source': harvest_source}
        return render('source/about.html', extra_vars=template_vars)
    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
|
2013-01-28 17:32:53 +01:00
|
|
|
|
2013-02-08 14:52:48 +01:00
|
|
|
def admin(self, id):
    """Render the admin page of a harvest source.

    Checks access to ``harvest_source_update`` for the source first, then
    loads it with ``harvest_source_show`` and renders ``source/admin.html``
    with it as ``harvest_source``. Aborts with 404 if the source is not
    found and with 401 if the user is not authorized.
    """
    try:
        ctx = {'model': model, 'user': c.user}
        p.toolkit.check_access('harvest_source_update', ctx, {'id': id})

        show_source = p.toolkit.get_action('harvest_source_show')
        harvest_source = show_source(ctx, {'id': id})

        template_vars = {'harvest_source': harvest_source}
        return render('source/admin.html', extra_vars=template_vars)
    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
|
|
|
|
|
2017-10-25 16:46:08 +02:00
|
|
|
def abort_job(self, source, id):
    """Abort a harvest job and redirect to the admin page of its source.

    :param source: id of the harvest source, used for the redirect
    :param id: id of the harvest job to abort

    Flashes a success message once the action has completed. Aborts with
    404 if the job is not found, with 401 if the user is not authorized
    and with 500 on any other error.
    """
    try:
        context = {'model': model, 'user': c.user}
        # The success message must only be shown after the job has really
        # been aborted; this call is also what can raise the ObjectNotFound
        # and NotAuthorized errors handled below.
        # NOTE(review): action name assumed to be 'harvest_job_abort' --
        # confirm it matches the action registered by the harvest plugin.
        p.toolkit.get_action('harvest_job_abort')(context, {'id': id})
        h.flash_success(_('Harvest job stopped'))

    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest job not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except Exception as e:
        msg = 'An error occurred: [%s]' % str(e)
        abort(500, msg)

    h.redirect_to(h.url_for('{0}_admin'.format(DATASET_TYPE_NAME), id=source))
|
2013-02-08 14:52:48 +01:00
|
|
|
|
2013-01-23 18:33:44 +01:00
|
|
|
def show_last_job(self, source):
    """Render the page of the most recent job of a harvest source.

    Loads the source through ``_get_source_for_job`` and reads the job from
    ``source_dict['status']['last_job']``. Aborts with 404 when that value
    is falsy; otherwise delegates to ``show_job`` with ``is_last=True``.
    """
    source_dict = self._get_source_for_job(source)

    last_job = source_dict['status']['last_job']
    if not last_job:
        abort(404, _('No jobs yet for this source'))

    return self.show_job(last_job['id'], source_dict=source_dict, is_last=True)
|
2012-08-14 19:01:29 +02:00
|
|
|
|
2013-02-04 19:20:58 +01:00
|
|
|
def list_jobs(self, source):
    """Render the list of harvest jobs of a source.

    Loads the source with ``harvest_source_show``, then its jobs with
    ``harvest_job_list`` (keyed by the loaded source's ``id``), and renders
    ``source/job/list.html`` with ``harvest_source`` and ``jobs``.

    Aborts with 404 if the source is not found, with 401 if the user is not
    authorized and with 500 on any other error.
    """
    try:
        ctx = {'model': model, 'user': c.user}
        get_action = p.toolkit.get_action

        harvest_source = get_action('harvest_source_show')(ctx, {'id': source})
        jobs = get_action('harvest_job_list')(
            ctx, {'source_id': harvest_source['id']}
        )

        template_vars = {'harvest_source': harvest_source, 'jobs': jobs}
        return render('source/job/list.html', extra_vars=template_vars)

    except p.toolkit.ObjectNotFound:
        abort(404, _('Harvest source not found'))
    except p.toolkit.NotAuthorized:
        abort(401, self.not_auth_message)
    except Exception as e:
        abort(500, 'An error occurred: [%s]' % str(e))
|