From 803b228d1c09315ca47cb9fd381a9e3dc468150c Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 30 Nov 2012 14:03:04 +0000 Subject: [PATCH] Update harvest source create and update logic functions `harvest_source_create` and `harvest_source_update` now call `package_create` and `package_update` respectively, making sure to define a 'harvest_source' type. The returned dict uses the db_to_form schema. --- ckanext/harvest/logic/action/create.py | 83 +++++++------ ckanext/harvest/logic/action/update.py | 87 +++++++------- ckanext/harvest/tests/test_action.py | 156 +++++++++++++++++++++++++ 3 files changed, 253 insertions(+), 73 deletions(-) create mode 100644 ckanext/harvest/tests/test_action.py diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index 459c4a1..a71250a 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -1,51 +1,68 @@ import re import logging -from ckan.logic import NotFound, ValidationError, check_access -from ckanext.harvest.logic import HarvestJobExists -from ckan.lib.navl.dictization_functions import validate +from ckan import logic -from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject) -from ckanext.harvest.logic.schema import old_default_harvest_source_schema as default_harvest_source_schema -from ckanext.harvest.logic.dictization import (harvest_source_dictize, - harvest_job_dictize) +from ckan.logic import NotFound, check_access +from ckanext.harvest.logic import HarvestJobExists + +from ckanext.harvest.plugin import DATASET_TYPE_NAME +from ckanext.harvest.model import (HarvestSource, HarvestJob) +from ckanext.harvest.logic.dictization import harvest_job_dictize +from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list log = logging.getLogger(__name__) def harvest_source_create(context,data_dict): + ''' + Creates a new harvest source + 
+ This method just proxies the request to package_create, + which will create a harvest_source dataset type and the + HarvestSource object. All auth checks and validation will + be done there. We only make sure to set the dataset type. + + Note that the harvest source type (ckan, waf, csw, etc) + is now set via the source_type field. + + :param url: the URL for the harvest source + :type url: string + :param name: the name of the new harvest source, must be between 2 and 100 + characters long and contain only lowercase alphanumeric characters + :type name: string + :param title: the title of the dataset (optional, default: same as + ``name``) + :type title: string + :param notes: a description of the harvest source (optional) + :type notes: string + :param source_type: the harvester type for this source. This must be one + of the registered harvesters, eg 'ckan', 'csw', etc. + :type source_type: string + :param frequency: the frequency in which this harvester should run. See + ``ckanext.harvest.model`` source for possible values. Default is + 'MANUAL' + :type frequency: string + :param config: extra configuration options for the particular harvester + type. Should be serialized as JSON. 
(optional) + :type config: string + + + :returns: the newly created harvest source + :rtype: dictionary + ''' log.info('Creating harvest source: %r', data_dict) - check_access('harvest_source_create',context,data_dict) - model = context['model'] - session = context['session'] - schema = context.get('schema') or default_harvest_source_schema() + data_dict['type'] = DATASET_TYPE_NAME - data, errors = validate(data_dict, schema) + context['extras_as_string'] = True + package_dict = logic.get_action('package_create')(context, data_dict) - if errors: - session.rollback() - log.warn('Harvest source does not validate: %r', errors) - raise ValidationError(errors,_error_summary(errors)) + context['schema'] = harvest_source_db_to_form_schema() + source = logic.get_action('package_show')(context, package_dict) - source = HarvestSource() - source.url = data['url'].strip() - source.type = data['type'] - - opt = ['active','title','description','user_id', - 'publisher_id','config', 'frequency'] - for o in opt: - if o in data and data[o] is not None: - source.__setattr__(o,data[o]) - - if 'active' in data_dict: - source.active = data['active'] - - source.save() - log.info('Harvest source created: %s', source.id) - - return harvest_source_dictize(source,context) + return source def harvest_job_create(context,data_dict): diff --git a/ckanext/harvest/logic/action/update.py b/ckanext/harvest/logic/action/update.py index d91b60a..91c206d 100644 --- a/ckanext/harvest/logic/action/update.py +++ b/ckanext/harvest/logic/action/update.py @@ -8,69 +8,76 @@ from ckan.logic import get_action from ckanext.harvest.interfaces import IHarvester from ckan.model import Package +from ckan import logic -from ckan.logic import NotFound, ValidationError, check_access -from ckan.lib.navl.dictization_functions import validate +from ckan.logic import NotFound, check_access +from ckanext.harvest.plugin import DATASET_TYPE_NAME from ckanext.harvest.queue import get_gather_publisher -from 
ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject) -from ckanext.harvest.logic.schema import old_default_harvest_source_schema as default_harvest_source_schema +from ckanext.harvest.model import HarvestSource, HarvestObject from ckanext.harvest.logic import HarvestJobExists -from ckanext.harvest.logic.dictization import (harvest_source_dictize,harvest_object_dictize) +from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema + -from ckanext.harvest.logic.action.create import _error_summary from ckanext.harvest.logic.action.get import harvest_source_show, harvest_job_list, _get_sources_for_user log = logging.getLogger(__name__) def harvest_source_update(context,data_dict): + ''' + Updates an existing harvest source - check_access('harvest_source_update',context,data_dict) + This method just proxies the request to package_update, + which will update the harvest_source dataset and the + HarvestSource object. All auth checks and validation will + be done there. We only make sure to set the dataset type. - model = context['model'] - session = context['session'] + Note that the harvest source type (ckan, waf, csw, etc) + is now set via the source_type field. - source_id = data_dict.get('id') - schema = context.get('schema') or default_harvest_source_schema() + :param id: the name or id of the harvest source to update + :type id: string + :param url: the URL for the harvest source + :type url: string + :param name: the name of the new harvest source, must be between 2 and 100 + characters long and contain only lowercase alphanumeric characters + :type name: string + :param title: the title of the dataset (optional, default: same as + ``name``) + :type title: string + :param notes: a description of the harvest source (optional) + :type notes: string + :param source_type: the harvester type for this source. This must be one + of the registered harvesters, eg 'ckan', 'csw', etc. 
+ :type source_type: string + :param frequency: the frequency in which this harvester should run. See + ``ckanext.harvest.model`` source for possible values. Default is + 'MANUAL' + :type frequency: string + :param config: extra configuration options for the particular harvester + type. Should be serialized as JSON. (optional) + :type config: string - log.info('Harvest source %s update: %r', source_id, data_dict) - source = HarvestSource.get(source_id) - if not source: - log.error('Harvest source %s does not exist', source_id) - raise NotFound('Harvest source %s does not exist' % source_id) - data, errors = validate(data_dict, schema) + :returns: the updated harvest source + :rtype: dictionary - if errors: - session.rollback() - raise ValidationError(errors,_error_summary(errors)) + ''' + log.info('Updating harvest source: %r', data_dict) - fields = ['url','title','type','description','user_id','publisher_id'] - for f in fields: - if f in data and data[f] is not None: - if f == 'url': - data[f] = data[f].strip() - source.__setattr__(f,data[f]) + data_dict['type'] = DATASET_TYPE_NAME - if 'active' in data_dict: - source.active = data['active'] + context['extras_as_string'] = True + package_dict = logic.get_action('package_update')(context, data_dict) - if 'config' in data_dict: - source.config = data['config'] + context['schema'] = harvest_source_db_to_form_schema() + source = logic.get_action('package_show')(context, package_dict) + + return source - source.save() - # Abort any pending jobs - if not source.active: - jobs = HarvestJob.filter(source=source,status=u'New') - log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count()) - if jobs: - for job in jobs: - job.status = u'Aborted' - job.save() - return harvest_source_dictize(source,context) def harvest_objects_import(context,data_dict): ''' diff --git a/ckanext/harvest/tests/test_action.py b/ckanext/harvest/tests/test_action.py new file mode 100644 index 
0000000..ac76233 --- /dev/null +++ b/ckanext/harvest/tests/test_action.py @@ -0,0 +1,156 @@ +import copy +import ckan +import paste +import pylons.test + +from ckan import tests +import ckanext.harvest.model as harvest_model + +from ckanext.harvest.tests.test_queue import TestHarvester + +class HarvestSourceActionBase(object): + + @classmethod + def setup_class(cls): + harvest_model.setup() + tests.CreateTestData.create() + + sysadmin_user = ckan.model.User.get('testsysadmin') + cls.sysadmin = { + 'id': sysadmin_user.id, + 'apikey': sysadmin_user.apikey, + 'name': sysadmin_user.name, + } + + + cls.app = paste.fixture.TestApp(pylons.test.pylonsapp) + + cls.default_source_dict = { + "url": "http://test.action.com", + "name": "test-source-action", + "title": "Test source action", + "notes": "Test source action desc", + "source_type": "test", + "frequency": "MANUAL", + "config": "bb" + } + + + + @classmethod + def teardown_class(cls): + ckan.model.repo.rebuild_db() + + def teardown(self): + pass + # ckan.model.Session.query(harvest_model.HarvestSource).delete() + + def test_invalid_missing_values(self): + + source_dict = {} + if 'id' in self.default_source_dict: + source_dict['id'] = self.default_source_dict['id'] + + result = tests.call_action_api(self.app, self.action, + apikey=self.sysadmin['apikey'], status=409, **source_dict) + + for key in ('name','title','url','source_type'): + assert result[key] == [u'Missing value'] + + def test_invalid_unknown_type(self): + + source_dict = copy.deepcopy(self.default_source_dict) + source_dict['source_type'] = 'unknown' + + result = tests.call_action_api(self.app, self.action, + apikey=self.sysadmin['apikey'], status=409, **source_dict) + + assert 'source_type' in result + assert u'Unknown harvester type' in result['source_type'][0] + + def test_invalid_unknown_frequency(self): + wrong_frequency = 'ANNUALLY' + source_dict = copy.deepcopy(self.default_source_dict) + source_dict['frequency'] = wrong_frequency + + result = 
tests.call_action_api(self.app, self.action, + apikey=self.sysadmin['apikey'], status=409, **source_dict) + + assert 'frequency' in result + assert u'Frequency {0} not recognised'.format(wrong_frequency) in result['frequency'][0] + + +class TestHarvestSourceActionCreate(HarvestSourceActionBase): + + def __init__(self): + self.action = 'harvest_source_create' + + + + def test_create(self): + + source_dict = self.default_source_dict + + result = tests.call_action_api(self.app, 'harvest_source_create', + apikey=self.sysadmin['apikey'], **source_dict) + + for key in source_dict.keys(): + assert source_dict[key] == result[key] + + # Check that source was actually created + source = harvest_model.HarvestSource.get(result['id']) + assert source.url == source_dict['url'] + assert source.type == source_dict['source_type'] + + + # Trying to create a source with the same URL fails + + source_dict = copy.deepcopy(self.default_source_dict) + source_dict['name'] = 'test-source-action-new' + + result = tests.call_action_api(self.app, 'harvest_source_create', + apikey=self.sysadmin['apikey'], status=409, **source_dict) + + assert 'url' in result + assert u'There already is a Harvest Source for this URL' in result['url'][0] + +class TestHarvestSourceActionUpdate(HarvestSourceActionBase): + + @classmethod + def setup_class(cls): + + cls.action = 'harvest_source_update' + + super(TestHarvestSourceActionUpdate, cls).setup_class() + + # Create a source to update + source_dict = cls.default_source_dict + result = tests.call_action_api(cls.app, 'harvest_source_create', + apikey=cls.sysadmin['apikey'], **source_dict) + + cls.default_source_dict['id'] = result['id'] + + def test_update(self): + + source_dict = self.default_source_dict + source_dict.update({ + "url": "http://test.action.updated.com", + "name": "test-source-action-updated", + "title": "Test source action updated", + "notes": "Test source action desc updated", + "source_type": "test", + "frequency": "MONTHLY", + "config": 
"cc" + }) + + result = tests.call_action_api(self.app, 'harvest_source_update', + apikey=self.sysadmin['apikey'], **source_dict) + + for key in source_dict.keys(): + assert source_dict[key] == result[key] + + # Check that source was actually updated + source = harvest_model.HarvestSource.get(result['id']) + assert source.url == source_dict['url'] + assert source.type == source_dict['source_type'] +