From 1b663bbff4f29a1e34144447755f38585f190bbb Mon Sep 17 00:00:00 2001 From: joetsoi Date: Wed, 4 Sep 2013 14:17:01 +0100 Subject: [PATCH 1/4] add harvest_object_create action --- ckanext/harvest/logic/action/create.py | 21 +++++++++++++++++++-- ckanext/harvest/logic/auth/create.py | 10 ++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index 392ac48..7e0e059 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -6,8 +6,9 @@ from ckan.logic import NotFound, check_access from ckanext.harvest.logic import HarvestJobExists from ckanext.harvest.plugin import DATASET_TYPE_NAME -from ckanext.harvest.model import (HarvestSource, HarvestJob) -from ckanext.harvest.logic.dictization import harvest_job_dictize +from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject) +from ckanext.harvest.logic.dictization import (harvest_job_dictize, + harvest_object_dictize) from ckanext.harvest.logic.schema import harvest_source_show_package_schema from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list @@ -136,3 +137,19 @@ def _check_for_existing_jobs(context, source_id): exist = len(exist_new + exist_running) > 0 return exist + +def harvest_object_create(context, data_dict): + check_access('harvest_object_create', context, data_dict) + + job_id = data_dict['job_id'] + + job = HarvestJob.get(job_id) + if not job: + log.warn('Harvest job %s does not exist', job_id) + raise NotFound('Harvest job %s does not exist' % job_id) + + guid = data_dict['guid'] + content = data_dict.get('content', '') + + obj = HarvestObject(guid=guid, content=content, job=job) + return harvest_object_dictize(obj, context) diff --git a/ckanext/harvest/logic/auth/create.py b/ckanext/harvest/logic/auth/create.py index 03aa01b..953f2d4 100644 --- a/ckanext/harvest/logic/auth/create.py +++ b/ckanext/harvest/logic/auth/create.py @@ -52,3 +52,13 @@ def harvest_job_create_all(context, data_dict): return {'success': False, 'msg': pt._('Only sysadmins can create harvest jobs for all sources')} else: return {'success': True} + +def harvest_object_create(context, data_dict): + """ + Auth check for creating a harvest object + + only the sysadmins can create harvest objects + """ + # sysadmins can run all actions if we've got to this point we're not a sysadmin + return {'success': False, 'msg': pt._('Only the sysadmins can create harvest objects')} + From 5da153c6b69cec88181fda50575f9aa43e4a1ebf Mon Sep 17 00:00:00 2001 From: joetsoi Date: Tue, 17 Sep 2013 16:49:19 +0100 Subject: [PATCH 2/4] [#65] harvest_object_create action update to use schema and validators. Also accept more parameters to data_dict. --- ckanext/harvest/logic/action/create.py | 56 +++++++++++++++++++----- ckanext/harvest/logic/schema.py | 16 +++++++ ckanext/harvest/logic/validators.py | 18 +++++++- ckanext/harvest/tests/factories.py | 14 ++++++ ckanext/harvest/tests/test_action.py | 59 ++++++++++++++++++++++++++ pip-requirements.txt | 1 + 6 files changed, 152 insertions(+), 12 deletions(-) create mode 100644 ckanext/harvest/tests/factories.py diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index 7e0e059..ba1ae07 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -1,19 +1,27 @@ import logging +import ckan from ckan import logic from ckan.logic import NotFound, check_access from ckanext.harvest.logic import HarvestJobExists from ckanext.harvest.plugin import DATASET_TYPE_NAME -from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject) +from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject, + HarvestObjectExtra) from ckanext.harvest.logic.dictization import (harvest_job_dictize, harvest_object_dictize) -from ckanext.harvest.logic.schema import harvest_source_show_package_schema +from ckanext.harvest.logic.schema import (harvest_source_show_package_schema, + harvest_object_create_schema) from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list log = logging.getLogger(__name__) +_validate = ckan.lib.navl.dictization_functions.validate + +class InactiveSource(Exception): + pass + def harvest_source_create(context,data_dict): ''' Creates a new harvest source @@ -138,18 +146,44 @@ def _check_for_existing_jobs(context, source_id): return exist +def _get_harvest_source(source_id): + # Check if source exists + source = HarvestSource.get(source_id) + if not source: + log.warn('Harvest source %s does not exist', source_id) + raise NotFound('Harvest source %s does not exist' % source_id) + + # Check if the source is active + if not source.active: + log.warn('Harvest job cannot be created for inactive source %s', source_id) + raise InactiveSource('Can not create jobs on inactive sources') + return source + def harvest_object_create(context, data_dict): + """ Create a new harvest object + + :type guid: string (optional) + :type content: string (optional) + :type job_id: string + :type source_id: string (optional) + :type package_id: string (optional) + :type extras: dict (optional) + """ check_access('harvest_object_create', context, data_dict) + data, errors = _validate(data_dict, harvest_object_create_schema(), context) - job_id = data_dict['job_id'] + if errors: + raise logic.ValidationError(errors) - job = HarvestJob.get(job_id) - if not job: - log.warn('Harvest job %s does not exist', job_id) - raise NotFound('Harvest job %s does not exist' % job_id) + obj = HarvestObject( + guid=data.get('guid'), + content=data.get('content'), + job=HarvestJob.get(data['job_id']), + harvest_source_id=data.get('source_id'), + package_id=data.get('package_id'), + extras=[ HarvestObjectExtra(key=k, value=v) + for k, v in data.get('extras', {}).items() ] + ) - guid = data_dict['guid'] - content = data_dict.get('content', '') - - obj = HarvestObject(guid=guid, content=content, job=job) + obj.save() return harvest_object_dictize(obj, context) diff --git a/ckanext/harvest/logic/schema.py b/ckanext/harvest/logic/schema.py index b6d6544..0500a5b 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -20,6 +20,9 @@ from ckanext.harvest.logic.validators import (harvest_source_url_validator, harvest_source_frequency_exists, dataset_type_exists, harvest_source_convert_from_config, + harvest_source_id_exists, + harvest_job_id_exists, + harvest_object_extras_validator, ) def harvest_source_schema(): @@ -74,3 +77,16 @@ def harvest_source_show_package_schema(): }) return schema + +def harvest_object_create_schema(): + schema = { + 'guid': [ignore_missing, unicode], + 'content': [ignore_missing, unicode], + 'state': [ignore_missing, unicode], + 'job_id': [harvest_job_id_exists], + 'source_id': [ignore_missing, harvest_source_id_exists], + 'package_id': [ignore_missing, package_id_exists], + 'extras': [ignore_missing, harvest_object_extras_validator], + } + return schema + diff --git a/ckanext/harvest/logic/validators.py b/ckanext/harvest/logic/validators.py index a1fba93..9a4edb6 100644 --- a/ckanext/harvest/logic/validators.py +++ b/ckanext/harvest/logic/validators.py @@ -7,7 +7,7 @@ from ckan import model from ckan.plugins import PluginImplementations from ckanext.harvest.plugin import DATASET_TYPE_NAME -from ckanext.harvest.model import HarvestSource, UPDATE_FREQUENCIES +from ckanext.harvest.model import HarvestSource, UPDATE_FREQUENCIES, HarvestJob from ckanext.harvest.interfaces import IHarvester from ckan.lib.navl.validators import keep_extras @@ -22,6 +22,14 @@ def harvest_source_id_exists(value, context): raise Invalid('Harvest Source with id %r does not exist.' % str(value)) return value +def harvest_job_id_exists(value, context): + + result = HarvestJob.get(value,None) + + if not result: + raise Invalid('Harvest Job with id %r does not exist.' % str(value)) + return value + def _normalize_url(url): o = urlparse.urlparse(url) @@ -196,3 +204,11 @@ def dataset_type_exists(value): if value != DATASET_TYPE_NAME: value = DATASET_TYPE_NAME return value + +def harvest_object_extras_validator(value, context): + if not isinstance(value, dict): + raise Invalid('extras must be a dict') + for v in value.values(): + if not isinstance(v, basestring): + raise Invalid('extras must be a dict of strings') + return value diff --git a/ckanext/harvest/tests/factories.py b/ckanext/harvest/tests/factories.py new file mode 100644 index 0000000..8910519 --- /dev/null +++ b/ckanext/harvest/tests/factories.py @@ -0,0 +1,14 @@ +import factory +from ckanext.harvest.model import HarvestSource, HarvestJob + +class HarvestSourceFactory(factory.Factory): + FACTORY_FOR = HarvestSource + + url = "http://harvest.test.com" + type = "test-harvest-source" + +class HarvestJobFactory(factory.Factory): + FACTORY_FOR = HarvestJob + + status = "New" + source = factory.SubFactory(HarvestSourceFactory) diff --git a/ckanext/harvest/tests/test_action.py b/ckanext/harvest/tests/test_action.py index 171306b..e89a429 100644 --- a/ckanext/harvest/tests/test_action.py +++ b/ckanext/harvest/tests/test_action.py @@ -3,12 +3,16 @@ import copy import ckan import paste import pylons.test +import factories +import unittest from ckan import tests from ckan import plugins as p +from ckan.plugins import toolkit from ckanext.harvest.interfaces import IHarvester import ckanext.harvest.model as harvest_model + class MockHarvesterForActionTests(p.SingletonPlugin): p.implements(IHarvester) def info(self): @@ -203,3 +207,58 @@ class TestHarvestSourceActionUpdate(HarvestSourceActionBase): assert source.url == source_dict['url'] assert source.type == source_dict['source_type'] +class TestHarvestObject(unittest.TestCase): + @classmethod + def setup_class(cls): + harvest_model.setup() + + @classmethod + def teardown_class(cls): + ckan.model.repo.rebuild_db() + + def test_create(self): + job = factories.HarvestJobFactory() + job.save() + + context = { + 'model' : ckan.model, + 'session': ckan.model.Session, + 'ignore_auth': True, + } + data_dict = { + 'guid' : 'guid', + 'content' : 'content', + 'job_id' : job.id, + 'extras' : { 'a key' : 'a value' }, + } + harvest_object = toolkit.get_action('harvest_object_create')( + context, data_dict) + + # fetch the object from database to check it was created + created_object = harvest_model.HarvestObject.get(harvest_object['id']) + assert created_object.guid == harvest_object['guid'] == data_dict['guid'] + + def test_create_bad_parameters(self): + source_a = factories.HarvestSourceFactory() + source_a.save() + job = factories.HarvestJobFactory() + job.save() + + context = { + 'model' : ckan.model, + 'session': ckan.model.Session, + 'ignore_auth': True, + } + data_dict = { + 'job_id' : job.id, + 'source_id' : source_a.id, + 'extras' : 1 + } + harvest_object_create = toolkit.get_action('harvest_object_create') + self.assertRaises(ckan.logic.ValidationError, harvest_object_create, + context, data_dict) + + data_dict['extras'] = {'test': 1 } + + self.assertRaises(ckan.logic.ValidationError, harvest_object_create, + context, data_dict) diff --git a/pip-requirements.txt b/pip-requirements.txt index c683e0b..e587bc8 100644 --- a/pip-requirements.txt +++ b/pip-requirements.txt @@ -1 +1,2 @@ pika==0.9.8 +factory-boy>=2 From 9b3199b41b82524efe9ff8a9453aab97052f26f6 Mon Sep 17 00:00:00 2001 From: joetsoi Date: Tue, 17 Sep 2013 17:02:38 +0100 Subject: [PATCH 3/4] [#65] remove unused code --- ckanext/harvest/logic/action/create.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index ba1ae07..2a69235 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -146,19 +146,6 @@ def _check_for_existing_jobs(context, source_id): return exist -def _get_harvest_source(source_id): - # Check if source exists - source = HarvestSource.get(source_id) - if not source: - log.warn('Harvest source %s does not exist', source_id) - raise NotFound('Harvest source %s does not exist' % source_id) - - # Check if the source is active - if not source.active: - log.warn('Harvest job cannot be created for inactive source %s', source_id) - raise InactiveSource('Can not create jobs on inactive sources') - return source - def harvest_object_create(context, data_dict): """ Create a new harvest object From da2fd45e80afebb7dff57b7734e4651261171e25 Mon Sep 17 00:00:00 2001 From: joetsoi Date: Thu, 3 Oct 2013 15:51:37 +0100 Subject: [PATCH 4/4] [#65] make harvest_job_exists validator return model object return the model in the validator instead of checking that it exists in the validator, returning the id and then fetching it again in the action function --- ckanext/harvest/logic/action/create.py | 2 +- ckanext/harvest/logic/schema.py | 4 ++-- ckanext/harvest/logic/validators.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ckanext/harvest/logic/action/create.py b/ckanext/harvest/logic/action/create.py index 2a69235..042d8f6 100644 --- a/ckanext/harvest/logic/action/create.py +++ b/ckanext/harvest/logic/action/create.py @@ -165,7 +165,7 @@ def harvest_object_create(context, data_dict): obj = HarvestObject( guid=data.get('guid'), content=data.get('content'), - job=HarvestJob.get(data['job_id']), + job=data['job_id'], harvest_source_id=data.get('source_id'), package_id=data.get('package_id'), extras=[ HarvestObjectExtra(key=k, value=v) diff --git a/ckanext/harvest/logic/schema.py b/ckanext/harvest/logic/schema.py index 0500a5b..d303f95 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -21,7 +21,7 @@ from ckanext.harvest.logic.validators import (harvest_source_url_validator, dataset_type_exists, harvest_source_convert_from_config, harvest_source_id_exists, - harvest_job_id_exists, + harvest_job_exists, harvest_object_extras_validator, ) @@ -83,7 +83,7 @@ def harvest_object_create_schema(): 'guid': [ignore_missing, unicode], 'content': [ignore_missing, unicode], 'state': [ignore_missing, unicode], - 'job_id': [harvest_job_id_exists], + 'job_id': [harvest_job_exists], 'source_id': [ignore_missing, harvest_source_id_exists], 'package_id': [ignore_missing, package_id_exists], 'extras': [ignore_missing, harvest_object_extras_validator], diff --git a/ckanext/harvest/logic/validators.py b/ckanext/harvest/logic/validators.py index 9a4edb6..4dec758 100644 --- a/ckanext/harvest/logic/validators.py +++ b/ckanext/harvest/logic/validators.py @@ -22,13 +22,13 @@ def harvest_source_id_exists(value, context): raise Invalid('Harvest Source with id %r does not exist.' % str(value)) return value -def harvest_job_id_exists(value, context): - - result = HarvestJob.get(value,None) +def harvest_job_exists(value, context): + """Check if a harvest job exists and returns the model if it does""" + result = HarvestJob.get(value, None) if not result: raise Invalid('Harvest Job with id %r does not exist.' % str(value)) - return value + return result def _normalize_url(url): o = urlparse.urlparse(url)