Update harvest source create and update logic functions

`harvest_source_create` and `harvest_source_update` now call
`package_create` and `package_update` respectively, making sure to
define a 'harvest_source' type. The returned dict uses the db_to_form
schema.
This commit is contained in:
amercader 2012-11-30 14:03:04 +00:00
parent 0e0aed0503
commit 803b228d1c
3 changed files with 253 additions and 73 deletions

View File

@ -1,51 +1,68 @@
import re import re
import logging import logging
from ckan.logic import NotFound, ValidationError, check_access from ckan import logic
from ckanext.harvest.logic import HarvestJobExists
from ckan.lib.navl.dictization_functions import validate
from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject) from ckan.logic import NotFound, check_access
from ckanext.harvest.logic.schema import old_default_harvest_source_schema as default_harvest_source_schema from ckanext.harvest.logic import HarvestJobExists
from ckanext.harvest.logic.dictization import (harvest_source_dictize,
harvest_job_dictize) from ckanext.harvest.plugin import DATASET_TYPE_NAME
from ckanext.harvest.model import (HarvestSource, HarvestJob)
from ckanext.harvest.logic.dictization import harvest_job_dictize
from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def harvest_source_create(context,data_dict): def harvest_source_create(context,data_dict):
'''
Creates a new harvest source
This method just proxies the request to package_create,
which will create a harvest_source dataset type and the
HarvestSource object. All auth checks and validation will
be done there. We only make sure to set the dataset type.
Note that the harvest source type (ckan, waf, csw, etc)
is now set via the source_type field.
Both arguments are modified in place: ``data_dict['type']`` is set to
``DATASET_TYPE_NAME``, ``context['extras_as_string']`` is set to True and
``context['schema']`` is replaced with the db_to_form schema before the
created dataset is read back with package_show.
:param url: the URL for the harvest source
:type url: string
:param name: the name of the new harvest source, must be between 2 and 100
characters long and contain only lowercase alphanumeric characters
:type name: string
:param title: the title of the dataset (optional, default: same as
``name``)
:type title: string
:param notes: a description of the harvest source (optional)
:type notes: string
:param source_type: the harvester type for this source. This must be one
of the registered harvesters, eg 'ckan', 'csw', etc.
:type source_type: string
:param frequency: the frequency at which this harvester should run. See
``ckanext.harvest.model`` source for possible values. Default is
'MANUAL'
:type frequency: string
:param config: extra configuration options for the particular harvester
type. Should be serialized as JSON. (optional)
:type config: string
:returns: the newly created harvest source, as returned by package_show
using the db_to_form schema
:rtype: dictionary
'''
log.info('Creating harvest source: %r', data_dict) log.info('Creating harvest source: %r', data_dict)
check_access('harvest_source_create',context,data_dict)
model = context['model'] data_dict['type'] = DATASET_TYPE_NAME
session = context['session']
schema = context.get('schema') or default_harvest_source_schema()
data, errors = validate(data_dict, schema) context['extras_as_string'] = True
package_dict = logic.get_action('package_create')(context, data_dict)
if errors: context['schema'] = harvest_source_db_to_form_schema()
session.rollback() source = logic.get_action('package_show')(context, package_dict)
log.warn('Harvest source does not validate: %r', errors)
raise ValidationError(errors,_error_summary(errors))
source = HarvestSource() return source
source.url = data['url'].strip()
source.type = data['type']
opt = ['active','title','description','user_id',
'publisher_id','config', 'frequency']
for o in opt:
if o in data and data[o] is not None:
source.__setattr__(o,data[o])
if 'active' in data_dict:
source.active = data['active']
source.save()
log.info('Harvest source created: %s', source.id)
return harvest_source_dictize(source,context)
def harvest_job_create(context,data_dict): def harvest_job_create(context,data_dict):

View File

@ -8,69 +8,76 @@ from ckan.logic import get_action
from ckanext.harvest.interfaces import IHarvester from ckanext.harvest.interfaces import IHarvester
from ckan.model import Package from ckan.model import Package
from ckan import logic
from ckan.logic import NotFound, ValidationError, check_access from ckan.logic import NotFound, check_access
from ckan.lib.navl.dictization_functions import validate
from ckanext.harvest.plugin import DATASET_TYPE_NAME
from ckanext.harvest.queue import get_gather_publisher from ckanext.harvest.queue import get_gather_publisher
from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject) from ckanext.harvest.model import HarvestSource, HarvestObject
from ckanext.harvest.logic.schema import old_default_harvest_source_schema as default_harvest_source_schema
from ckanext.harvest.logic import HarvestJobExists from ckanext.harvest.logic import HarvestJobExists
from ckanext.harvest.logic.dictization import (harvest_source_dictize,harvest_object_dictize) from ckanext.harvest.logic.schema import harvest_source_db_to_form_schema
from ckanext.harvest.logic.action.create import _error_summary
from ckanext.harvest.logic.action.get import harvest_source_show, harvest_job_list, _get_sources_for_user from ckanext.harvest.logic.action.get import harvest_source_show, harvest_job_list, _get_sources_for_user
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def harvest_source_update(context,data_dict): def harvest_source_update(context,data_dict):
'''
Updates an existing harvest source
check_access('harvest_source_update',context,data_dict) This method just proxies the request to package_update,
which will update the harvest_source dataset and the
HarvestSource object. All auth checks and validation will
be done there. We only make sure to set the dataset type.
model = context['model'] Note that the harvest source type (ckan, waf, csw, etc)
session = context['session'] is now set via the source_type field.
source_id = data_dict.get('id') :param id: the name or id of the harvest source to update
schema = context.get('schema') or default_harvest_source_schema() :type id: string
:param url: the URL for the harvest source
:type url: string
:param name: the name of the harvest source, must be between 2 and 100
characters long and contain only lowercase alphanumeric characters
:type name: string
:param title: the title of the dataset (optional, default: same as
``name``)
:type title: string
:param notes: a description of the harvest source (optional)
:type notes: string
:param source_type: the harvester type for this source. This must be one
of the registered harvesters, eg 'ckan', 'csw', etc.
:type source_type: string
:param frequency: the frequency at which this harvester should run. See
``ckanext.harvest.model`` source for possible values. Default is
'MANUAL'
:type frequency: string
:param config: extra configuration options for the particular harvester
type. Should be serialized as JSON. (optional)
:type config: string
log.info('Harvest source %s update: %r', source_id, data_dict)
source = HarvestSource.get(source_id)
if not source:
log.error('Harvest source %s does not exist', source_id)
raise NotFound('Harvest source %s does not exist' % source_id)
data, errors = validate(data_dict, schema) :returns: the updated harvest source
:rtype: dictionary
if errors: '''
session.rollback() log.info('Updating harvest source: %r', data_dict)
raise ValidationError(errors,_error_summary(errors))
fields = ['url','title','type','description','user_id','publisher_id'] data_dict['type'] = DATASET_TYPE_NAME
for f in fields:
if f in data and data[f] is not None:
if f == 'url':
data[f] = data[f].strip()
source.__setattr__(f,data[f])
if 'active' in data_dict: context['extras_as_string'] = True
source.active = data['active'] package_dict = logic.get_action('package_update')(context, data_dict)
if 'config' in data_dict: context['schema'] = harvest_source_db_to_form_schema()
source.config = data['config'] source = logic.get_action('package_show')(context, package_dict)
return source
source.save()
# Abort any pending jobs
if not source.active:
jobs = HarvestJob.filter(source=source,status=u'New')
log.info('Harvest source %s not active, so aborting %i outstanding jobs', source_id, jobs.count())
if jobs:
for job in jobs:
job.status = u'Aborted'
job.save()
return harvest_source_dictize(source,context)
def harvest_objects_import(context,data_dict): def harvest_objects_import(context,data_dict):
''' '''

View File

@ -0,0 +1,156 @@
import copy
import ckan
import paste
import pylons.test
from ckan import tests
import ckanext.harvest.model as harvest_model
from ckanext.harvest.tests.test_queue import TestHarvester
class HarvestSourceActionBase(object):
    '''
    Shared fixtures and negative test cases for the harvest source actions.

    Subclasses are expected to provide an ``action`` attribute holding the
    name of the action under test; every test defined here posts to it.
    '''

    @classmethod
    def setup_class(cls):
        '''Set up the harvest tables, test data, app and default payload.'''
        harvest_model.setup()
        tests.CreateTestData.create()

        admin = ckan.model.User.get('testsysadmin')
        cls.sysadmin = dict(
            id=admin.id,
            apikey=admin.apikey,
            name=admin.name,
        )

        cls.app = paste.fixture.TestApp(pylons.test.pylonsapp)

        # A payload that is expected to validate; the individual tests
        # derive their invalid variants from it.
        cls.default_source_dict = {
            "url": "http://test.action.com",
            "name": "test-source-action",
            "title": "Test source action",
            "notes": "Test source action desc",
            "source_type": "test",
            "frequency": "MANUAL",
            "config": "bb",
        }

    @classmethod
    def teardown_class(cls):
        '''Rebuild the database once the whole class has run.'''
        ckan.model.repo.rebuild_db()

    def teardown(self):
        '''Per-test teardown; intentionally does nothing.'''
        # Deleting the harvest sources here is currently disabled:
        # ckan.model.Session.query(harvest_model.HarvestSource).delete()
        pass

    def _post_expecting_conflict(self, source_dict):
        '''Call the action as sysadmin, requiring a 409 response.

        Returns whatever ``tests.call_action_api`` returns for that call.
        '''
        return tests.call_action_api(self.app, self.action,
                                     apikey=self.sysadmin['apikey'],
                                     status=409, **source_dict)

    def test_invalid_missing_values(self):
        '''An (almost) empty payload reports the required fields as missing.'''
        defaults = self.default_source_dict
        # Keep only the id (when the defaults carry one) and drop the rest.
        payload = {'id': defaults['id']} if 'id' in defaults else {}

        errors = self._post_expecting_conflict(payload)

        for field in ('name', 'title', 'url', 'source_type'):
            assert errors[field] == [u'Missing value']

    def test_invalid_unknown_type(self):
        '''A source_type of 'unknown' is rejected.'''
        payload = copy.deepcopy(self.default_source_dict)
        payload['source_type'] = 'unknown'

        errors = self._post_expecting_conflict(payload)

        assert 'source_type' in errors
        assert u'Unknown harvester type' in errors['source_type'][0]

    def test_invalid_unknown_frequency(self):
        '''A frequency of 'ANNUALLY' is rejected.'''
        wrong_frequency = 'ANNUALLY'
        payload = copy.deepcopy(self.default_source_dict)
        payload['frequency'] = wrong_frequency

        errors = self._post_expecting_conflict(payload)

        expected = u'Frequency {0} not recognised'.format(wrong_frequency)
        assert 'frequency' in errors
        assert expected in errors['frequency'][0]
class TestHarvestSourceActionCreate(HarvestSourceActionBase):
    '''Tests for the ``harvest_source_create`` action.'''

    # Declared on the class rather than in ``__init__``: the value is
    # available without instantiating the class, and test runners that
    # refuse to collect classes defining ``__init__`` can pick it up.
    action = 'harvest_source_create'

    def test_create(self):
        '''A valid payload creates a source; reusing its URL is rejected.'''
        source_dict = self.default_source_dict

        result = tests.call_action_api(self.app, self.action,
                                       apikey=self.sysadmin['apikey'],
                                       **source_dict)

        # Every submitted field must come back unchanged
        for key, value in source_dict.items():
            assert value == result[key]

        # Check that source was actually created
        source = harvest_model.HarvestSource.get(result['id'])
        assert source.url == source_dict['url']
        assert source.type == source_dict['source_type']

        # Trying to create a source with the same URL fails
        source_dict = copy.deepcopy(self.default_source_dict)
        source_dict['name'] = 'test-source-action-new'

        result = tests.call_action_api(self.app, self.action,
                                       apikey=self.sysadmin['apikey'],
                                       status=409, **source_dict)

        assert 'url' in result
        assert u'There already is a Harvest Source for this URL' in result['url'][0]
class TestHarvestSourceActionUpdate(HarvestSourceActionBase):
    '''Tests for the ``harvest_source_update`` action.'''

    action = 'harvest_source_update'

    @classmethod
    def setup_class(cls):
        '''Run the shared setup, then create the source the tests update.'''
        super(TestHarvestSourceActionUpdate, cls).setup_class()

        # Create a source to update
        result = tests.call_action_api(cls.app, 'harvest_source_create',
                                       apikey=cls.sysadmin['apikey'],
                                       **cls.default_source_dict)

        # Every payload built from the defaults now targets this source
        cls.default_source_dict['id'] = result['id']

    def test_update(self):
        '''Updating every field is reflected in the response and the model.'''
        # Work on a copy: the defaults are shared at class level, and
        # changing them here would make the inherited tests depend on the
        # order in which the tests run.
        source_dict = copy.deepcopy(self.default_source_dict)
        source_dict.update({
            "url": "http://test.action.updated.com",
            "name": "test-source-action-updated",
            "title": "Test source action updated",
            "notes": "Test source action desc updated",
            "source_type": "test",
            "frequency": "MONTHLY",
            "config": "cc",
        })

        result = tests.call_action_api(self.app, self.action,
                                       apikey=self.sysadmin['apikey'],
                                       **source_dict)

        # Every submitted field must come back unchanged
        for key, value in source_dict.items():
            assert value == result[key]

        # Check that source was actually updated
        source = harvest_model.HarvestSource.get(result['id'])
        assert source.url == source_dict['url']
        assert source.type == source_dict['source_type']