From eaa8988440bc0f3df114e1e0c30e7b34472588f5 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 11 Feb 2013 16:34:52 +0000 Subject: [PATCH 1/3] [#4] Changes in schema to accommodate organizations Basically handle the 'owner_org' field in form_to_db and db_to_form. Added 'owner_org', 'frequency' (has default) and 'config' to surplus keys in check_data_dict. Also remove schema tweaks to let package_show call the appropriate schema function. --- ckanext/harvest/logic/action/get.py | 1 - ckanext/harvest/logic/schema.py | 1 + ckanext/harvest/plugin.py | 16 ++++++---------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/ckanext/harvest/logic/action/get.py b/ckanext/harvest/logic/action/get.py index b383249..e02bb76 100644 --- a/ckanext/harvest/logic/action/get.py +++ b/ckanext/harvest/logic/action/get.py @@ -34,7 +34,6 @@ def harvest_source_show(context,data_dict): :rtype: dictionary ''' - context['schema'] = harvest_source_db_to_form_schema() source_dict = logic.get_action('package_show')(context, data_dict) # For compatibility with old code, add the active field diff --git a/ckanext/harvest/logic/schema.py b/ckanext/harvest/logic/schema.py index d071c05..d1eb875 100644 --- a/ckanext/harvest/logic/schema.py +++ b/ckanext/harvest/logic/schema.py @@ -61,6 +61,7 @@ def harvest_source_db_to_form_schema(): 'source_type': [convert_from_extras, ignore_missing], 'frequency': [convert_from_extras, ignore_missing], 'config': [convert_from_extras, ignore_missing], + 'owner_org': [ignore_missing] }) return schema diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index ac5dd05..75783b0 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -138,11 +138,7 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): Similar to db_to_form_schema but with further options to allow slightly different schemas, eg for creation or deletion on the API. 
''' - if options.get('type') == 'show': - return None - else: - return self.db_to_form_schema() - + return self.db_to_form_schema() def db_to_form_schema(self): ''' @@ -157,12 +153,11 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): '''Check if the return data is correct, mostly for checking out if spammers are submitting only part of the form''' - surplus_keys_schema = ['__extras', '__junk', 'extras', + surplus_keys_schema = ['__extras', '__junk', 'extras', 'notes', 'extras_validation', 'save', 'return_to', 'type', - 'state'] + 'state', 'owner_org', 'frequency', 'config'] #TODO: state and delete - if not schema: schema = self.form_to_db_schema() schema_keys = schema.keys() @@ -170,8 +165,9 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): missing_keys = keys_in_schema - set(data_dict.keys()) if missing_keys: - log.info('incorrect form fields posted, missing %s' % missing_keys) - raise dictization_functions.DataError(data_dict) + msg = 'Incorrect form fields posted, missing %s' % missing_keys + log.info(msg) + raise dictization_functions.DataError(msg) def configure(self, config): From 177349fd76035b8143816ea33f4ba701c9cb5550 Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 12 Feb 2013 16:08:39 +0000 Subject: [PATCH 2/3] Update HarvesterBase This is a convenience class that other harvesters can extend. Updates include a cleanup of old functions and porting of enhancements from the spatial harvesters. 
--- ckanext/harvest/harvesters/base.py | 81 +++++++++++++++++------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/ckanext/harvest/harvesters/base.py b/ckanext/harvest/harvesters/base.py index 189a5ad..a7876ac 100644 --- a/ckanext/harvest/harvesters/base.py +++ b/ckanext/harvest/harvesters/base.py @@ -1,7 +1,9 @@ import logging import re +import uuid from sqlalchemy.sql import update,and_, bindparam +from sqlalchemy.exc import InvalidRequestError from ckan import model from ckan.model import Session, Package @@ -19,11 +21,13 @@ from ckanext.harvest.interfaces import IHarvester log = logging.getLogger(__name__) + def munge_tag(tag): tag = substitute_ascii_equivalents(tag) tag = tag.lower().strip() return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') + class HarvesterBase(SingletonPlugin): ''' Generic class for harvesters with helper functions @@ -32,53 +36,55 @@ class HarvesterBase(SingletonPlugin): config = None - def _gen_new_name(self,title): + def _gen_new_name(self, title): ''' Creates a URL friendly name from a title + + If the name already exists, it will add some random characters at the end ''' + name = munge_title_to_name(title).replace('_', '-') while '--' in name: name = name.replace('--', '-') - return name - - def _check_name(self,name): - ''' - Checks if a package name already exists in the database, and adds - a counter at the end if it does exist. 
- ''' - like_q = u'%s%%' % name - pkg_query = Session.query(Package).filter(Package.name.ilike(like_q)).limit(100) - taken = [pkg.name for pkg in pkg_query] - if name not in taken: - return name + pkg_obj = Session.query(Package).filter(Package.name == name).first() + if pkg_obj: + return name + str(uuid.uuid4())[:5] else: - counter = 1 - while counter < 101: - if name+str(counter) not in taken: - return name+str(counter) - counter = counter + 1 - return None + return name - def _save_gather_error(self,message,job): - ''' - Helper function to create an error during the gather stage. - ''' - err = HarvestGatherError(message=message,job=job) - err.save() - log.error(message) - def _save_object_error(self,message,obj,stage=u'Fetch'): - ''' - Helper function to create an error during the fetch or import stage. - ''' - err = HarvestObjectError(message=message,object=obj,stage=stage) - err.save() - log.error(message) + def _save_gather_error(self, message, job): + err = HarvestGatherError(message=message, job=job) + try: + err.save() + except InvalidRequestError: + Session.rollback() + err.save() + finally: + log.error(message) + + + def _save_object_error(self, message, obj, stage=u'Fetch', line=None): + err = HarvestObjectError(message=message, + object=obj, + stage=stage, + line=line) + try: + err.save() + except InvalidRequestError, e: + Session.rollback() + err.save() + finally: + log_message = '{0}, line {1}'.format(message,line) if line else message + log.debug(log_message) + def _create_harvest_objects(self, remote_ids, harvest_job): ''' Given a list of remote ids and a Harvest Job, create as many Harvest Objects and - return a list of its ids to be returned to the fetch stage. + return a list of their ids to be passed to the fetch stage. 
+ + TODO: Not sure it is worth keeping this function ''' try: object_ids = [] @@ -94,6 +100,7 @@ class HarvesterBase(SingletonPlugin): except Exception, e: self._save_gather_error('%r' % e.message, harvest_job) + def _create_or_update_package(self, package_dict, harvest_object): ''' Creates a new package or updates an exisiting one according to the @@ -109,6 +116,10 @@ class HarvesterBase(SingletonPlugin): If the remote server provides the modification date of the remote package, add it to package_dict['metadata_modified']. + + TODO: Not sure it is worth keeping this function. If useful it should + use the output of package_show logic function (maybe keeping support + for rest api based dicts ''' try: # Change default schema @@ -159,7 +170,7 @@ class HarvesterBase(SingletonPlugin): # Package needs to be created # Check if name has not already been used - package_dict['name'] = self._check_name(package_dict['name']) + package_dict['name'] = self._gen_new_name(package_dict['title']) log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) new_package = get_action('package_create_rest')(context, package_dict) From 83f8cf69a65c8b82c55646d6e172649049aff8bf Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 19 Feb 2013 11:51:22 +0000 Subject: [PATCH 3/3] Remove unnecessary extra quotes (see #381 on CKAN core) --- ckanext/harvest/plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py index 75783b0..e8a3ba9 100644 --- a/ckanext/harvest/plugin.py +++ b/ckanext/harvest/plugin.py @@ -50,7 +50,7 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm): data_dict['extras'] = [] data_dict['extras'].append({ - 'key': key, 'value': '"{0}"'.format(value), 'state': u'active' + 'key': key, 'value': value, 'state': u'active' }) if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME: