Merge remote-tracking branch 'origin/2.0-dataset-sources' into 5-improve-job-errors-reporting

This commit is contained in:
kindly 2013-02-20 13:00:33 +00:00
commit 664235903c
4 changed files with 53 additions and 46 deletions

View File

@ -1,7 +1,9 @@
import logging import logging
import re import re
import uuid
from sqlalchemy.sql import update,and_, bindparam from sqlalchemy.sql import update,and_, bindparam
from sqlalchemy.exc import InvalidRequestError
from ckan import model from ckan import model
from ckan.model import Session, Package from ckan.model import Session, Package
@ -19,11 +21,13 @@ from ckanext.harvest.interfaces import IHarvester
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def munge_tag(tag): def munge_tag(tag):
tag = substitute_ascii_equivalents(tag) tag = substitute_ascii_equivalents(tag)
tag = tag.lower().strip() tag = tag.lower().strip()
return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-') return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-')
class HarvesterBase(SingletonPlugin): class HarvesterBase(SingletonPlugin):
''' '''
Generic class for harvesters with helper functions Generic class for harvesters with helper functions
@ -32,53 +36,55 @@ class HarvesterBase(SingletonPlugin):
config = None config = None
def _gen_new_name(self,title): def _gen_new_name(self, title):
''' '''
Creates a URL friendly name from a title Creates a URL friendly name from a title
If the name already exists, it will add some random characters at the end
''' '''
name = munge_title_to_name(title).replace('_', '-') name = munge_title_to_name(title).replace('_', '-')
while '--' in name: while '--' in name:
name = name.replace('--', '-') name = name.replace('--', '-')
return name pkg_obj = Session.query(Package).filter(Package.name == name).first()
if pkg_obj:
def _check_name(self,name): return name + str(uuid.uuid4())[:5]
'''
Checks if a package name already exists in the database, and adds
a counter at the end if it does exist.
'''
like_q = u'%s%%' % name
pkg_query = Session.query(Package).filter(Package.name.ilike(like_q)).limit(100)
taken = [pkg.name for pkg in pkg_query]
if name not in taken:
return name
else: else:
counter = 1 return name
while counter < 101:
if name+str(counter) not in taken:
return name+str(counter)
counter = counter + 1
return None
def _save_gather_error(self,message,job):
'''
Helper function to create an error during the gather stage.
'''
err = HarvestGatherError(message=message,job=job)
err.save()
log.error(message)
def _save_object_error(self,message,obj,stage=u'Fetch'): def _save_gather_error(self, message, job):
''' err = HarvestGatherError(message=message, job=job)
Helper function to create an error during the fetch or import stage. try:
''' err.save()
err = HarvestObjectError(message=message,object=obj,stage=stage) except InvalidRequestError:
err.save() Session.rollback()
log.error(message) err.save()
finally:
log.error(message)
def _save_object_error(self, message, obj, stage=u'Fetch', line=None):
err = HarvestObjectError(message=message,
object=obj,
stage=stage,
line=line)
try:
err.save()
except InvalidRequestError, e:
Session.rollback()
err.save()
finally:
log_message = '{0}, line {1}'.format(message,line) if line else message
log.debug(log_message)
def _create_harvest_objects(self, remote_ids, harvest_job): def _create_harvest_objects(self, remote_ids, harvest_job):
''' '''
Given a list of remote ids and a Harvest Job, create as many Harvest Objects and Given a list of remote ids and a Harvest Job, create as many Harvest Objects and
return a list of its ids to be returned to the fetch stage. return a list of their ids to be passed to the fetch stage.
TODO: Not sure it is worth keeping this function
''' '''
try: try:
object_ids = [] object_ids = []
@ -94,6 +100,7 @@ class HarvesterBase(SingletonPlugin):
except Exception, e: except Exception, e:
self._save_gather_error('%r' % e.message, harvest_job) self._save_gather_error('%r' % e.message, harvest_job)
def _create_or_update_package(self, package_dict, harvest_object): def _create_or_update_package(self, package_dict, harvest_object):
''' '''
Creates a new package or updates an existing one according to the Creates a new package or updates an existing one according to the
@ -109,6 +116,10 @@ class HarvesterBase(SingletonPlugin):
If the remote server provides the modification date of the remote If the remote server provides the modification date of the remote
package, add it to package_dict['metadata_modified']. package, add it to package_dict['metadata_modified'].
TODO: Not sure it is worth keeping this function. If useful it should
use the output of package_show logic function (maybe keeping support
for rest api based dicts)
''' '''
try: try:
# Change default schema # Change default schema
@ -159,7 +170,7 @@ class HarvesterBase(SingletonPlugin):
# Package needs to be created # Package needs to be created
# Check if name has not already been used # Check if name has not already been used
package_dict['name'] = self._check_name(package_dict['name']) package_dict['name'] = self._gen_new_name(package_dict['title'])
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
new_package = get_action('package_create_rest')(context, package_dict) new_package = get_action('package_create_rest')(context, package_dict)

View File

@ -34,7 +34,6 @@ def harvest_source_show(context,data_dict):
:rtype: dictionary :rtype: dictionary
''' '''
context['schema'] = harvest_source_db_to_form_schema()
source_dict = logic.get_action('package_show')(context, data_dict) source_dict = logic.get_action('package_show')(context, data_dict)
# For compatibility with old code, add the active field # For compatibility with old code, add the active field

View File

@ -61,6 +61,7 @@ def harvest_source_db_to_form_schema():
'source_type': [convert_from_extras, ignore_missing], 'source_type': [convert_from_extras, ignore_missing],
'frequency': [convert_from_extras, ignore_missing], 'frequency': [convert_from_extras, ignore_missing],
'config': [convert_from_extras, ignore_missing], 'config': [convert_from_extras, ignore_missing],
'owner_org': [ignore_missing]
}) })
return schema return schema

View File

@ -138,11 +138,7 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
Similar to db_to_form_schema but with further options to allow Similar to db_to_form_schema but with further options to allow
slightly different schemas, eg for creation or deletion on the API. slightly different schemas, eg for creation or deletion on the API.
''' '''
if options.get('type') == 'show': return self.db_to_form_schema()
return None
else:
return self.db_to_form_schema()
def db_to_form_schema(self): def db_to_form_schema(self):
''' '''
@ -157,12 +153,11 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
'''Check if the return data is correct, mostly for checking out '''Check if the return data is correct, mostly for checking out
if spammers are submitting only part of the form''' if spammers are submitting only part of the form'''
surplus_keys_schema = ['__extras', '__junk', 'extras', surplus_keys_schema = ['__extras', '__junk', 'extras', 'notes',
'extras_validation', 'save', 'return_to', 'type', 'extras_validation', 'save', 'return_to', 'type',
'state'] 'state', 'owner_org', 'frequency', 'config']
#TODO: state and delete #TODO: state and delete
if not schema: if not schema:
schema = self.form_to_db_schema() schema = self.form_to_db_schema()
schema_keys = schema.keys() schema_keys = schema.keys()
@ -170,8 +165,9 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
missing_keys = keys_in_schema - set(data_dict.keys()) missing_keys = keys_in_schema - set(data_dict.keys())
if missing_keys: if missing_keys:
log.info('incorrect form fields posted, missing %s' % missing_keys) msg = 'Incorrect form fields posted, missing %s' % missing_keys
raise dictization_functions.DataError(data_dict) log.info(msg)
raise dictization_functions.DataError(msg)
def configure(self, config): def configure(self, config):