Merge branch '2.0-dataset-sources' into 7-harvest-source-templates
This commit is contained in:
commit
bc2bc1e28f
|
@ -1,7 +1,9 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
import uuid
|
||||||
|
|
||||||
from sqlalchemy.sql import update,and_, bindparam
|
from sqlalchemy.sql import update,and_, bindparam
|
||||||
|
from sqlalchemy.exc import InvalidRequestError
|
||||||
|
|
||||||
from ckan import model
|
from ckan import model
|
||||||
from ckan.model import Session, Package
|
from ckan.model import Session, Package
|
||||||
|
@ -19,11 +21,13 @@ from ckanext.harvest.interfaces import IHarvester
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def munge_tag(tag):
|
def munge_tag(tag):
|
||||||
tag = substitute_ascii_equivalents(tag)
|
tag = substitute_ascii_equivalents(tag)
|
||||||
tag = tag.lower().strip()
|
tag = tag.lower().strip()
|
||||||
return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-')
|
return re.sub(r'[^a-zA-Z0-9 -]', '', tag).replace(' ', '-')
|
||||||
|
|
||||||
|
|
||||||
class HarvesterBase(SingletonPlugin):
|
class HarvesterBase(SingletonPlugin):
|
||||||
'''
|
'''
|
||||||
Generic class for harvesters with helper functions
|
Generic class for harvesters with helper functions
|
||||||
|
@ -32,53 +36,55 @@ class HarvesterBase(SingletonPlugin):
|
||||||
|
|
||||||
config = None
|
config = None
|
||||||
|
|
||||||
def _gen_new_name(self,title):
|
def _gen_new_name(self, title):
|
||||||
'''
|
'''
|
||||||
Creates a URL friendly name from a title
|
Creates a URL friendly name from a title
|
||||||
|
|
||||||
|
If the name already exists, it will add some random characters at the end
|
||||||
'''
|
'''
|
||||||
|
|
||||||
name = munge_title_to_name(title).replace('_', '-')
|
name = munge_title_to_name(title).replace('_', '-')
|
||||||
while '--' in name:
|
while '--' in name:
|
||||||
name = name.replace('--', '-')
|
name = name.replace('--', '-')
|
||||||
return name
|
pkg_obj = Session.query(Package).filter(Package.name == name).first()
|
||||||
|
if pkg_obj:
|
||||||
def _check_name(self,name):
|
return name + str(uuid.uuid4())[:5]
|
||||||
'''
|
|
||||||
Checks if a package name already exists in the database, and adds
|
|
||||||
a counter at the end if it does exist.
|
|
||||||
'''
|
|
||||||
like_q = u'%s%%' % name
|
|
||||||
pkg_query = Session.query(Package).filter(Package.name.ilike(like_q)).limit(100)
|
|
||||||
taken = [pkg.name for pkg in pkg_query]
|
|
||||||
if name not in taken:
|
|
||||||
return name
|
|
||||||
else:
|
else:
|
||||||
counter = 1
|
return name
|
||||||
while counter < 101:
|
|
||||||
if name+str(counter) not in taken:
|
|
||||||
return name+str(counter)
|
|
||||||
counter = counter + 1
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _save_gather_error(self,message,job):
|
|
||||||
'''
|
def _save_gather_error(self, message, job):
|
||||||
Helper function to create an error during the gather stage.
|
err = HarvestGatherError(message=message, job=job)
|
||||||
'''
|
try:
|
||||||
err = HarvestGatherError(message=message,job=job)
|
|
||||||
err.save()
|
err.save()
|
||||||
|
except InvalidRequestError:
|
||||||
|
Session.rollback()
|
||||||
|
err.save()
|
||||||
|
finally:
|
||||||
log.error(message)
|
log.error(message)
|
||||||
|
|
||||||
def _save_object_error(self,message,obj,stage=u'Fetch'):
|
|
||||||
'''
|
def _save_object_error(self, message, obj, stage=u'Fetch', line=None):
|
||||||
Helper function to create an error during the fetch or import stage.
|
err = HarvestObjectError(message=message,
|
||||||
'''
|
object=obj,
|
||||||
err = HarvestObjectError(message=message,object=obj,stage=stage)
|
stage=stage,
|
||||||
|
line=line)
|
||||||
|
try:
|
||||||
err.save()
|
err.save()
|
||||||
log.error(message)
|
except InvalidRequestError, e:
|
||||||
|
Session.rollback()
|
||||||
|
err.save()
|
||||||
|
finally:
|
||||||
|
log_message = '{0}, line {1}'.format(message,line) if line else message
|
||||||
|
log.debug(log_message)
|
||||||
|
|
||||||
|
|
||||||
def _create_harvest_objects(self, remote_ids, harvest_job):
|
def _create_harvest_objects(self, remote_ids, harvest_job):
|
||||||
'''
|
'''
|
||||||
Given a list of remote ids and a Harvest Job, create as many Harvest Objects and
|
Given a list of remote ids and a Harvest Job, create as many Harvest Objects and
|
||||||
return a list of its ids to be returned to the fetch stage.
|
return a list of their ids to be passed to the fetch stage.
|
||||||
|
|
||||||
|
TODO: Not sure it is worth keeping this function
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
object_ids = []
|
object_ids = []
|
||||||
|
@ -94,6 +100,7 @@ class HarvesterBase(SingletonPlugin):
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
self._save_gather_error('%r' % e.message, harvest_job)
|
self._save_gather_error('%r' % e.message, harvest_job)
|
||||||
|
|
||||||
|
|
||||||
def _create_or_update_package(self, package_dict, harvest_object):
|
def _create_or_update_package(self, package_dict, harvest_object):
|
||||||
'''
|
'''
|
||||||
Creates a new package or updates an exisiting one according to the
|
Creates a new package or updates an exisiting one according to the
|
||||||
|
@ -109,6 +116,10 @@ class HarvesterBase(SingletonPlugin):
|
||||||
If the remote server provides the modification date of the remote
|
If the remote server provides the modification date of the remote
|
||||||
package, add it to package_dict['metadata_modified'].
|
package, add it to package_dict['metadata_modified'].
|
||||||
|
|
||||||
|
|
||||||
|
TODO: Not sure it is worth keeping this function. If useful it should
|
||||||
|
use the output of package_show logic function (maybe keeping support
|
||||||
|
for rest api based dicts
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
# Change default schema
|
# Change default schema
|
||||||
|
@ -159,7 +170,7 @@ class HarvesterBase(SingletonPlugin):
|
||||||
# Package needs to be created
|
# Package needs to be created
|
||||||
|
|
||||||
# Check if name has not already been used
|
# Check if name has not already been used
|
||||||
package_dict['name'] = self._check_name(package_dict['name'])
|
package_dict['name'] = self._gen_new_name(package_dict['title'])
|
||||||
|
|
||||||
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
|
log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid)
|
||||||
new_package = get_action('package_create_rest')(context, package_dict)
|
new_package = get_action('package_create_rest')(context, package_dict)
|
||||||
|
|
|
@ -34,7 +34,6 @@ def harvest_source_show(context,data_dict):
|
||||||
:rtype: dictionary
|
:rtype: dictionary
|
||||||
'''
|
'''
|
||||||
|
|
||||||
context['schema'] = harvest_source_db_to_form_schema()
|
|
||||||
source_dict = logic.get_action('package_show')(context, data_dict)
|
source_dict = logic.get_action('package_show')(context, data_dict)
|
||||||
|
|
||||||
# For compatibility with old code, add the active field
|
# For compatibility with old code, add the active field
|
||||||
|
|
|
@ -61,6 +61,7 @@ def harvest_source_db_to_form_schema():
|
||||||
'source_type': [convert_from_extras, ignore_missing],
|
'source_type': [convert_from_extras, ignore_missing],
|
||||||
'frequency': [convert_from_extras, ignore_missing],
|
'frequency': [convert_from_extras, ignore_missing],
|
||||||
'config': [convert_from_extras, ignore_missing],
|
'config': [convert_from_extras, ignore_missing],
|
||||||
|
'owner_org': [ignore_missing]
|
||||||
})
|
})
|
||||||
|
|
||||||
return schema
|
return schema
|
||||||
|
|
|
@ -50,7 +50,7 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
data_dict['extras'] = []
|
data_dict['extras'] = []
|
||||||
|
|
||||||
data_dict['extras'].append({
|
data_dict['extras'].append({
|
||||||
'key': key, 'value': '"{0}"'.format(value), 'state': u'active'
|
'key': key, 'value': value, 'state': u'active'
|
||||||
})
|
})
|
||||||
|
|
||||||
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
|
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
|
||||||
|
@ -138,12 +138,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
Similar to db_to_form_schema but with further options to allow
|
Similar to db_to_form_schema but with further options to allow
|
||||||
slightly different schemas, eg for creation or deletion on the API.
|
slightly different schemas, eg for creation or deletion on the API.
|
||||||
'''
|
'''
|
||||||
if options.get('type') == 'show':
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
return self.db_to_form_schema()
|
return self.db_to_form_schema()
|
||||||
|
|
||||||
|
|
||||||
def db_to_form_schema(self):
|
def db_to_form_schema(self):
|
||||||
'''
|
'''
|
||||||
Returns the schema for mapping package data from the database into a
|
Returns the schema for mapping package data from the database into a
|
||||||
|
@ -157,12 +153,11 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
'''Check if the return data is correct, mostly for checking out
|
'''Check if the return data is correct, mostly for checking out
|
||||||
if spammers are submitting only part of the form'''
|
if spammers are submitting only part of the form'''
|
||||||
|
|
||||||
surplus_keys_schema = ['__extras', '__junk', 'extras',
|
surplus_keys_schema = ['__extras', '__junk', 'extras', 'notes',
|
||||||
'extras_validation', 'save', 'return_to', 'type',
|
'extras_validation', 'save', 'return_to', 'type',
|
||||||
'state']
|
'state', 'owner_org', 'frequency', 'config']
|
||||||
|
|
||||||
#TODO: state and delete
|
#TODO: state and delete
|
||||||
|
|
||||||
if not schema:
|
if not schema:
|
||||||
schema = self.form_to_db_schema()
|
schema = self.form_to_db_schema()
|
||||||
schema_keys = schema.keys()
|
schema_keys = schema.keys()
|
||||||
|
@ -170,8 +165,9 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
|
|
||||||
missing_keys = keys_in_schema - set(data_dict.keys())
|
missing_keys = keys_in_schema - set(data_dict.keys())
|
||||||
if missing_keys:
|
if missing_keys:
|
||||||
log.info('incorrect form fields posted, missing %s' % missing_keys)
|
msg = 'Incorrect form fields posted, missing %s' % missing_keys
|
||||||
raise dictization_functions.DataError(data_dict)
|
log.info(msg)
|
||||||
|
raise dictization_functions.DataError(msg)
|
||||||
|
|
||||||
def configure(self, config):
|
def configure(self, config):
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue