2012-02-29 16:20:35 +01:00
|
|
|
import re
|
2012-06-08 18:09:22 +02:00
|
|
|
import logging
|
2012-02-29 16:20:35 +01:00
|
|
|
|
2012-03-01 13:02:16 +01:00
|
|
|
from ckan.logic import NotFound, ValidationError, check_access
|
2012-10-29 18:15:02 +01:00
|
|
|
from ckanext.harvest.logic import HarvestJobExists
|
2012-02-29 16:20:35 +01:00
|
|
|
from ckan.lib.navl.dictization_functions import validate
|
|
|
|
|
|
|
|
from ckanext.harvest.model import (HarvestSource, HarvestJob, HarvestObject)
|
2012-03-07 18:08:17 +01:00
|
|
|
from ckanext.harvest.logic.schema import default_harvest_source_schema
|
2012-02-29 16:20:35 +01:00
|
|
|
from ckanext.harvest.logic.dictization import (harvest_source_dictize,
|
|
|
|
harvest_job_dictize)
|
|
|
|
from ckanext.harvest.logic.action.get import harvest_source_list,harvest_job_list
|
|
|
|
|
2012-06-08 18:09:22 +02:00
|
|
|
# Module-level logger, named after this module, used by the actions below.
log = logging.getLogger(__name__)
|
|
|
|
|
2012-02-29 16:20:35 +01:00
|
|
|
def harvest_source_create(context, data_dict):
    """Validate ``data_dict`` and save a new harvest source.

    Access is checked first through
    ``check_access('harvest_source_create', ...)``.

    :param context: dict that must contain ``session``; an optional
        ``schema`` entry replaces ``default_harvest_source_schema()``.
    :param data_dict: field values for the new source. The validated data
        must contain ``url`` and ``type``.
    :returns: the value of ``harvest_source_dictize(source, context)`` for
        the saved source.
    :raises ValidationError: when validation reports errors; the session is
        rolled back before raising.
    """
    log.info('Creating harvest source: %r', data_dict)
    check_access('harvest_source_create', context, data_dict)

    session = context['session']
    schema = context.get('schema') or default_harvest_source_schema()

    data, errors = validate(data_dict, schema)
    if errors:
        session.rollback()
        log.warning('Harvest source does not validate: %r', errors)
        raise ValidationError(errors, _error_summary(errors))

    source = HarvestSource()
    source.url = data['url'].strip()
    source.type = data['type']

    # Optional fields are copied only when validation yielded a non-None
    # value, so anything missing keeps whatever HarvestSource() set up.
    optional_fields = ('active', 'title', 'description', 'user_id',
                       'publisher_id', 'config', 'frequency')
    for field in optional_fields:
        value = data.get(field)
        if value is not None:
            setattr(source, field, value)

    # When the caller passed 'active' explicitly, the validated value is
    # applied as-is (even if it is None), overriding the loop above.
    if 'active' in data_dict:
        source.active = data['active']

    source.save()
    log.info('Harvest source created: %s', source.id)

    return harvest_source_dictize(source, context)
|
|
|
|
|
2012-10-29 18:15:02 +01:00
|
|
|
|
2012-02-29 16:20:35 +01:00
|
|
|
def harvest_job_create(context, data_dict):
    """Create and save a harvest job for an existing, active source.

    Access is checked first through
    ``check_access('harvest_job_create', ...)``.

    :param context: passed through to ``check_access``,
        ``harvest_job_list`` and ``harvest_job_dictize``.
    :param data_dict: must contain ``source_id``.
    :returns: the value of ``harvest_job_dictize(job, context)`` for the
        saved job.
    :raises NotFound: when ``HarvestSource.get(source_id)`` returns nothing.
    :raises Exception: when the source is not active.
    :raises HarvestJobExists: when ``harvest_job_list`` already reports a
        job with status ``New`` for this source.
    """
    log.info('Harvest job create: %r', data_dict)
    check_access('harvest_job_create', context, data_dict)

    source_id = data_dict['source_id']

    # Check if source exists
    source = HarvestSource.get(source_id)
    if not source:
        log.warning('Harvest source %s does not exist', source_id)
        raise NotFound('Harvest source %s does not exist' % source_id)

    # Check if the source is active
    if not source.active:
        log.warning('Harvest job cannot be created for inactive source %s',
                    source_id)
        raise Exception('Can not create jobs on inactive sources')

    # Check if there already is an unrun job for this source. A separate
    # query dict is used so the ``data_dict`` parameter is not rebound.
    unrun_query = {
        'source_id': source_id,
        'status': u'New',
    }
    exists = harvest_job_list(context, unrun_query)
    if exists:
        log.warning('There is already an unrun job %r for this source %s',
                    exists, source_id)
        raise HarvestJobExists('There already is an unrun job for this source')

    job = HarvestJob()
    job.source = source

    job.save()
    log.info('Harvest job saved %s', job.id)
    return harvest_job_dictize(job, context)
|
|
|
|
|
|
|
|
def harvest_job_create_all(context, data_dict):
    """Create a harvest job for every active source that has no unrun job.

    Access is checked first through
    ``check_access('harvest_job_create_all', ...)``.

    :param context: passed through to ``check_access``,
        ``harvest_source_list``, ``harvest_job_list`` and
        ``harvest_job_create``.
    :param data_dict: extra filters forwarded to ``harvest_source_list``;
        ``only_active`` is always forced to ``True``. The caller's dict is
        left unmodified.
    :returns: list of the values returned by ``harvest_job_create`` for the
        jobs created in this call.
    """
    log.info('Harvest job create all: %r', data_dict)
    check_access('harvest_job_create_all', context, data_dict)

    # Work on a copy so the caller's dict does not silently gain an
    # 'only_active' key; ``or {}`` tolerates a missing data_dict.
    source_query = dict(data_dict or {}, only_active=True)

    # Get all active sources
    sources = harvest_source_list(context, source_query)
    jobs = []
    # Create a new job for each, if there isn't already one
    for source in sources:
        unrun_query = {
            'source_id': source['id'],
            'status': u'New',
        }

        # Skip sources with a pending job rather than letting
        # harvest_job_create raise HarvestJobExists for them.
        if harvest_job_list(context, unrun_query):
            continue

        job = harvest_job_create(context, {'source_id': source['id']})
        jobs.append(job)

    log.info('Created jobs for %i harvest sources', len(jobs))
    return jobs
|
|
|
|
|
|
|
|
def _error_summary(error_dict):
    """Condense a validation error dict into one message per field.

    :param error_dict: mapping of field name to a sequence of error
        messages for that field.
    :returns: dict mapping the prettified field name (see ``_prettify``) to
        the first error message of that field.
    """
    error_summary = {}
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
    for key, error in error_dict.items():
        error_summary[_prettify(key)] = error[0]
    return error_summary
|
|
|
|
|
|
|
|
def _prettify(field_name):
    """Turn a field name into a human-readable label.

    Underscores become spaces, the first character is upper-cased and the
    rest lower-cased (``str.capitalize``), and a standalone word ``url`` /
    ``Url`` is rewritten as ``URL``.

    :param field_name: field name string, e.g. ``'source_url'``.
    :returns: the label, e.g. ``'Source URL'``.
    """
    label = field_name.replace('_', ' ').capitalize()
    # Raw string so ``\w`` reaches the regex engine instead of being parsed
    # as an (invalid) string escape. The lookarounds restrict the match to
    # "url" as a whole word.
    return re.sub(r'(?<!\w)[Uu]rl(?!\w)', 'URL', label)
|
|
|
|
|