Script for creating harvest source datasets on old versions

The way we check whether datasets need to be created might need to be
improved.
This commit is contained in:
amercader 2012-12-05 18:54:28 +00:00
parent 22ec9cb5af
commit fcbe6aa6de
2 changed files with 90 additions and 5 deletions

View File

@ -1,5 +1,6 @@
import logging
import datetime
import uuid
from sqlalchemy import event
from sqlalchemy import distinct
@ -11,11 +12,12 @@ from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.orm import backref, relation
from ckan import model
from ckan import logic
from ckan.model.meta import metadata, mapper, Session
from ckan.model.types import make_uuid
from ckan.model.domain_object import DomainObject
from ckan.model.package import Package
from ckan.lib.munge import munge_title_to_name
UPDATE_FREQUENCIES = ['MANUAL','MONTHLY','WEEKLY','BIWEEKLY','DAILY', 'ALWAYS']
@ -69,6 +71,14 @@ def setup():
log.debug('Harvest tables need to be updated')
migrate_v3()
# Check if this instance has harvest source datasets
source_id = Session.query(HarvestSource.id).first()
if source_id:
pkg = Session.query(model.Package).filter(model.Package.id==source_id[0]).first()
if not pkg:
log.debug('Creating harvest source datasets from existing sources')
migrate_v3_create_datasets()
else:
log.debug('Harvest table creation deferred')
@ -383,3 +393,69 @@ UPDATE harvest_source set frequency = 'MANUAL';
Session.commit()
log.info('Harvest tables migrated to v3')
def migrate_v3_create_datasets():
    '''
    Create a dataset of type 'harvest_source' for each existing harvest
    source that does not have one yet.

    For every HarvestSource row, a package with the same id as the source
    is looked up via the `package_show` action. If it is not found, a new
    package is created via `package_create`, copying the title,
    description, url, type, config and frequency from the source.

    Validation errors on creation are logged and do not stop the
    processing of the remaining sources.
    '''
    import pylons
    from paste.registry import Registry
    from ckan.lib.cli import MockTranslator

    # Register a mock translator on the pylons translator proxy.
    # NOTE(review): presumably needed because this runs outside of a web
    # request, where no translator is available -- confirm.
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())

    session = model.Session

    sources = session.query(HarvestSource).all()
    if not sources:
        log.debug('No harvest sources to migrate')
        return

    site_user = logic.get_action('get_site_user')(
        {'model': model, 'ignore_auth': True}, {})

    # This context is shared by all the action calls below
    context = {
        'model': model,
        'session': session,
        'user': site_user['name'],  # TODO: auth of existing sources?
        'return_id_only': True,
        'extras_as_string': True,
    }

    def gen_new_name(title):
        '''
        Return a dataset name derived from the title, with underscores
        turned into hyphens and runs of hyphens collapsed. If a package
        with that name already exists, the first 5 characters of a random
        UUID are appended.
        '''
        name = munge_title_to_name(title).replace('_', '-')
        while '--' in name:
            name = name.replace('--', '-')

        name_taken = session.query(Package) \
            .filter(Package.name == name).first()
        if name_taken:
            return name + str(uuid.uuid4())[:5]
        return name

    for source in sources:
        # Drop the keys left in the shared context by the previous
        # iteration before reusing it
        context.pop('id', None)
        context.pop('package', None)

        # Check if package already exists
        try:
            logic.get_action('package_show')(context, {'id': source.id})
        except logic.NotFound:
            pass
        else:
            continue

        package_dict = {
            'id': source.id,
            'name': gen_new_name(source.title) if source.title else source.id,
            'title': source.title if source.title else source.url,
            'notes': source.description,
            'url': source.url,
            'type': 'harvest_source',
            'source_type': source.type,
            'config': source.config,
            'frequency': source.frequency,
        }

        context['message'] = \
            'Created package for harvest source {0}'.format(source.id)

        try:
            logic.get_action('package_create')(context, package_dict)
        except logic.ValidationError as e:
            log.error('Validation Error: %s', e.error_summary)
        else:
            log.info('Created new package for source {0} ({1})'.format(
                source.id, source.url))

View File

@ -3,11 +3,9 @@ from logging import getLogger
from pylons import config
from ckan import logic
from ckan.logic.converters import convert_from_extras
import ckan.plugins as p
from ckan.lib.plugins import DefaultDatasetForm
from ckan.lib.navl import dictization_functions
from ckan.lib.navl.validators import ignore_missing
from ckanext.harvest.model import setup as model_setup
from ckanext.harvest.model import HarvestSource, HarvestJob
@ -29,10 +27,12 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
p.implements(p.IDatasetForm)
p.implements(p.IPackageController, inherit=True)
startup = False
## IPackageController
def after_create(self, data_dict):
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME and not self.startup:
# Create an actual HarvestSource object
_create_harvest_source_object(data_dict)
@ -86,8 +86,13 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
Similar to form_to_db_schema but with further options to allow
slightly different schemas, eg for creation or deletion on the API.
'''
schema = self.form_to_db_schema()
return self.form_to_db_schema()
# Tweak the default schema to allow using the same id as the harvest source
# if creating datasets for the harvest sources
if self.startup:
schema['id'] = [unicode]
return schema
def form_to_db_schema(self):
'''
@ -129,6 +134,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
def configure(self, config):
self.startup = True
auth_profile = config.get('ckan.harvest.auth.profile',None)
if auth_profile:
@ -151,6 +158,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
# Setup harvest model
model_setup()
self.startup = False
def before_map(self, map):
controller = 'ckanext.harvest.controllers.view:ViewController'