From fcbe6aa6de57f340fed1d49fa8d9104fd22bde7f Mon Sep 17 00:00:00 2001
From: amercader
Date: Wed, 5 Dec 2012 18:54:28 +0000
Subject: [PATCH] Script for creating harvest source datasets on old versions

The way we check whether datasets need to be created might need to be
improved.
---
 ckanext/harvest/model/__init__.py | 88 ++++++++++++++++++++++++++++++-
 ckanext/harvest/plugin.py         | 17 ++++--
 2 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/ckanext/harvest/model/__init__.py b/ckanext/harvest/model/__init__.py
index f75171e..46a09e8 100644
--- a/ckanext/harvest/model/__init__.py
+++ b/ckanext/harvest/model/__init__.py
@@ -1,5 +1,6 @@
 import logging
 import datetime
+import uuid
 
 from sqlalchemy import event
 from sqlalchemy import distinct
@@ -11,11 +12,12 @@ from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.orm import backref, relation
 
 from ckan import model
+from ckan import logic
 from ckan.model.meta import metadata, mapper, Session
 from ckan.model.types import make_uuid
 from ckan.model.domain_object import DomainObject
 from ckan.model.package import Package
-
+from ckan.lib.munge import munge_title_to_name
 
 UPDATE_FREQUENCIES = ['MANUAL','MONTHLY','WEEKLY','BIWEEKLY','DAILY', 'ALWAYS']
 
@@ -69,6 +71,14 @@ def setup():
                 log.debug('Harvest tables need to be updated')
                 migrate_v3()
 
+            # Check if this instance has harvest source datasets
+            source_id = Session.query(HarvestSource.id).first()
+            if source_id:
+                pkg = Session.query(model.Package).filter(model.Package.id==source_id[0]).first()
+                if not pkg:
+                    log.debug('Creating harvest source datasets from existing sources')
+                    migrate_v3_create_datasets()
+
     else:
         log.debug('Harvest table creation deferred')
 
@@ -383,3 +393,79 @@ UPDATE harvest_source set frequency = 'MANUAL';
     Session.commit()
     log.info('Harvest tables migrated to v3')
 
+def migrate_v3_create_datasets():
+    '''
+    Create a dataset of type 'harvest_source' for every existing harvest
+    source that does not already have a package with the same id.
+
+    Sources that fail validation are logged and skipped.
+    '''
+    import pylons
+    from paste.registry import Registry
+
+    from ckan.lib.cli import MockTranslator
+    registry = Registry()
+    registry.prepare()
+    registry.register(pylons.translator, MockTranslator())
+
+    sources = model.Session.query(HarvestSource).all()
+
+    if not sources:
+        log.debug('No harvest sources to migrate')
+        return
+
+    site_user_name = logic.get_action('get_site_user')({'model': model, 'ignore_auth': True},{})['name']
+
+    context = {'model': model,
+               'session': model.Session,
+               'user': site_user_name, # TODO: auth of existing sources?
+               'return_id_only': True,
+               'extras_as_string': True,
+               }
+
+    def gen_new_name(title):
+        '''
+        Return a dataset name derived from title, adding a short random
+        suffix if a package with that name already exists.
+        '''
+        name = munge_title_to_name(title).replace('_', '-')
+        while '--' in name:
+            name = name.replace('--', '-')
+        pkg_obj = Session.query(Package).filter(Package.name == name).first()
+        if pkg_obj:
+            return name + str(uuid.uuid4())[:5]
+        else:
+            return name
+
+    for source in sources:
+        # The context is shared between iterations, drop any 'id' or
+        # 'package' left on it
+        context.pop('id', None)
+        context.pop('package', None)
+
+        # Check if package already exists
+        try:
+            logic.get_action('package_show')(context, {'id': source.id})
+            continue
+        except logic.NotFound:
+            pass
+
+        package_dict = {
+            'id': source.id,
+            'name': gen_new_name(source.title) if source.title else source.id,
+            'title': source.title if source.title else source.url,
+            'notes': source.description,
+            'url': source.url,
+            'type': 'harvest_source',
+            'source_type': source.type,
+            'config': source.config,
+            'frequency': source.frequency,
+            }
+        context['message'] = 'Created package for harvest source {0}'.format(source.id)
+        try:
+            logic.get_action('package_create')(context, package_dict)
+            log.info('Created new package for source {0} ({1})'.format(source.id, source.url))
+        except logic.ValidationError as e:
+            log.error('Validation Error for source %s: %s',
+                      source.id, e.error_summary)
+
diff --git a/ckanext/harvest/plugin.py b/ckanext/harvest/plugin.py
index b6bc3f5..da72648 100644
--- a/ckanext/harvest/plugin.py
+++ b/ckanext/harvest/plugin.py
@@ -3,11 +3,9 @@ from logging import getLogger
 
 from pylons import config
 from ckan import logic
-from ckan.logic.converters import convert_from_extras
 import ckan.plugins as p
 from ckan.lib.plugins import DefaultDatasetForm
 from ckan.lib.navl import dictization_functions
-from ckan.lib.navl.validators import ignore_missing
 
 from ckanext.harvest.model import setup as model_setup
 from ckanext.harvest.model import HarvestSource, HarvestJob
@@ -29,10 +27,12 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
     p.implements(p.IDatasetForm)
     p.implements(p.IPackageController, inherit=True)
 
+    startup = False
+
     ## IPackageController
 
     def after_create(self, data_dict):
-        if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
+        if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME and not self.startup:
             # Create an actual HarvestSource object
             _create_harvest_source_object(data_dict)
 
@@ -86,8 +86,13 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
         Similar to form_to_db_schema but with further options to allow
         slightly different schemas, eg for creation or deletion on the API.
         '''
+        schema = self.form_to_db_schema()
 
-        return self.form_to_db_schema()
+        # Tweak the default schema to allow using the same id as the harvest source
+        # if creating datasets for the harvest sources
+        if self.startup:
+            schema['id'] = [unicode]
+        return schema
 
     def form_to_db_schema(self):
         '''
@@ -129,6 +134,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
 
     def configure(self, config):
 
+        self.startup = True
+
         auth_profile = config.get('ckan.harvest.auth.profile',None)
 
         if auth_profile:
@@ -151,6 +158,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
         # Setup harvest model
         model_setup()
 
+        self.startup = False
+
     def before_map(self, map):
         controller = 'ckanext.harvest.controllers.view:ViewController'
 