Script for creating harvest source datasets on old versions

The way we check whether datasets need to be created might need to be
improved.
This commit is contained in:
amercader 2012-12-05 18:54:28 +00:00
parent 22ec9cb5af
commit fcbe6aa6de
2 changed files with 90 additions and 5 deletions

View File

@ -1,5 +1,6 @@
import logging import logging
import datetime import datetime
import uuid
from sqlalchemy import event from sqlalchemy import event
from sqlalchemy import distinct from sqlalchemy import distinct
@ -11,11 +12,12 @@ from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.orm import backref, relation from sqlalchemy.orm import backref, relation
from ckan import model from ckan import model
from ckan import logic
from ckan.model.meta import metadata, mapper, Session from ckan.model.meta import metadata, mapper, Session
from ckan.model.types import make_uuid from ckan.model.types import make_uuid
from ckan.model.domain_object import DomainObject from ckan.model.domain_object import DomainObject
from ckan.model.package import Package from ckan.model.package import Package
from ckan.lib.munge import munge_title_to_name
UPDATE_FREQUENCIES = ['MANUAL','MONTHLY','WEEKLY','BIWEEKLY','DAILY', 'ALWAYS'] UPDATE_FREQUENCIES = ['MANUAL','MONTHLY','WEEKLY','BIWEEKLY','DAILY', 'ALWAYS']
@ -69,6 +71,14 @@ def setup():
log.debug('Harvest tables need to be updated') log.debug('Harvest tables need to be updated')
migrate_v3() migrate_v3()
# Check if this instance has harvest source datasets
source_id = Session.query(HarvestSource.id).first()
if source_id:
pkg = Session.query(model.Package).filter(model.Package.id==source_id[0]).first()
if not pkg:
log.debug('Creating harvest source datasets from existing sources')
migrate_v3_create_datasets()
else: else:
log.debug('Harvest table creation deferred') log.debug('Harvest table creation deferred')
@ -383,3 +393,69 @@ UPDATE harvest_source set frequency = 'MANUAL';
Session.commit() Session.commit()
log.info('Harvest tables migrated to v3') log.info('Harvest tables migrated to v3')
def migrate_v3_create_datasets():
    '''
    Create a dataset (package) for every existing harvest source that
    does not have one yet.

    Each dataset is created with the same id as its harvest source and
    with type 'harvest_source'. Sources for which ``package_show``
    already finds a package with that id are skipped. Validation errors
    raised by ``package_create`` are logged and do not abort the
    migration of the remaining sources.
    '''
    import pylons
    from paste.registry import Registry
    from ckan.lib.cli import MockTranslator

    # Register a mock translator before calling any logic action.
    # NOTE(review): presumably needed because this runs outside of a web
    # request, where no translator is available -- confirm.
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())

    sources = model.Session.query(HarvestSource).all()

    if not sources:
        log.debug('No harvest sources to migrate')
        return

    site_user_name = logic.get_action('get_site_user')(
        {'model': model, 'ignore_auth': True}, {})['name']

    # The same context dict is reused for every source; per-call keys are
    # cleared at the top of each loop iteration.
    context = {'model': model,
               'session': model.Session,
               'user': site_user_name,  # TODO: auth of existing sources?
               'return_id_only': True,
               'extras_as_string': True,
               }

    def gen_new_name(title):
        '''
        Build a dataset name from the source title, appending a short
        random suffix if a package with that name already exists.
        '''
        name = munge_title_to_name(title).replace('_', '-')
        # Collapse runs of dashes into a single one
        while '--' in name:
            name = name.replace('--', '-')
        pkg_obj = Session.query(Package).filter(Package.name == name).first()
        if pkg_obj:
            return name + str(uuid.uuid4())[:5]
        return name

    for source in sources:
        # Drop keys that a previous iteration may have left in the
        # shared context
        context.pop('id', None)
        context.pop('package', None)

        # Check if package already exists
        try:
            logic.get_action('package_show')(context, {'id': source.id})
        except logic.NotFound:
            pass
        else:
            # A dataset with this id is already there, nothing to do
            continue

        package_dict = {
            'id': source.id,
            'name': gen_new_name(source.title) if source.title else source.id,
            'title': source.title if source.title else source.url,
            'notes': source.description,
            'url': source.url,
            'type': 'harvest_source',
            'source_type': source.type,
            'config': source.config,
            'frequency': source.frequency,
        }
        context['message'] = 'Created package for harvest source {0}'.format(source.id)
        try:
            logic.get_action('package_create')(context, package_dict)
            log.info('Created new package for source {0} ({1})'.format(source.id, source.url))
        except logic.ValidationError as e:
            # Log and carry on with the next source
            log.error('Validation Error: %s' % str(e.error_summary))

View File

@ -3,11 +3,9 @@ from logging import getLogger
from pylons import config from pylons import config
from ckan import logic from ckan import logic
from ckan.logic.converters import convert_from_extras
import ckan.plugins as p import ckan.plugins as p
from ckan.lib.plugins import DefaultDatasetForm from ckan.lib.plugins import DefaultDatasetForm
from ckan.lib.navl import dictization_functions from ckan.lib.navl import dictization_functions
from ckan.lib.navl.validators import ignore_missing
from ckanext.harvest.model import setup as model_setup from ckanext.harvest.model import setup as model_setup
from ckanext.harvest.model import HarvestSource, HarvestJob from ckanext.harvest.model import HarvestSource, HarvestJob
@ -29,10 +27,12 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
p.implements(p.IDatasetForm) p.implements(p.IDatasetForm)
p.implements(p.IPackageController, inherit=True) p.implements(p.IPackageController, inherit=True)
startup = False
## IPackageController ## IPackageController
def after_create(self, data_dict): def after_create(self, data_dict):
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME: if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME and not self.startup:
# Create an actual HarvestSource object # Create an actual HarvestSource object
_create_harvest_source_object(data_dict) _create_harvest_source_object(data_dict)
@ -86,8 +86,13 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
Similar to form_to_db_schema but with further options to allow Similar to form_to_db_schema but with further options to allow
slightly different schemas, eg for creation or deletion on the API. slightly different schemas, eg for creation or deletion on the API.
''' '''
schema = self.form_to_db_schema()
return self.form_to_db_schema() # Tweak the default schema to allow using the same id as the harvest source
# if creating datasets for the harvest sources
if self.startup:
schema['id'] = [unicode]
return schema
def form_to_db_schema(self): def form_to_db_schema(self):
''' '''
@ -129,6 +134,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
def configure(self, config): def configure(self, config):
self.startup = True
auth_profile = config.get('ckan.harvest.auth.profile',None) auth_profile = config.get('ckan.harvest.auth.profile',None)
if auth_profile: if auth_profile:
@ -151,6 +158,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
# Setup harvest model # Setup harvest model
model_setup() model_setup()
self.startup = False
def before_map(self, map): def before_map(self, map):
controller = 'ckanext.harvest.controllers.view:ViewController' controller = 'ckanext.harvest.controllers.view:ViewController'