Script for creating harvest source datasets on old versions
The way we check whether datasets need to be created might need to be improved.
This commit is contained in:
parent
22ec9cb5af
commit
fcbe6aa6de
|
@ -1,5 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
import datetime
|
import datetime
|
||||||
|
import uuid
|
||||||
|
|
||||||
from sqlalchemy import event
|
from sqlalchemy import event
|
||||||
from sqlalchemy import distinct
|
from sqlalchemy import distinct
|
||||||
|
@ -11,11 +12,12 @@ from sqlalchemy.engine.reflection import Inspector
|
||||||
from sqlalchemy.orm import backref, relation
|
from sqlalchemy.orm import backref, relation
|
||||||
|
|
||||||
from ckan import model
|
from ckan import model
|
||||||
|
from ckan import logic
|
||||||
from ckan.model.meta import metadata, mapper, Session
|
from ckan.model.meta import metadata, mapper, Session
|
||||||
from ckan.model.types import make_uuid
|
from ckan.model.types import make_uuid
|
||||||
from ckan.model.domain_object import DomainObject
|
from ckan.model.domain_object import DomainObject
|
||||||
from ckan.model.package import Package
|
from ckan.model.package import Package
|
||||||
|
from ckan.lib.munge import munge_title_to_name
|
||||||
|
|
||||||
UPDATE_FREQUENCIES = ['MANUAL','MONTHLY','WEEKLY','BIWEEKLY','DAILY', 'ALWAYS']
|
UPDATE_FREQUENCIES = ['MANUAL','MONTHLY','WEEKLY','BIWEEKLY','DAILY', 'ALWAYS']
|
||||||
|
|
||||||
|
@ -69,6 +71,14 @@ def setup():
|
||||||
log.debug('Harvest tables need to be updated')
|
log.debug('Harvest tables need to be updated')
|
||||||
migrate_v3()
|
migrate_v3()
|
||||||
|
|
||||||
|
# Check if this instance has harvest source datasets
|
||||||
|
source_id = Session.query(HarvestSource.id).first()
|
||||||
|
if source_id:
|
||||||
|
pkg = Session.query(model.Package).filter(model.Package.id==source_id[0]).first()
|
||||||
|
if not pkg:
|
||||||
|
log.debug('Creating harvest source datasets from existing sources')
|
||||||
|
migrate_v3_create_datasets()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.debug('Harvest table creation deferred')
|
log.debug('Harvest table creation deferred')
|
||||||
|
|
||||||
|
@ -383,3 +393,69 @@ UPDATE harvest_source set frequency = 'MANUAL';
|
||||||
Session.commit()
|
Session.commit()
|
||||||
log.info('Harvest tables migrated to v3')
|
log.info('Harvest tables migrated to v3')
|
||||||
|
|
||||||
|
def migrate_v3_create_datasets():
    '''
    Create a ``harvest_source`` dataset for every existing HarvestSource.

    Each dataset is created through the ``package_create`` action and reuses
    the id of the harvest source it represents. Sources for which
    ``package_show`` already finds a package with that id are skipped.

    Validation errors raised by ``package_create`` are logged and do not
    abort the migration of the remaining sources.
    '''
    import pylons
    from paste.registry import Registry

    from ckan.lib.cli import MockTranslator

    # Register a mock translator on the pylons translator proxy.
    # NOTE(review): presumably needed because this runs outside of a web
    # request, where no translator is registered -- confirm.
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())

    sources = model.Session.query(HarvestSource).all()

    if not sources:
        log.debug('No harvest sources to migrate')
        return

    site_user_name = logic.get_action('get_site_user')(
        {'model': model, 'ignore_auth': True}, {})['name']

    context = {
        'model': model,
        'session': model.Session,
        'user': site_user_name,  # TODO: auth of existing sources?
        'return_id_only': True,
        'extras_as_string': True,
    }

    def gen_new_name(title):
        '''
        Build a dataset name from ``title``.

        Underscores become dashes and runs of dashes are collapsed. If a
        package with the resulting name already exists, the first five
        characters of a random UUID are appended.
        '''
        name = munge_title_to_name(title).replace('_', '-')
        while '--' in name:
            name = name.replace('--', '-')
        pkg_obj = Session.query(Package).filter(Package.name == name).first()
        if pkg_obj:
            return name + str(uuid.uuid4())[:5]
        return name

    for source in sources:
        # The same context dict is reused for every source, so drop the
        # keys left over from the previous iteration's action calls.
        context.pop('id', None)
        context.pop('package', None)

        # Check if package already exists
        try:
            logic.get_action('package_show')(context, {'id': source.id})
            continue
        except logic.NotFound:
            pass

        package_dict = {
            'id': source.id,
            'name': gen_new_name(source.title) if source.title else source.id,
            'title': source.title if source.title else source.url,
            'notes': source.description,
            'url': source.url,
            'type': 'harvest_source',
            'source_type': source.type,
            'config': source.config,
            'frequency': source.frequency,
        }
        context['message'] = 'Created package for harvest source {0}'.format(
            source.id)
        try:
            logic.get_action('package_create')(context, package_dict)
        except logic.ValidationError as e:
            # Log and carry on with the next source instead of aborting.
            log.error('Validation Error: %s', e.error_summary)
        else:
            log.info('Created new package for source {0} ({1})'.format(
                source.id, source.url))
|
||||||
|
|
||||||
|
|
|
@ -3,11 +3,9 @@ from logging import getLogger
|
||||||
from pylons import config
|
from pylons import config
|
||||||
|
|
||||||
from ckan import logic
|
from ckan import logic
|
||||||
from ckan.logic.converters import convert_from_extras
|
|
||||||
import ckan.plugins as p
|
import ckan.plugins as p
|
||||||
from ckan.lib.plugins import DefaultDatasetForm
|
from ckan.lib.plugins import DefaultDatasetForm
|
||||||
from ckan.lib.navl import dictization_functions
|
from ckan.lib.navl import dictization_functions
|
||||||
from ckan.lib.navl.validators import ignore_missing
|
|
||||||
|
|
||||||
from ckanext.harvest.model import setup as model_setup
|
from ckanext.harvest.model import setup as model_setup
|
||||||
from ckanext.harvest.model import HarvestSource, HarvestJob
|
from ckanext.harvest.model import HarvestSource, HarvestJob
|
||||||
|
@ -29,10 +27,12 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
p.implements(p.IDatasetForm)
|
p.implements(p.IDatasetForm)
|
||||||
p.implements(p.IPackageController, inherit=True)
|
p.implements(p.IPackageController, inherit=True)
|
||||||
|
|
||||||
|
startup = False
|
||||||
|
|
||||||
## IPackageController
|
## IPackageController
|
||||||
|
|
||||||
def after_create(self, data_dict):
|
def after_create(self, data_dict):
|
||||||
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
|
if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME and not self.startup:
|
||||||
# Create an actual HarvestSource object
|
# Create an actual HarvestSource object
|
||||||
_create_harvest_source_object(data_dict)
|
_create_harvest_source_object(data_dict)
|
||||||
|
|
||||||
|
@ -86,8 +86,13 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
Similar to form_to_db_schema but with further options to allow
|
Similar to form_to_db_schema but with further options to allow
|
||||||
slightly different schemas, eg for creation or deletion on the API.
|
slightly different schemas, eg for creation or deletion on the API.
|
||||||
'''
|
'''
|
||||||
|
schema = self.form_to_db_schema()
|
||||||
|
|
||||||
return self.form_to_db_schema()
|
# Tweak the default schema to allow using the same id as the harvest source
|
||||||
|
# if creating datasets for the harvest sources
|
||||||
|
if self.startup:
|
||||||
|
schema['id'] = [unicode]
|
||||||
|
return schema
|
||||||
|
|
||||||
def form_to_db_schema(self):
|
def form_to_db_schema(self):
|
||||||
'''
|
'''
|
||||||
|
@ -129,6 +134,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
|
|
||||||
def configure(self, config):
|
def configure(self, config):
|
||||||
|
|
||||||
|
self.startup = True
|
||||||
|
|
||||||
auth_profile = config.get('ckan.harvest.auth.profile',None)
|
auth_profile = config.get('ckan.harvest.auth.profile',None)
|
||||||
|
|
||||||
if auth_profile:
|
if auth_profile:
|
||||||
|
@ -151,6 +158,8 @@ class Harvest(p.SingletonPlugin, DefaultDatasetForm):
|
||||||
# Setup harvest model
|
# Setup harvest model
|
||||||
model_setup()
|
model_setup()
|
||||||
|
|
||||||
|
self.startup = False
|
||||||
|
|
||||||
def before_map(self, map):
|
def before_map(self, map):
|
||||||
|
|
||||||
controller = 'ckanext.harvest.controllers.view:ViewController'
|
controller = 'ckanext.harvest.controllers.view:ViewController'
|
||||||
|
|
Loading…
Reference in New Issue