Fix migration for old harvests so that ones that errored are correctly marked. Added helpful comments in model.
parent 3e21ea4f82
commit 2da918c2e4
@@ -196,12 +196,21 @@ def define_harvester_tables():
         Column('gather_finished', types.DateTime),
         Column('finished', types.DateTime),
         Column('source_id', types.UnicodeText, ForeignKey('harvest_source.id')),
+        # status: New, Running, Finished
         Column('status', types.UnicodeText, default=u'New', nullable=False),
     )
     # Was harvested_document
+    # A harvest_object contains a representation of one dataset during a
+    # particular harvest
     harvest_object_table = Table('harvest_object', metadata,
         Column('id', types.UnicodeText, primary_key=True, default=make_uuid),
+        # The guid is the 'identity' of the dataset, according to the source.
+        # So if you reharvest it, then the harvester knows which dataset to
+        # update because of this identity. The identity needs to be unique
+        # within this CKAN.
         Column('guid', types.UnicodeText, default=u''),
+        # When you harvest a dataset multiple times, only the latest
+        # successfully imported harvest_object should be flagged 'current'.
+        # The import_stage reads and writes it.
         Column('current',types.Boolean,default=False),
         Column('gathered', types.DateTime, default=datetime.datetime.utcnow),
         Column('fetch_started', types.DateTime),
@@ -209,6 +218,7 @@ def define_harvester_tables():
         Column('fetch_finished', types.DateTime),
         Column('import_started', types.DateTime),
         Column('import_finished', types.DateTime),
+        # state: WAITING, FETCH, IMPORT, COMPLETE, ERROR
         Column('state', types.UnicodeText, default=u'WAITING'),
         Column('metadata_modified_date', types.DateTime),
         Column('retry_times',types.Integer, default=0),
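For context beyond the diff: the new comments describe how 'guid' ties repeated harvests of the same source dataset together and how 'current' singles out the latest successfully imported harvest_object. A minimal sketch of that relationship, assuming the HarvestObject mapped class from ckanext.harvest.model and a SQLAlchemy session; the helper name is illustrative and not part of this commit:

    from ckanext.harvest.model import HarvestObject

    def get_current_object(session, guid):
        # Only one harvest_object per guid should be flagged current at a time;
        # the import stage flips the flag when a newer import succeeds.
        # Returns the latest successfully imported object for this dataset
        # identity, or None if it was never imported into this CKAN.
        return (session.query(HarvestObject)
                       .filter(HarvestObject.guid == guid)
                       .filter(HarvestObject.current == True)
                       .first())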
@@ -391,9 +401,11 @@ ALTER TABLE harvest_object_extra
 ALTER TABLE harvest_object_extra
     ADD CONSTRAINT harvest_object_extra_harvest_object_id_fkey FOREIGN KEY (harvest_object_id) REFERENCES harvest_object(id);
 
-UPDATE harvest_object set state = 'COMPLETE';
+UPDATE harvest_object set state = 'COMPLETE' where package_id is not null;
+UPDATE harvest_object set state = 'ERROR' where package_id is null;
 UPDATE harvest_object set retry_times = 0;
-UPDATE harvest_object set report_status = 'new';
+UPDATE harvest_object set report_status = 'updated' where package_id is not null;
+UPDATE harvest_object set report_status = 'errored' where package_id is null;
 UPDATE harvest_source set frequency = 'MANUAL';
 
 ALTER TABLE harvest_object DROP CONSTRAINT harvest_object_package_id_fkey;
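The intent of the reworked UPDATEs is that an old harvest object counts as errored exactly when it never produced a package (package_id is null), and as complete and 'updated' when it did. A rough post-migration sanity check along those lines, as a sketch only; the connection URL is a placeholder and this check is not part of the commit:

    from sqlalchemy import create_engine, text

    # Placeholder DSN - point this at the CKAN database the migration ran against.
    engine = create_engine('postgresql://ckan_default:pass@localhost/ckan_default')

    with engine.connect() as conn:
        # After the UPDATEs above, no row should pair an imported package with
        # ERROR state, or a missing package with COMPLETE state.
        mismatched = conn.execute(text(
            "SELECT count(*) FROM harvest_object "
            "WHERE (package_id IS NULL AND state <> 'ERROR') "
            "   OR (package_id IS NOT NULL AND state <> 'COMPLETE')"
        )).scalar()
        print('harvest_object rows with inconsistent state: %d' % mismatched)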
@@ -250,7 +250,12 @@ def gather_callback(channel, method, header, body):
             log.debug('Sent {0} objects to the fetch queue'.format(len(harvest_object_ids)))
 
     if not harvester_found:
-        msg = 'No harvester could be found for source type %s' % job.source.type
+        # This can occur if you:
+        # * remove a harvester and it still has sources that are then
+        #   refreshed
+        # * add a new harvester and restart CKAN but not the gather
+        #   queue.
+        msg = 'System error - No harvester could be found for source type %s' % job.source.type
         err = HarvestGatherError(message=msg,job=job)
         err.save()
         log.error(msg)
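The new comment explains when the 'no harvester found' branch is hit: the gather process only sees the IHarvester plugins it loaded at startup. A minimal sketch of that lookup, assuming CKAN's plugin machinery; the find_harvester helper is illustrative rather than the actual code in queue.py:

    from ckan.plugins import PluginImplementations
    from ckanext.harvest.interfaces import IHarvester

    def find_harvester(source_type):
        # Match the harvest source type against the harvester plugins loaded
        # in this process; a harvester enabled in the CKAN config after startup
        # stays invisible until the gather queue process is restarted too.
        for harvester in PluginImplementations(IHarvester):
            if harvester.info()['name'] == source_type:
                return harvester
        return None  # the caller then records a HarvestGatherError, as above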