[#5] Add method for returning the original URL for a document
Harvesters implementing IHarvester can define a `get_original_url` method that should return a URL pointing to the original location of a document on the remote server. If present, this URL will be used in the job reports. Examples:
* For a CKAN record: http://{ckan-instance}/api/rest/{guid}
* For a WAF record: http://{waf-root}/{file-name}
* For a CSW record: http://{csw-server}/?Request=GetRecordById&Id={guid}&...
This commit is contained in:
parent
d4b6dcb7f6
commit
676c7d34b6
|
@ -161,22 +161,38 @@ def harvest_job_report(context, data_dict):
|
|||
if not job:
|
||||
raise NotFound
|
||||
|
||||
q = model.Session.query(harvest_model.HarvestObjectError) \
|
||||
# Check if the harvester for this job's source has a method for returning
|
||||
# the URL to the original document
|
||||
original_url_builder = None
|
||||
for harvester in PluginImplementations(IHarvester):
|
||||
if harvester.info()['name'] == job.source.type:
|
||||
if hasattr(harvester, 'get_original_url'):
|
||||
original_url_builder = harvester.get_original_url
|
||||
|
||||
q = model.Session.query(harvest_model.HarvestObjectError, harvest_model.HarvestObject.guid) \
|
||||
.join(harvest_model.HarvestObject) \
|
||||
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
|
||||
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
|
||||
|
||||
errors= {}
|
||||
for error in q.all():
|
||||
if not error.harvest_object_id in errors:
|
||||
errors[error.harvest_object_id] = []
|
||||
errors[error.harvest_object_id].append({
|
||||
report = {}
|
||||
for error, guid in q.all():
|
||||
if not error.harvest_object_id in report:
|
||||
report[error.harvest_object_id] = {
|
||||
'guid': guid,
|
||||
'errors': []
|
||||
}
|
||||
if original_url_builder:
|
||||
url = original_url_builder(error.harvest_object_id)
|
||||
if url:
|
||||
report[error.harvest_object_id]['original_url'] = url
|
||||
|
||||
report[error.harvest_object_id]['errors'].append({
|
||||
'message': error.message,
|
||||
'line': error.line,
|
||||
'type': error.stage
|
||||
})
|
||||
|
||||
return {'errors': errors}
|
||||
return report
|
||||
|
||||
def harvest_job_list(context,data_dict):
|
||||
|
||||
|
|
|
@ -33,13 +33,23 @@
|
|||
|
||||
|
||||
<div style='font-size: 1.5em; margin: 1em 0;'>
|
||||
{{ c.job_report.errors.keys()|length}} documents with errors
|
||||
{{ c.job_report.keys()|length}} documents with errors
|
||||
</div>
|
||||
|
||||
{% for harvest_object_id in c.job_report.errors.keys() %}
|
||||
{% for harvest_object_id in c.job_report.keys() %}
|
||||
<div>
|
||||
<div>{{ harvest_object_id }}</div>
|
||||
{% for error in c.job_report.errors[harvest_object_id] %}
|
||||
<div>
|
||||
{{ c.job_report[harvest_object_id].guid }}
|
||||
|
||||
{% if 'original_url' in c.job_report[harvest_object_id] %}
|
||||
(<a href="{{ c.job_report[harvest_object_id].original_url }}">{{ _('Remote content') }}</a>)
|
||||
{% endif %}
|
||||
|
||||
({{ h.link_for_harvest_object(harvest_object_id,text=_('Local content')) }})
|
||||
|
||||
|
||||
</div>
|
||||
{% for error in c.job_report[harvest_object_id].errors %}
|
||||
<div style="margin-left: 2em">{{ error.message }}
|
||||
{% if error.line %}
|
||||
<span>(line {{error.line}})</span>
|
||||
|
|
Loading…
Reference in New Issue