[#5] Add method for returning the original URL for a document
Harvesters implementing IHarvester can define a `get_original_url` method that returns a URL pointing to the original location of a document on the remote server. If present, this URL will be used in the job reports. Examples:
* For a CKAN record: http://{ckan-instance}/api/rest/{guid}
* For a WAF record: http://{waf-root}/{file-name}
* For a CSW record: http://{csw-server}/?Request=GetElementById&Id={guid}&...
This commit is contained in:
parent
d4b6dcb7f6
commit
676c7d34b6
|
@ -161,22 +161,38 @@ def harvest_job_report(context, data_dict):
|
||||||
if not job:
|
if not job:
|
||||||
raise NotFound
|
raise NotFound
|
||||||
|
|
||||||
q = model.Session.query(harvest_model.HarvestObjectError) \
|
# Check if the harvester for this job's source has a method for returning
|
||||||
|
# the URL to the original document
|
||||||
|
original_url_builder = None
|
||||||
|
for harvester in PluginImplementations(IHarvester):
|
||||||
|
if harvester.info()['name'] == job.source.type:
|
||||||
|
if hasattr(harvester, 'get_original_url'):
|
||||||
|
original_url_builder = harvester.get_original_url
|
||||||
|
|
||||||
|
q = model.Session.query(harvest_model.HarvestObjectError, harvest_model.HarvestObject.guid) \
|
||||||
.join(harvest_model.HarvestObject) \
|
.join(harvest_model.HarvestObject) \
|
||||||
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
|
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
|
||||||
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
|
.order_by(harvest_model.HarvestObjectError.harvest_object_id)
|
||||||
|
|
||||||
errors= {}
|
report = {}
|
||||||
for error in q.all():
|
for error, guid in q.all():
|
||||||
if not error.harvest_object_id in errors:
|
if not error.harvest_object_id in report:
|
||||||
errors[error.harvest_object_id] = []
|
report[error.harvest_object_id] = {
|
||||||
errors[error.harvest_object_id].append({
|
'guid': guid,
|
||||||
|
'errors': []
|
||||||
|
}
|
||||||
|
if original_url_builder:
|
||||||
|
url = original_url_builder(error.harvest_object_id)
|
||||||
|
if url:
|
||||||
|
report[error.harvest_object_id]['original_url'] = url
|
||||||
|
|
||||||
|
report[error.harvest_object_id]['errors'].append({
|
||||||
'message': error.message,
|
'message': error.message,
|
||||||
'line': error.line,
|
'line': error.line,
|
||||||
'type': error.stage
|
'type': error.stage
|
||||||
})
|
})
|
||||||
|
|
||||||
return {'errors': errors}
|
return report
|
||||||
|
|
||||||
def harvest_job_list(context,data_dict):
|
def harvest_job_list(context,data_dict):
|
||||||
|
|
||||||
|
|
|
@ -33,13 +33,23 @@
|
||||||
|
|
||||||
|
|
||||||
<div style='font-size: 1.5em; margin: 1em 0;'>
|
<div style='font-size: 1.5em; margin: 1em 0;'>
|
||||||
{{ c.job_report.errors.keys()|length}} documents with errors
|
{{ c.job_report.keys()|length}} documents with errors
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% for harvest_object_id in c.job_report.errors.keys() %}
|
{% for harvest_object_id in c.job_report.keys() %}
|
||||||
<div>
|
<div>
|
||||||
<div>{{ harvest_object_id }}</div>
|
<div>
|
||||||
{% for error in c.job_report.errors[harvest_object_id] %}
|
{{ c.job_report[harvest_object_id].guid }}
|
||||||
|
|
||||||
|
{% if 'original_url' in c.job_report[harvest_object_id] %}
|
||||||
|
(<a href="{{ c.job_report[harvest_object_id].original_url }}">{{ _('Remote content') }}</a>)
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
({{ h.link_for_harvest_object(harvest_object_id,text=_('Local content')) }})
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
{% for error in c.job_report[harvest_object_id].errors %}
|
||||||
<div style="margin-left: 2em">{{ error.message }}
|
<div style="margin-left: 2em">{{ error.message }}
|
||||||
{% if error.line %}
|
{% if error.line %}
|
||||||
<span>(line {{error.line}})</span>
|
<span>(line {{error.line}})</span>
|
||||||
|
|
Loading…
Reference in New Issue