[#5] Add method for returning the original URL for a document

Harvesters implementing IHarvester can define a `get_original_url`
method that returns a URL pointing to the original location of a
document on the remote server. If present, this URL will be used in the
job reports.

Examples:
* For a CKAN record: http://{ckan-instance}/api/rest/{guid}
* For a WAF record: http://{waf-root}/{file-name}
* For a CSW record: http://{csw-server}/?Request=GetRecordById&Id={guid}&...
This commit is contained in:
amercader 2013-01-24 18:35:43 +00:00
parent d4b6dcb7f6
commit 676c7d34b6
2 changed files with 38 additions and 12 deletions

View File

@ -161,22 +161,38 @@ def harvest_job_report(context, data_dict):
if not job: if not job:
raise NotFound raise NotFound
q = model.Session.query(harvest_model.HarvestObjectError) \ # Check if the harvester for this job's source has a method for returning
# the URL to the original document
original_url_builder = None
for harvester in PluginImplementations(IHarvester):
if harvester.info()['name'] == job.source.type:
if hasattr(harvester, 'get_original_url'):
original_url_builder = harvester.get_original_url
q = model.Session.query(harvest_model.HarvestObjectError, harvest_model.HarvestObject.guid) \
.join(harvest_model.HarvestObject) \ .join(harvest_model.HarvestObject) \
.filter(harvest_model.HarvestObject.harvest_job_id==job.id) \ .filter(harvest_model.HarvestObject.harvest_job_id==job.id) \
.order_by(harvest_model.HarvestObjectError.harvest_object_id) .order_by(harvest_model.HarvestObjectError.harvest_object_id)
errors= {} report = {}
for error in q.all(): for error, guid in q.all():
if not error.harvest_object_id in errors: if not error.harvest_object_id in report:
errors[error.harvest_object_id] = [] report[error.harvest_object_id] = {
errors[error.harvest_object_id].append({ 'guid': guid,
'errors': []
}
if original_url_builder:
url = original_url_builder(error.harvest_object_id)
if url:
report[error.harvest_object_id]['original_url'] = url
report[error.harvest_object_id]['errors'].append({
'message': error.message, 'message': error.message,
'line': error.line, 'line': error.line,
'type': error.stage 'type': error.stage
}) })
return {'errors': errors} return report
def harvest_job_list(context,data_dict): def harvest_job_list(context,data_dict):

View File

@ -33,13 +33,23 @@
<div style='font-size: 1.5em; margin: 1em 0;'> <div style='font-size: 1.5em; margin: 1em 0;'>
{{ c.job_report.errors.keys()|length}} documents with errors {{ c.job_report.keys()|length}} documents with errors
</div> </div>
{% for harvest_object_id in c.job_report.errors.keys() %} {% for harvest_object_id in c.job_report.keys() %}
<div> <div>
<div>{{ harvest_object_id }}</div> <div>
{% for error in c.job_report.errors[harvest_object_id] %} {{ c.job_report[harvest_object_id].guid }}
{% if 'original_url' in c.job_report[harvest_object_id] %}
(<a href="{{ c.job_report[harvest_object_id].original_url }}">{{ _('Remote content') }}</a>)
{% endif %}
({{ h.link_for_harvest_object(harvest_object_id,text=_('Local content')) }})
</div>
{% for error in c.job_report[harvest_object_id].errors %}
<div style="margin-left: 2em">{{ error.message }} <div style="margin-left: 2em">{{ error.message }}
{% if error.line %} {% if error.line %}
<span>(line {{error.line}})</span> <span>(line {{error.line}})</span>