From 8431182f01fe1d0ecfd0aea9021d577616fbd380 Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 24 Jan 2013 18:39:19 +0000 Subject: [PATCH] Document method and cleanup the interface file --- README.rst | 51 ++++++++++++++++++++++++++--------- ckanext/harvest/interfaces.py | 39 +++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 9c038bb..6e50c65 100644 --- a/README.rst +++ b/README.rst @@ -224,6 +224,7 @@ following methods:: ''' implements(IHarvester) + def info(self): ''' Harvesting implementations must provide this method, which will return a @@ -237,30 +238,53 @@ following methods:: in the WUI. * description: a small description of what the harvester does. This will appear on the form as a guidance to the user. - * form_config_interface [optional]: Harvesters willing to store configuration - values in the database must provide this key. The only supported value is - 'Text'. This will enable the configuration text box in the form. See also - the ``validate_config`` method. A complete example may be:: { 'name': 'csw', 'title': 'CSW Server', - 'description': 'A server that implements OGC\'s Catalog Service + 'description': 'A server that implements OGC's Catalog Service for the Web (CSW) standard' } - returns: A dictionary with the harvester descriptors + :returns: A dictionary with the harvester descriptors ''' def validate_config(self, config): ''' + + [optional] + Harvesters can provide this method to validate the configuration entered in the form. It should return a single string, which will be stored in the database. Exceptions raised will be shown in the form's error messages. 
- returns A string with the validated configuration options + :param config: Config string coming from the form + :returns: A string with the validated configuration options + ''' + + def get_original_url(self, harvest_object_id): + ''' + + [optional] + + This optional but very recommended method allows harvesters to return + the URL to the original remote document, given a Harvest Object id. + Note that by getting the harvest object you have access to its guid as + well as the object source, which has the URL. + This URL will be used on error reports to help publishers link to the + original document that has the errors. If this method is not provided + or no URL is returned, only a link to the local copy of the remote + document will be shown. + + Examples: + * For a CKAN record: http://{ckan-instance}/api/rest/{guid} + * For a WAF record: http://{waf-root}/{file-name} + * For a CSW record: http://{csw-server}/?Request=GetElementById&Id={guid}&... + + :param harvest_object_id: HarvestObject id + :returns: A string with the URL to the original document ''' def gather_stage(self, harvest_job): @@ -270,7 +294,10 @@ following methods:: - gathering all the necessary objects to fetch on a later. stage (e.g. for a CSW server, perform a GetRecords request) - creating the necessary HarvestObjects in the database, specifying - the guid and a reference to its source and job. + the guid and a reference to its job. The HarvestObjects need a + reference date with the last modified date for the resource, this + may need to be set in a different stage depending on the type of + source. - creating and storing any suitable HarvestGatherErrors that may occur. - returning a list with all the ids of the created HarvestObjects. @@ -301,8 +328,7 @@ following methods:: - performing any necessary action with the fetched object (e.g create a CKAN package). Note: if this stage creates or updates a package, a reference - to the package must be added to the HarvestObject. 
- Additionally, the HarvestObject must be flagged as current. + to the package should be added to the HarvestObject. - creating the HarvestObject - Package relation (if necessary) - creating and storing any suitable HarvestObjectErrors that may occur. @@ -312,6 +338,7 @@ following methods:: :returns: True if everything went right, False if errors were found ''' + See the CKAN harvester for an example of how to implement the harvesting interface: @@ -466,8 +493,8 @@ following steps with the one you are using. you defined in the `stdout_logfile` section to see what happened. Common errors include:: `socket.error: [Errno 111] Connection refused` - RabbitMQ is not running:: - + RabbitMQ is not running:: + sudo service rabbitmq-server start 4. Once we have the two consumers running and monitored, we just need to create a cron job diff --git a/ckanext/harvest/interfaces.py b/ckanext/harvest/interfaces.py index 9d47883..c0ed689 100644 --- a/ckanext/harvest/interfaces.py +++ b/ckanext/harvest/interfaces.py @@ -29,7 +29,43 @@ class IHarvester(Interface): for the Web (CSW) standard' } - returns: A dictionary with the harvester descriptors + :returns: A dictionary with the harvester descriptors + ''' + + def validate_config(self, config): + ''' + + [optional] + + Harvesters can provide this method to validate the configuration entered in the + form. It should return a single string, which will be stored in the database. + Exceptions raised will be shown in the form's error messages. + + :param config: Config string coming from the form + :returns: A string with the validated configuration options + ''' + + def get_original_url(self, harvest_object_id): + ''' + + [optional] + + This optional but very recommended method allows harvesters to return + the URL to the original remote document, given a Harvest Object id. + Note that by getting the harvest object you have access to its guid as + well as the object source, which has the URL. 
+ This URL will be used on error reports to help publishers link to the + original document that has the errors. If this method is not provided + or no URL is returned, only a link to the local copy of the remote + document will be shown. + + Examples: + * For a CKAN record: http://{ckan-instance}/api/rest/{guid} + * For a WAF record: http://{waf-root}/{file-name} + * For a CSW record: http://{csw-server}/?Request=GetElementById&Id={guid}&... + + :param harvest_object_id: HarvestObject id + :returns: A string with the URL to the original document ''' def gather_stage(self, harvest_job): @@ -82,4 +118,3 @@ class IHarvester(Interface): :param harvest_object: HarvestObject object :returns: True if everything went right, False if errors were found ''' -