Merge branch 'tomkralidis-csw-keywords'

This commit is contained in:
amercader 2013-11-22 16:29:38 +00:00
commit 6b28bb8ec2
3 changed files with 50 additions and 5 deletions

View File

@ -33,6 +33,26 @@ def setup_db(pycsw_config):
extra_columns=ckan_columns) extra_columns=ckan_columns)
def set_keywords(pycsw_config_file, pycsw_config, ckan_url, limit=20):
    """Set the pycsw service metadata keywords from the top CKAN tags.

    Fetches the tag list from the CKAN ``api/tag_counts`` endpoint, keeps the
    ``limit`` entries with the highest second element (assumed to be the
    usage count of ``[name, count]`` pairs -- confirm against the CKAN API),
    and stores their names, comma-separated, as ``identification_keywords``
    in the ``metadata:main`` section of the pycsw configuration. The
    configuration is then written back to ``pycsw_config_file``.

    :param pycsw_config_file: path of the pycsw configuration file to rewrite
    :param pycsw_config: loaded configuration object; must provide ``set``
        and ``write`` (presumably a ConfigParser instance -- verify against
        the caller)
    :param ckan_url: base URL of the CKAN site, with or without a trailing
        slash
    :param limit: maximum number of tags to use as keywords

    :raises requests.exceptions.HTTPError: if CKAN answers with an HTTP
        error status
    """
    # Not every caller normalises the URL, so guarantee exactly one slash
    # between the site URL and the API path. Idempotent for callers that
    # already appended the slash.
    base_url = ckan_url.rstrip('/') + '/'

    log.info('Fetching tags from %s', base_url)
    response = requests.get(base_url + 'api/tag_counts')
    # Surface a clear HTTP error instead of an obscure JSON decoding failure.
    response.raise_for_status()
    tags = response.json()

    log.info('Deriving top %d tags', limit)
    # Drop duplicate entries, then order by the second element, highest
    # first. Pre-sorting the entries makes the order of ties deterministic:
    # sorted() is stable, even with reverse=True.
    unique_tags = set(tuple(tag) for tag in tags)
    top_tags = sorted(sorted(unique_tags),
                      key=lambda tag: tag[1], reverse=True)[:limit]
    keywords = ','.join('%s' % tag[0] for tag in top_tags)

    log.info('Setting tags in pycsw configuration file %s', pycsw_config_file)
    pycsw_config.set('metadata:main', 'identification_keywords', keywords)
    # Text mode: the config writer emits text, which a binary file handle
    # rejects on Python 3.
    with open(pycsw_config_file, 'w') as configfile:
        pycsw_config.write(configfile)
def load(pycsw_config, ckan_url): def load(pycsw_config, ckan_url):
database = pycsw_config.get('repository', 'database') database = pycsw_config.get('repository', 'database')
@ -40,7 +60,6 @@ def load(pycsw_config, ckan_url):
context = pycsw.config.StaticContext() context = pycsw.config.StaticContext()
repo = repository.Repository(database, context, table=table_name) repo = repository.Repository(database, context, table=table_name)
ckan_url = ckan_url.lstrip('/') + '/'
log.info('Started gathering CKAN datasets identifiers: {0}'.format(str(datetime.datetime.now()))) log.info('Started gathering CKAN datasets identifiers: {0}'.format(str(datetime.datetime.now())))
@ -179,6 +198,9 @@ Manages the CKAN-pycsw integration
python ckan-pycsw.py setup [-p] python ckan-pycsw.py setup [-p]
Setups the necessary pycsw table on the db. Setups the necessary pycsw table on the db.
python ckan-pycsw.py set_keywords [-p] -u
Sets pycsw server metadata keywords from CKAN site tag list.
python ckan-pycsw.py load [-p] -u python ckan-pycsw.py load [-p] -u
Loads CKAN datasets as records into the pycsw db. Loads CKAN datasets as records into the pycsw db.
@ -237,10 +259,14 @@ if __name__ == '__main__':
if arg.command == 'setup': if arg.command == 'setup':
setup_db(pycsw_config) setup_db(pycsw_config)
elif arg.command == 'load': elif arg.command in ['load', 'set_keywords']:
if not arg.ckan_url: if not arg.ckan_url:
raise AssertionError('You need to provide a CKAN URL with -u or --ckan_url') raise AssertionError('You need to provide a CKAN URL with -u or --ckan_url')
load(pycsw_config, arg.ckan_url) ckan_url = arg.ckan_url.rstrip('/') + '/'
if arg.command == 'load':
load(pycsw_config, ckan_url)
else:
set_keywords(arg.pycsw_config, pycsw_config, ckan_url)
elif arg.command == 'clear': elif arg.command == 'clear':
clear(pycsw_config) clear(pycsw_config)
else: else:

View File

@ -11,6 +11,9 @@ class Pycsw(script.command.Command):
ckan-pycsw setup [-p] ckan-pycsw setup [-p]
Setups the necessary pycsw table on the db. Setups the necessary pycsw table on the db.
ckan-pycsw set_keywords [-p] [-u]
Sets pycsw server metadata keywords from CKAN site tag list.
ckan-pycsw load [-p] [-u] ckan-pycsw load [-p] [-u]
Loads CKAN datasets as records into the pycsw db. Loads CKAN datasets as records into the pycsw db.
@ -51,9 +54,12 @@ option:
cmd = self.args[0] cmd = self.args[0]
if cmd == 'setup': if cmd == 'setup':
ckan_pycsw.setup_db(config) ckan_pycsw.setup_db(config)
elif cmd == 'load': elif cmd in ['load', 'set_keywords']:
ckan_url = self.options.ckan_url ckan_url = self.options.ckan_url
ckan_pycsw.load(config, ckan_url) if cmd == 'load':
ckan_pycsw.load(config, ckan_url)
else:
ckan_pycsw.set_keywords(self.options.pycsw_config, config, ckan_url)
elif cmd == 'clear': elif cmd == 'clear':
ckan_pycsw.clear(config) ckan_pycsw.clear(config)
else: else:

View File

@ -154,6 +154,19 @@ Setup
datasets will be synchronized and deleted datasets from CKAN will be removed datasets will be synchronized and deleted datasets from CKAN will be removed
from pycsw as well. from pycsw as well.
Setting Service Metadata Keywords
+++++++++++++++++++++++++++++++++
The CSW standard allows administrators to set CSW service metadata. These
values can be set in the ``metadata:main`` section of the pycsw configuration.
If you would like the CSW service metadata keywords to reflect the CKAN
tags, run the following convenience command:
paster ckan-pycsw set_keywords -p /etc/ckan/default/pycsw.cfg
Note that you must have privileges to write to the pycsw configuration file.
Running it on production site Running it on production site
+++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++