Update CLI docstrings and documentation, remove unused file
This commit is contained in:
parent
bb2bf38c72
commit
cdf4b70bb7
|
@ -2,6 +2,9 @@ import sys
|
|||
import logging
|
||||
import datetime
|
||||
import io
|
||||
import os
|
||||
import argparse
|
||||
from six.moves.configparser import SafeConfigParser
|
||||
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
@ -10,58 +13,66 @@ from pycsw.core import metadata, repository, util
|
|||
import pycsw.core.config
|
||||
import pycsw.core.admin
|
||||
|
||||
logging.basicConfig(format='%(message)s', level=logging.INFO)
|
||||
logging.basicConfig(format="%(message)s", level=logging.INFO)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def setup_db(pycsw_config):
    """Set up the pycsw database table and indexes, plus the CKAN columns.

    Reads the ``database`` and ``table`` options from the ``repository``
    section of the configuration and calls ``pycsw.core.admin.setup_db``
    with two extra text columns: ``ckan_id`` (indexed) and ``ckan_modified``.

    :param pycsw_config: parsed pycsw configuration (the config parser
        object returned by ``_load_config``).
    """
    from sqlalchemy import Column, Text

    database = pycsw_config.get("repository", "database")
    # ``ConfigParser.get`` has no positional default: a third positional
    # argument is ``raw`` on Python 2 and a TypeError on Python 3. Fall back
    # to "records" explicitly when the option is not configured.
    if pycsw_config.has_option("repository", "table"):
        table_name = pycsw_config.get("repository", "table")
    else:
        table_name = "records"

    # Columns used by the ``load`` command to match pycsw records with CKAN
    # datasets and to detect modifications.
    ckan_columns = [
        Column("ckan_id", Text, index=True),
        Column("ckan_modified", Text),
    ]

    pycsw.core.admin.setup_db(
        database,
        table_name,
        "",
        create_plpythonu_functions=False,
        extra_columns=ckan_columns,
    )
|
||||
|
||||
|
||||
def set_keywords(pycsw_config_file, pycsw_config, ckan_url, limit=20):
    """Set the pycsw service metadata keywords from the top CKAN tags.

    Fetches ``<ckan_url>api/tag_counts``, keeps the ``limit`` entries with
    the highest count, stores their names (comma separated) in the
    ``identification_keywords`` option of the ``metadata:main`` section and
    writes the configuration back to ``pycsw_config_file``.

    :param pycsw_config_file: path of the pycsw configuration file to rewrite.
    :param pycsw_config: parsed configuration object for that file.
    :param ckan_url: base URL of the CKAN instance; the API path is appended
        directly, so it must end with a slash (the script entry point
        normalises it that way).
    :param limit: maximum number of tags to keep.
    :raises requests.HTTPError: if the CKAN API answers with an error status.
    """
    log.info("Fetching tags from %s", ckan_url)
    response = requests.get(ckan_url + "api/tag_counts")
    # Fail with a clear HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    tags = response.json()

    log.info("Deriving top %d tags", limit)
    # Uniquify, then sort by the second element, descending.
    # NOTE(review): each entry is presumably a [name, count] pair -- confirm
    # against the CKAN tag_counts API.
    unique_tags = {tuple(tag) for tag in tags}
    top_tags = sorted(unique_tags, key=lambda tag: tag[1], reverse=True)[:limit]
    keywords = ",".join("%s" % tag[0] for tag in top_tags)

    log.info("Setting tags in pycsw configuration file %s", pycsw_config_file)
    pycsw_config.set("metadata:main", "identification_keywords", keywords)
    # Text mode: ConfigParser.write() emits str, so a file opened with "wb"
    # raises TypeError on Python 3.
    with open(pycsw_config_file, "w") as configfile:
        pycsw_config.write(configfile)
|
||||
|
||||
|
||||
def load(pycsw_config, ckan_url):
|
||||
|
||||
database = pycsw_config.get('repository', 'database')
|
||||
table_name = pycsw_config.get('repository', 'table', 'records')
|
||||
database = pycsw_config.get("repository", "database")
|
||||
table_name = pycsw_config.get("repository", "table", "records")
|
||||
|
||||
context = pycsw.core.config.StaticContext()
|
||||
repo = repository.Repository(database, context, table=table_name)
|
||||
|
||||
log.info('Started gathering CKAN datasets identifiers: {0}'.format(str(datetime.datetime.now())))
|
||||
log.info(
|
||||
"Started gathering CKAN datasets identifiers: {0}".format(
|
||||
str(datetime.datetime.now())
|
||||
)
|
||||
)
|
||||
|
||||
query = 'api/search/dataset?qjson={"fl":"id,metadata_modified,extras_harvest_object_id,extras_metadata_source", "q":"harvest_object_id:[\\"\\" TO *]", "limit":1000, "start":%s}'
|
||||
|
||||
|
@ -75,23 +86,25 @@ def load(pycsw_config, ckan_url):
|
|||
response = requests.get(url)
|
||||
listing = response.json()
|
||||
if not isinstance(listing, dict):
|
||||
raise RuntimeError('Wrong API response: %s' % listing)
|
||||
results = listing.get('results')
|
||||
raise RuntimeError("Wrong API response: %s" % listing)
|
||||
results = listing.get("results")
|
||||
if not results:
|
||||
break
|
||||
for result in results:
|
||||
gathered_records[result['id']] = {
|
||||
'metadata_modified': result['metadata_modified'],
|
||||
'harvest_object_id': result['extras']['harvest_object_id'],
|
||||
'source': result['extras'].get('metadata_source')
|
||||
gathered_records[result["id"]] = {
|
||||
"metadata_modified": result["metadata_modified"],
|
||||
"harvest_object_id": result["extras"]["harvest_object_id"],
|
||||
"source": result["extras"].get("metadata_source"),
|
||||
}
|
||||
|
||||
start = start + 1000
|
||||
log.debug('Gathered %s' % start)
|
||||
log.debug("Gathered %s" % start)
|
||||
|
||||
log.info('Gather finished ({0} datasets): {1}'.format(
|
||||
len(gathered_records.keys()),
|
||||
str(datetime.datetime.now())))
|
||||
log.info(
|
||||
"Gather finished ({0} datasets): {1}".format(
|
||||
len(gathered_records.keys()), str(datetime.datetime.now())
|
||||
)
|
||||
)
|
||||
|
||||
existing_records = {}
|
||||
|
||||
|
@ -105,17 +118,16 @@ def load(pycsw_config, ckan_url):
|
|||
changed = set()
|
||||
|
||||
for key in set(gathered_records) & set(existing_records):
|
||||
if gathered_records[key]['metadata_modified'] > existing_records[key]:
|
||||
if gathered_records[key]["metadata_modified"] > existing_records[key]:
|
||||
changed.add(key)
|
||||
|
||||
for ckan_id in deleted:
|
||||
try:
|
||||
repo.session.begin()
|
||||
repo.session.query(repo.dataset.ckan_id).filter_by(
|
||||
ckan_id=ckan_id).delete()
|
||||
log.info('Deleted %s' % ckan_id)
|
||||
repo.session.query(repo.dataset.ckan_id).filter_by(ckan_id=ckan_id).delete()
|
||||
log.info("Deleted %s" % ckan_id)
|
||||
repo.session.commit()
|
||||
except Exception as err:
|
||||
except Exception:
|
||||
repo.session.rollback()
|
||||
raise
|
||||
|
||||
|
@ -123,76 +135,81 @@ def load(pycsw_config, ckan_url):
|
|||
ckan_info = gathered_records[ckan_id]
|
||||
record = get_record(context, repo, ckan_url, ckan_id, ckan_info)
|
||||
if not record:
|
||||
log.info('Skipped record %s' % ckan_id)
|
||||
log.info("Skipped record %s" % ckan_id)
|
||||
continue
|
||||
try:
|
||||
repo.insert(record, 'local', util.get_today_and_now())
|
||||
log.info('Inserted %s' % ckan_id)
|
||||
repo.insert(record, "local", util.get_today_and_now())
|
||||
log.info("Inserted %s" % ckan_id)
|
||||
except Exception as err:
|
||||
log.error('ERROR: not inserted %s Error:%s' % (ckan_id, err))
|
||||
log.error("ERROR: not inserted %s Error:%s" % (ckan_id, err))
|
||||
|
||||
for ckan_id in changed:
|
||||
ckan_info = gathered_records[ckan_id]
|
||||
record = get_record(context, repo, ckan_url, ckan_id, ckan_info)
|
||||
if not record:
|
||||
continue
|
||||
update_dict = dict([(getattr(repo.dataset, key),
|
||||
getattr(record, key)) \
|
||||
for key in record.__dict__.keys() if key != '_sa_instance_state'])
|
||||
update_dict = dict(
|
||||
[
|
||||
(getattr(repo.dataset, key), getattr(record, key))
|
||||
for key in record.__dict__.keys()
|
||||
if key != "_sa_instance_state"
|
||||
]
|
||||
)
|
||||
try:
|
||||
repo.session.begin()
|
||||
repo.session.query(repo.dataset).filter_by(
|
||||
ckan_id=ckan_id).update(update_dict)
|
||||
repo.session.query(repo.dataset).filter_by(ckan_id=ckan_id).update(
|
||||
update_dict
|
||||
)
|
||||
repo.session.commit()
|
||||
log.info('Changed %s' % ckan_id)
|
||||
log.info("Changed %s" % ckan_id)
|
||||
except Exception as err:
|
||||
repo.session.rollback()
|
||||
raise RuntimeError('ERROR: %s' % str(err))
|
||||
raise RuntimeError("ERROR: %s" % str(err))
|
||||
|
||||
|
||||
def clear(pycsw_config):
    """Delete every row from the pycsw records table.

    The database URL and table name are read from the ``repository`` section
    of the configuration; the table name defaults to ``records``.

    :param pycsw_config: parsed pycsw configuration object.
    """
    from sqlalchemy import create_engine, MetaData, Table

    database = pycsw_config.get("repository", "database")
    # ``ConfigParser.get`` has no positional default: a third positional
    # argument is ``raw`` on Python 2 and a TypeError on Python 3. Fall back
    # to "records" explicitly when the option is not configured.
    if pycsw_config.has_option("repository", "table"):
        table_name = pycsw_config.get("repository", "table")
    else:
        table_name = "records"

    log.debug("Creating engine")
    engine = create_engine(database)
    # NOTE(review): bound MetaData and implicit execution are legacy
    # SQLAlchemy APIs -- confirm the SQLAlchemy version this runs against.
    records = Table(table_name, MetaData(engine))
    records.delete().execute()
    log.info("Table cleared")
|
||||
|
||||
|
||||
def get_record(context, repo, ckan_url, ckan_id, ckan_info):
    """Build a pycsw record for one CKAN dataset from its harvested XML.

    Downloads ``<ckan_url>harvest/object/<harvest_object_id>``, parses the
    response as XML and passes it to ``metadata.parse_record``, keeping the
    first record returned. The record's ``identifier`` falls back to the
    CKAN id when empty, and ``ckan_id`` / ``ckan_modified`` are filled in
    from the arguments.

    :param context: pycsw context handed to ``metadata.parse_record``.
    :param repo: pycsw repository handed to ``metadata.parse_record``.
    :param ckan_url: base URL of the CKAN instance (ending with a slash).
    :param ckan_id: CKAN dataset id.
    :param ckan_info: dict with the ``source``, ``harvest_object_id`` and
        ``metadata_modified`` keys gathered for the dataset.
    :returns: the populated record, or ``None`` when the dataset comes from
        an ``arcgis`` source or its document cannot be parsed.
    """
    # ArcGIS-sourced datasets are never loaded, so decide that before
    # spending an HTTP request on them.
    if ckan_info["source"] == "arcgis":
        return None

    # Format the path first, so a "%" in the base URL cannot break the
    # string formatting.
    url = ckan_url + "harvest/object/%s" % ckan_info["harvest_object_id"]
    response = requests.get(url)

    try:
        xml = etree.parse(io.BytesIO(response.content))
    except Exception as err:
        log.error("Could not pass xml doc from %s, Error: %s", ckan_id, err)
        return None

    try:
        record = metadata.parse_record(context, xml, repo)[0]
    except Exception as err:
        log.error("Could not extract metadata from %s, Error: %s", ckan_id, err)
        return None

    if not record.identifier:
        record.identifier = ckan_id
    record.ckan_id = ckan_id
    record.ckan_modified = ckan_info["metadata_modified"]

    return record
|
||||
|
||||
|
||||
usage='''
|
||||
usage = """
|
||||
Manages the CKAN-pycsw integration
|
||||
|
||||
python ckan-pycsw.py setup [-p]
|
||||
|
@ -211,18 +228,19 @@ All commands require the pycsw configuration file. By default it will try
|
|||
to find a file called 'default.cfg' in the same directory, but you'll
|
||||
probably need to provide the actual location via the -p option:
|
||||
|
||||
paster ckan-pycsw setup -p /etc/ckan/default/pycsw.cfg
|
||||
python ckan_pycsw.py setup -p /etc/ckan/default/pycsw.cfg
|
||||
|
||||
The load command requires a CKAN URL from where the datasets will be pulled:
|
||||
|
||||
paster ckan-pycsw load -p /etc/ckan/default/pycsw.cfg -u http://localhost
|
||||
python ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg -u http://localhost
|
||||
|
||||
"""
|
||||
|
||||
'''
|
||||
|
||||
def _load_config(file_path):
|
||||
abs_path = os.path.abspath(file_path)
|
||||
if not os.path.exists(abs_path):
|
||||
raise AssertionError('pycsw config file {0} does not exist.'.format(abs_path))
|
||||
raise AssertionError("pycsw config file {0} does not exist.".format(abs_path))
|
||||
|
||||
config = SafeConfigParser()
|
||||
config.read(abs_path)
|
||||
|
@ -230,25 +248,24 @@ def _load_config(file_path):
|
|||
return config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="\n".split(usage)[0], usage=usage)
|
||||
parser.add_argument("command", help="Command to perform")
|
||||
|
||||
import os
|
||||
import argparse
|
||||
from ConfigParser import SafeConfigParser
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--pycsw_config",
|
||||
action="store",
|
||||
default="default.cfg",
|
||||
help="pycsw config file to use.",
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description='\n'.split(usage)[0],
|
||||
usage=usage)
|
||||
parser.add_argument('command',
|
||||
help='Command to perform')
|
||||
|
||||
parser.add_argument('-p', '--pycsw_config',
|
||||
action='store', default='default.cfg',
|
||||
help='pycsw config file to use.')
|
||||
|
||||
parser.add_argument('-u', '--ckan_url',
|
||||
action='store',
|
||||
help='CKAN instance to import the datasets from.')
|
||||
parser.add_argument(
|
||||
"-u",
|
||||
"--ckan_url",
|
||||
action="store",
|
||||
help="CKAN instance to import the datasets from.",
|
||||
)
|
||||
|
||||
if len(sys.argv) <= 1:
|
||||
parser.print_usage()
|
||||
|
@ -257,18 +274,18 @@ if __name__ == '__main__':
|
|||
arg = parser.parse_args()
|
||||
pycsw_config = _load_config(arg.pycsw_config)
|
||||
|
||||
if arg.command == 'setup':
|
||||
if arg.command == "setup":
|
||||
setup_db(pycsw_config)
|
||||
elif arg.command in ['load', 'set_keywords']:
|
||||
elif arg.command in ["load", "set_keywords"]:
|
||||
if not arg.ckan_url:
|
||||
raise AssertionError('You need to provide a CKAN URL with -u or --ckan_url')
|
||||
ckan_url = arg.ckan_url.rstrip('/') + '/'
|
||||
if arg.command == 'load':
|
||||
raise AssertionError("You need to provide a CKAN URL with -u or --ckan_url")
|
||||
ckan_url = arg.ckan_url.rstrip("/") + "/"
|
||||
if arg.command == "load":
|
||||
load(pycsw_config, ckan_url)
|
||||
else:
|
||||
set_keywords(arg.pycsw_config, pycsw_config, ckan_url)
|
||||
elif arg.command == 'clear':
|
||||
elif arg.command == "clear":
|
||||
clear(pycsw_config)
|
||||
else:
|
||||
print('Unknown command {0}'.format(arg.command))
|
||||
print("Unknown command {0}".format(arg.command))
|
||||
sys.exit(1)
|
||||
|
|
|
@ -14,7 +14,7 @@ def get_commands():
|
|||
]
|
||||
|
||||
|
||||
@click.group(u"spatial-validation", short_help=u"Validation commands")
|
||||
@click.group(u"spatial-validation", short_help=u"Spatial formats validation commands")
|
||||
def spatial_validation():
|
||||
pass
|
||||
|
||||
|
@ -22,18 +22,28 @@ def spatial_validation():
|
|||
@spatial_validation.command()
|
||||
@click.argument('pkg', required=False)
|
||||
def report(pkg):
|
||||
"""
|
||||
Performs validation on the harvested metadata, either for all
|
||||
packages or the one specified.
|
||||
"""
|
||||
|
||||
return util.report(pkg)
|
||||
|
||||
|
||||
@spatial_validation.command('report-csv')
|
||||
@click.argument('filepath')
|
||||
def report_csv(filepath):
|
||||
"""
|
||||
Performs validation on all the harvested metadata in the db and
|
||||
writes a report in CSV format to the given filepath.
|
||||
"""
|
||||
return util.report_csv(filepath)
|
||||
|
||||
|
||||
@spatial_validation.command('file')
|
||||
@click.argument('filepath')
|
||||
def validate_file(filepath):
|
||||
"""Performs validation on the given metadata file."""
|
||||
return util.validate_file(filepath)
|
||||
|
||||
|
||||
|
@ -45,9 +55,19 @@ def spatial():
|
|||
@spatial.command()
|
||||
@click.argument('srid', required=False)
|
||||
def initdb(srid):
|
||||
"""
|
||||
Creates the necessary tables. You must have PostGIS installed
|
||||
and configured in the database.
|
||||
You can provide the SRID of the geometry column. Default is 4326.
|
||||
"""
|
||||
return util.initdb(srid)
|
||||
|
||||
|
||||
@spatial.command('extents')
|
||||
def update_extents():
|
||||
"""
|
||||
Creates or updates the extent geometry column for datasets with
|
||||
an extent defined in the 'spatial' extra.
|
||||
"""
|
||||
|
||||
return util.update_extents()
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
import time
|
||||
from six.moves.urllib.request import urlopen
|
||||
from six.moves.urllib.error import URLError
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from ckan.plugins.toolkit import config
|
||||
|
||||
from ckan.model import engine_is_sqlite
|
||||
|
||||
|
||||
# copied from ckan/tests/__init__ to save importing it and therefore
|
||||
# setting up Pylons.
|
||||
class CkanServerCase(object):
    """Helpers for running shell commands and a separate CKAN server."""

    @staticmethod
    def _system(cmd):
        """Run ``cmd`` through the shell.

        :raises Exception: if the command exits with a non-zero status; the
            message includes the command and its output.
        """
        import subprocess

        (status, output) = subprocess.getstatusoutput(cmd)
        if status:
            raise Exception("Couldn't execute cmd: %s: %s" % (cmd, output))

    @classmethod
    def _paster(cls, cmd, config_path_rel):
        """Run a CKAN paster command with a config path relative to ``config["here"]``."""
        config_path = os.path.join(config["here"], config_path_rel)
        cls._system("paster --plugin ckan %s --config=%s" % (cmd, config_path))

    @staticmethod
    def _start_ckan_server(config_file=None):
        """Start ``paster serve`` and return the ``subprocess.Popen`` object.

        :param config_file: ini file to serve; defaults to ``config["__file__"]``.
        """
        import subprocess

        if not config_file:
            config_file = config["__file__"]
        return subprocess.Popen(["paster", "serve", config_file])

    @staticmethod
    def _wait_for_url(url="http://127.0.0.1:5000/", timeout=15):
        """Poll ``url`` until it can be opened.

        Makes at most ``int(timeout) * 100`` attempts, sleeping 0.01s after
        each ``URLError``. Returns silently in both cases (URL reachable or
        attempts exhausted), so ``timeout`` is only an approximate bound.
        """
        for _ in range(int(timeout) * 100):
            try:
                # Close the response right away: only reachability matters,
                # and an unclosed response leaks a socket.
                urlopen(url).close()
            except URLError:
                time.sleep(0.01)
            else:
                break

    @staticmethod
    def _stop_ckan_server(process):
        """Kill the server process started by ``_start_ckan_server``.

        :param process: the ``subprocess.Popen`` object of the server.
        :raises Exception: if the ``kill`` command fails.
        """
        pid = int(process.pid)
        if os.system("kill -9 %d" % pid):
            raise Exception(
                "Can't kill foreign CKAN instance (pid: %d)." % pid
            )
        # Reap the child so it does not linger as a zombie process.
        process.wait()
|
||||
|
||||
|
||||
class CkanProcess(CkanServerCase):
    """Test base class that keeps a CKAN server running for the whole class."""

    @classmethod
    def setup_class(cls):
        """Start the CKAN server and wait for it, unless the engine is SQLite."""
        if not engine_is_sqlite():
            # The database needs no initialisation here: the server uses the
            # same database as this process.
            # NOTE: despite its name, ``pid`` holds whatever
            # ``_start_ckan_server`` returns (the process object), which is
            # what ``_stop_ckan_server`` expects.
            server = cls._start_ckan_server()
            cls.pid = server
            cls._wait_for_url()
            return None

        return pytest.skip("Non-memory database needed for this test")

    @classmethod
    def teardown_class(cls):
        """Stop the server started in ``setup_class``."""
        server = cls.pid
        cls._stop_ckan_server(server)
|
12
doc/csw.rst
12
doc/csw.rst
|
@ -55,7 +55,7 @@ All necessary tasks are done with the ``ckan-pycsw`` command. To get more
|
|||
details of its usage, run the following::
|
||||
|
||||
cd /usr/lib/ckan/default/src/ckanext-spatial
|
||||
paster ckan-pycsw --help
|
||||
python bin/ckan_pycsw.py --help
|
||||
|
||||
|
||||
Setup
|
||||
|
@ -114,11 +114,11 @@ Setup
|
|||
|
||||
The rest of the options are described `here <http://docs.pycsw.org/en/latest/configuration.html>`_.
|
||||
|
||||
4. Setup the pycsw table. This is done with the ``ckan-pycsw`` paster command
|
||||
4. Setup the pycsw table. This is done with the ``ckan-pycsw`` script
|
||||
(Remember to have the virtualenv activated when running it)::
|
||||
|
||||
cd /usr/lib/ckan/default/src/ckanext-spatial
|
||||
paster ckan-pycsw setup -p /etc/ckan/default/pycsw.cfg
|
||||
python bin/ckan_pycsw.py setup -p /etc/ckan/default/pycsw.cfg
|
||||
|
||||
At this point you should be ready to run pycsw with the wsgi script that it
|
||||
includes::
|
||||
|
@ -135,7 +135,7 @@ Setup
|
|||
command for this::
|
||||
|
||||
cd /usr/lib/ckan/default/src/ckanext-spatial
|
||||
paster ckan-pycsw load -p /etc/ckan/default/pycsw.cfg
|
||||
python bin/ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg -u http://localhost
|
||||
|
||||
When the loading is finished, check that results are returned when visiting
|
||||
this link:
|
||||
|
@ -155,7 +155,7 @@ values can be set in the pycsw configuration ``metadata:main`` section. If you
|
|||
would like the CSW service metadata keywords to be reflective of the CKAN
|
||||
tags, run the following convenience command::
|
||||
|
||||
paster ckan-pycsw set_keywords -p /etc/ckan/default/pycsw.cfg
|
||||
python bin/ckan_pycsw.py set_keywords -p /etc/ckan/default/pycsw.cfg -u http://localhost
|
||||
|
||||
Note that you must have privileges to write to the pycsw configuration file.
|
||||
|
||||
|
@ -170,7 +170,7 @@ keep CKAN and pycsw in sync, and serve pycsw with Apache + mod_wsgi like CKAN.
|
|||
and copy the following lines::
|
||||
|
||||
# m h dom mon dow command
|
||||
0 * * * * /usr/lib/ckan/default/bin/paster --plugin=ckanext-spatial ckan-pycsw load -p /etc/ckan/default/pycsw.cfg
|
||||
0 * * * * /usr/lib/ckan/default/bin/python /usr/lib/ckan/default/src/ckanext-spatial/bin/ckan_pycsw.py load -p /etc/ckan/default/pycsw.cfg -u http://localhost
|
||||
|
||||
This particular example will run the load command every hour. You can of
|
||||
course modify this periodicity, for instance reducing it for huge instances.
|
||||
|
|
|
@ -140,6 +140,10 @@ plugins on the configuration ini file (eg when restarting Apache).
|
|||
If for some reason you need to explicitly create the table beforehand, you can
|
||||
do it with the following command (with the virtualenv activated)::
|
||||
|
||||
(pyenv) $ ckan --config=mysite.ini spatial initdb [srid]
|
||||
|
||||
On CKAN 2.8 and below use::
|
||||
|
||||
(pyenv) $ paster --plugin=ckanext-spatial spatial initdb [srid] --config=mysite.ini
|
||||
|
||||
You can define the SRID of the geometry column. Default is 4326. If you are not
|
||||
|
|
|
@ -61,6 +61,10 @@ synchronize the information stored in the extra with the geometry table.
|
|||
If you already have datasets when you enable Spatial Search then you'll need to
|
||||
reindex them:
|
||||
|
||||
ckan --config=/etc/ckan/default/development.ini search-index rebuild
|
||||
|
||||
.. note:: For CKAN 2.8 and below use::
|
||||
|
||||
paster --plugin=ckan search-index rebuild --config=/etc/ckan/default/development.ini
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue