[#62] Clean up keywords parsing

Keywords were extracted via three different elements:
`keyword-inspire-theme`, `keyword-controlled-other` and
`keyword-free-text`. The last one didn't actually do anything, and the
second duplicated XPaths from the first and added a non-standard one.

A new `keywords` key has been added which contains all keyword objects,
including type. This is not used to modify the `tags` key right now.
This commit is contained in:
amercader 2014-03-11 14:09:55 +00:00
parent 5b29e47efb
commit e9c7bbcabe
1 changed files with 33 additions and 9 deletions

View File

@ -379,6 +379,29 @@ class ISOBrowseGraphic(ISOElement):
] ]
class ISOKeyword(ISOElement):
    """Mapping for a single keyword group and its optional type code.

    The search paths below are relative: they start at ``gmd:keyword`` and
    ``gmd:type`` rather than at the document root, so they are meant to be
    evaluated against whatever node the owning element's own search paths
    select (presumably a ``gmd:MD_Keywords`` node -- confirm against the
    element that instantiates this class).
    """

    elements = [
        # Keyword strings found under the matched node.
        ISOElement(
            name="keyword",
            search_paths=["gmd:keyword/gco:CharacterString/text()"],
            multiplicity="*",
        ),
        # Keyword type: the codelist attribute and the element text are both
        # listed as candidate locations.
        ISOElement(
            name="type",
            search_paths=[
                "gmd:type/gmd:MD_KeywordTypeCode/@codeListValue",
                "gmd:type/gmd:MD_KeywordTypeCode/text()",
            ],
            multiplicity="0..1",
        ),
        # If Thesaurus information is needed at some point, this is the
        # place to add it
    ]
class ISODocument(MappedXmlDocument): class ISODocument(MappedXmlDocument):
# Attribute specifications from "XPaths for GEMINI" by Peter Parslow. # Attribute specifications from "XPaths for GEMINI" by Peter Parslow.
@ -499,6 +522,14 @@ class ISODocument(MappedXmlDocument):
], ],
multiplicity="*", multiplicity="*",
), ),
ISOKeyword(
name="keywords",
search_paths=[
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords",
"gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords",
],
multiplicity="*"
),
ISOElement( ISOElement(
name="keyword-inspire-theme", name="keyword-inspire-theme",
search_paths=[ search_paths=[
@ -507,21 +538,14 @@ class ISODocument(MappedXmlDocument):
], ],
multiplicity="*", multiplicity="*",
), ),
# Deprecated: kept for backwards compatibility
ISOElement( ISOElement(
name="keyword-controlled-other", name="keyword-controlled-other",
search_paths=[ search_paths=[
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
"gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
"gmd:identificationInfo/srv:SV_ServiceIdentification/srv:keywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:keywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
], ],
multiplicity="*", multiplicity="*",
), ),
ISOElement(
name="keyword-free-text",
search_paths=[
],
multiplicity="*",
),
ISOElement( ISOElement(
name="limitations-on-public-access", name="limitations-on-public-access",
search_paths=[ search_paths=[
@ -762,7 +786,7 @@ class ISODocument(MappedXmlDocument):
def infer_tags(self, values): def infer_tags(self, values):
tags = [] tags = []
for key in ['keyword-inspire-theme', 'keyword-controlled-other', 'keyword-free-text']: for key in ['keyword-inspire-theme', 'keyword-controlled-other']:
for item in values[key]: for item in values[key]:
if item not in tags: if item not in tags:
tags.append(item) tags.append(item)