[#62] Clean up keywords parsing
Keywords were extracted via three different elements: `keyword-inspire-theme`, `keyword-controlled-other` and `keyword-free-text`. The last one didn't actually do anything, and the second duplicated XPaths from the first and added a non-standard one. A new `keywords` key has been added which contains all keyword objects, including their type. This is not used to modify the `tags` key right now.
This commit is contained in:
parent
5b29e47efb
commit
e9c7bbcabe
|
@ -379,6 +379,29 @@ class ISOBrowseGraphic(ISOElement):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class ISOKeyword(ISOElement):
|
||||||
|
|
||||||
|
elements = [
|
||||||
|
ISOElement(
|
||||||
|
name="keyword",
|
||||||
|
search_paths=[
|
||||||
|
"gmd:keyword/gco:CharacterString/text()",
|
||||||
|
],
|
||||||
|
multiplicity="*",
|
||||||
|
),
|
||||||
|
ISOElement(
|
||||||
|
name="type",
|
||||||
|
search_paths=[
|
||||||
|
"gmd:type/gmd:MD_KeywordTypeCode/@codeListValue",
|
||||||
|
"gmd:type/gmd:MD_KeywordTypeCode/text()",
|
||||||
|
],
|
||||||
|
multiplicity="0..1",
|
||||||
|
),
|
||||||
|
# If Thesaurus information is needed at some point, this is the
|
||||||
|
# place to add it
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class ISODocument(MappedXmlDocument):
|
class ISODocument(MappedXmlDocument):
|
||||||
|
|
||||||
# Attribute specifications from "XPaths for GEMINI" by Peter Parslow.
|
# Attribute specifications from "XPaths for GEMINI" by Peter Parslow.
|
||||||
|
@ -499,6 +522,14 @@ class ISODocument(MappedXmlDocument):
|
||||||
],
|
],
|
||||||
multiplicity="*",
|
multiplicity="*",
|
||||||
),
|
),
|
||||||
|
ISOKeyword(
|
||||||
|
name="keywords",
|
||||||
|
search_paths=[
|
||||||
|
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords",
|
||||||
|
"gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords",
|
||||||
|
],
|
||||||
|
multiplicity="*"
|
||||||
|
),
|
||||||
ISOElement(
|
ISOElement(
|
||||||
name="keyword-inspire-theme",
|
name="keyword-inspire-theme",
|
||||||
search_paths=[
|
search_paths=[
|
||||||
|
@ -507,21 +538,14 @@ class ISODocument(MappedXmlDocument):
|
||||||
],
|
],
|
||||||
multiplicity="*",
|
multiplicity="*",
|
||||||
),
|
),
|
||||||
|
# Deprecated: kept for backwards compatibility
|
||||||
ISOElement(
|
ISOElement(
|
||||||
name="keyword-controlled-other",
|
name="keyword-controlled-other",
|
||||||
search_paths=[
|
search_paths=[
|
||||||
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
|
|
||||||
"gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
|
|
||||||
"gmd:identificationInfo/srv:SV_ServiceIdentification/srv:keywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
|
"gmd:identificationInfo/srv:SV_ServiceIdentification/srv:keywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()",
|
||||||
],
|
],
|
||||||
multiplicity="*",
|
multiplicity="*",
|
||||||
),
|
),
|
||||||
ISOElement(
|
|
||||||
name="keyword-free-text",
|
|
||||||
search_paths=[
|
|
||||||
],
|
|
||||||
multiplicity="*",
|
|
||||||
),
|
|
||||||
ISOElement(
|
ISOElement(
|
||||||
name="limitations-on-public-access",
|
name="limitations-on-public-access",
|
||||||
search_paths=[
|
search_paths=[
|
||||||
|
@ -762,7 +786,7 @@ class ISODocument(MappedXmlDocument):
|
||||||
|
|
||||||
def infer_tags(self, values):
|
def infer_tags(self, values):
|
||||||
tags = []
|
tags = []
|
||||||
for key in ['keyword-inspire-theme', 'keyword-controlled-other', 'keyword-free-text']:
|
for key in ['keyword-inspire-theme', 'keyword-controlled-other']:
|
||||||
for item in values[key]:
|
for item in values[key]:
|
||||||
if item not in tags:
|
if item not in tags:
|
||||||
tags.append(item)
|
tags.append(item)
|
||||||
|
|
Loading…
Reference in New Issue