BrBETA_dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java

175 lines
5.1 KiB
Java
Raw Normal View History

2020-05-27 11:34:13 +02:00
package eu.dnetlib.dhp.common.vocabulary;
2020-05-27 11:34:13 +02:00
import java.io.Serializable;
2020-06-09 19:52:53 +02:00
import java.util.*;
import java.util.stream.Collectors;
2020-05-27 11:34:13 +02:00
2020-05-29 12:03:51 +02:00
import org.apache.commons.lang3.StringUtils;
2020-05-27 11:34:13 +02:00
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
2020-05-29 12:03:51 +02:00
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
2020-05-27 11:34:13 +02:00
public class VocabularyGroup implements Serializable {
2020-05-27 11:34:13 +02:00
2020-06-15 18:32:24 +02:00
public static final String VOCABULARIES_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
2020-06-09 17:20:40 +02:00
+
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
"for $term in ($x//TERM)\n" +
"return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)";
2020-05-29 12:03:51 +02:00
2020-06-09 17:20:40 +02:00
public static final String VOCABULARY_SYNONYMS_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')\n"
+
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
"for $term in ($x//TERM)\n" +
"for $syn in ($term//SYNONYM/@term)\n" +
"return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)\n";
public static VocabularyGroup loadVocsFromIS(ISLookUpService isLookUpService) throws ISLookUpException {
2020-05-29 12:03:51 +02:00
final VocabularyGroup vocs = new VocabularyGroup();
2020-06-09 17:20:40 +02:00
for (final String s : isLookUpService.quickSearchProfile(VOCABULARIES_XQUERY)) {
2020-05-29 12:03:51 +02:00
final String[] arr = s.split("@=@");
if (arr.length == 4) {
final String vocId = arr[0].trim();
final String vocName = arr[1].trim();
final String termId = arr[2].trim();
final String termName = arr[3].trim();
if (!vocs.vocabularyExists(vocId)) {
vocs.addVocabulary(vocId, vocName);
}
vocs.addTerm(vocId, termId, termName);
2020-06-09 17:20:40 +02:00
}
}
for (final String s : isLookUpService.quickSearchProfile(VOCABULARY_SYNONYMS_XQUERY)) {
final String[] arr = s.split("@=@");
if (arr.length == 3) {
final String vocId = arr[0].trim();
final String termId = arr[1].trim();
final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn);
2020-05-29 12:03:51 +02:00
}
}
return vocs;
}
2020-05-27 11:34:13 +02:00
private final Map<String, Vocabulary> vocs = new HashMap<>();
public Set<String> vocabularyNames() {
return vocs.keySet();
}
2020-05-27 11:34:13 +02:00
public void addVocabulary(final String id, final String name) {
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
}
public void addTerm(final String vocId, final String id, final String name) {
if (vocabularyExists(vocId)) {
vocs.get(vocId.toLowerCase()).addTerm(id, name);
}
}
public VocabularyTerm getTerm(final String vocId, final String id) {
if (termExists(vocId, id)) {
return vocs.get(vocId.toLowerCase()).getTerm(id);
} else {
return new VocabularyTerm(id, id);
}
}
2020-06-09 19:52:53 +02:00
public Set<String> getTerms(String vocId) {
if (!vocabularyExists(vocId)) {
return new HashSet<>();
}
return vocs
.get(vocId.toLowerCase())
.getTerms()
.values()
.stream()
2021-08-11 12:13:22 +02:00
.map(VocabularyTerm::getId)
2020-06-09 19:52:53 +02:00
.collect(Collectors.toCollection(HashSet::new));
}
2020-06-09 17:20:40 +02:00
public Qualifier lookup(String vocId, String id) {
return Optional
.ofNullable(getSynonymAsQualifier(vocId, id))
.orElse(getTermAsQualifier(vocId, id));
}
2020-05-27 11:34:13 +02:00
public Qualifier getTermAsQualifier(final String vocId, final String id) {
2020-06-12 10:45:18 +02:00
if (vocabularyExists(vocId)) {
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
}
return OafMapperUtils.qualifier(id, id, "", "");
2020-06-09 17:20:40 +02:00
}
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", "");
2020-05-27 11:34:13 +02:00
}
2020-06-09 17:20:40 +02:00
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
2020-05-27 11:34:13 +02:00
}
2021-03-05 15:45:28 +01:00
/**
* getSynonymAsQualifierCaseSensitive
*
* refelects the situation to check caseSensitive vocabulary
*/
public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", "");
}
return vocs.get(vocId).getSynonymAsQualifier(syn);
}
/**
* termExists
*
* two methods: without and with caseSensitive check
*/
2020-05-27 11:34:13 +02:00
public boolean termExists(final String vocId, final String id) {
2021-03-05 15:45:28 +01:00
return termExists(vocId, id, Boolean.FALSE);
}
public boolean termExists(final String vocId, final String id, final Boolean caseSensitive) {
if (Boolean.TRUE.equals(caseSensitive)) {
return vocabularyExists(vocId) && vocs.get(vocId).termExists(id);
}
2020-05-27 11:34:13 +02:00
return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id);
}
public boolean vocabularyExists(final String vocId) {
return Optional
.ofNullable(vocId)
.map(String::toLowerCase)
2021-08-11 12:13:22 +02:00
.map(vocs::containsKey)
.orElse(false);
2020-05-27 11:34:13 +02:00
}
2020-06-09 17:20:40 +02:00
private void addSynonyms(final String vocId, final String termId, final String syn) {
String id = Optional
.ofNullable(vocId)
2021-08-11 12:13:22 +02:00
.map(String::toLowerCase)
2020-06-09 17:20:40 +02:00
.orElseThrow(
2021-08-11 12:13:22 +02:00
() -> new IllegalArgumentException(
String
.format(
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
2020-06-09 17:20:40 +02:00
Optional
.ofNullable(vocs.get(id))
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
2020-06-15 18:32:24 +02:00
.addSynonym(syn.toLowerCase(), termId);
2020-06-09 17:20:40 +02:00
}
2020-05-27 11:34:13 +02:00
}