forked from D-Net/dnet-hadoop
Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop
This commit is contained in:
commit
113c9b1de0
|
@ -126,9 +126,16 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
url.add(((Node) o).getText().trim());
|
url.add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
|
for (final Object o : doc
|
||||||
|
.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='landingPage']")) {
|
||||||
|
url.add(((Node) o).getText().trim());
|
||||||
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
||||||
url.add(((Node) o).getText().trim());
|
url.add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='landingPage']")) {
|
||||||
|
url.add(((Node) o).getText().trim());
|
||||||
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
||||||
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
|
@ -367,11 +374,13 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
res
|
res
|
||||||
.addAll(
|
.addAll(
|
||||||
prepareListStructPropsWithValidQualifier(
|
prepareListStructPropsWithValidQualifier(
|
||||||
doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", DNET_PID_TYPES, info));
|
doc, "//datacite:identifier[@identifierType != 'URL' and @identifierType != 'landingPage']",
|
||||||
|
"@identifierType", DNET_PID_TYPES, info));
|
||||||
res
|
res
|
||||||
.addAll(
|
.addAll(
|
||||||
prepareListStructPropsWithValidQualifier(
|
prepareListStructPropsWithValidQualifier(
|
||||||
doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']",
|
doc,
|
||||||
|
"//datacite:alternateIdentifier[@alternateIdentifierType != 'URL' and @alternateIdentifierType != 'landingPage']",
|
||||||
"@alternateIdentifierType", DNET_PID_TYPES, info));
|
"@alternateIdentifierType", DNET_PID_TYPES, info));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,7 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTermBySynonym(final String syn) {
|
public VocabularyTerm getTermBySynonym(final String syn) {
|
||||||
return getTerm(synonyms.get(syn));
|
return getTerm(synonyms.get(syn.toLowerCase()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Qualifier getTermAsQualifier(final String termId) {
|
public Qualifier getTermAsQualifier(final String termId) {
|
||||||
|
|
|
@ -3,7 +3,6 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.function.Supplier;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -14,7 +13,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class VocabularyGroup implements Serializable {
|
public class VocabularyGroup implements Serializable {
|
||||||
|
|
||||||
public static final String VOCABULARIES_XQUERY = "for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
|
public static final String VOCABULARIES_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
|
||||||
+
|
+
|
||||||
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||||
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||||
|
@ -46,7 +45,7 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
vocs.addTerm(vocId, termId, termName);
|
vocs.addTerm(vocId, termId, termName);
|
||||||
vocs.addSynonyms(vocId, termId, termId);
|
// vocs.addSynonyms(vocId, termId, termId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +57,7 @@ public class VocabularyGroup implements Serializable {
|
||||||
final String syn = arr[2].trim();
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
vocs.addSynonyms(vocId, termId, syn);
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
vocs.addSynonyms(vocId, termId, termId);
|
// vocs.addSynonyms(vocId, termId, termId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,7 +134,7 @@ public class VocabularyGroup implements Serializable {
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(vocs.get(id))
|
.ofNullable(vocs.get(id))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
||||||
.addSynonym(syn, termId);
|
.addSynonym(syn.toLowerCase(), termId);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
DROP VIEW IF EXISTS ${hiveDbName}.result;
|
DROP VIEW IF EXISTS ${hiveDbName}.result;
|
||||||
|
|
||||||
CREATE VIEW IF NOT EXISTS result as
|
CREATE VIEW IF NOT EXISTS result as
|
||||||
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.publication p
|
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.publication p
|
||||||
union all
|
union all
|
||||||
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.dataset d
|
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.dataset d
|
||||||
union all
|
union all
|
||||||
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.software s
|
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.software s
|
||||||
union all
|
union all
|
||||||
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, instance from ${hiveDbName}.otherresearchproduct o;
|
select id, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.otherresearchproduct o;
|
||||||
|
|
|
@ -60,8 +60,8 @@ public class CleaningFunctionTest {
|
||||||
|
|
||||||
assertNotNull(p_out);
|
assertNotNull(p_out);
|
||||||
|
|
||||||
assertEquals("eng", p_out.getLanguage().getClassid());
|
assertEquals("und", p_out.getLanguage().getClassid());
|
||||||
assertEquals("English", p_out.getLanguage().getClassname());
|
assertEquals("Undetermined", p_out.getLanguage().getClassname());
|
||||||
|
|
||||||
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
|
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
|
||||||
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
|
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
|
||||||
|
|
|
@ -281,8 +281,8 @@
|
||||||
"value": "VIRTA"
|
"value": "VIRTA"
|
||||||
},
|
},
|
||||||
"instancetype": {
|
"instancetype": {
|
||||||
"classid": "Comentario",
|
"classid": "Comment/debate",
|
||||||
"classname": "Comentario",
|
"classname": "Comment/debate",
|
||||||
"schemeid": "dnet:publication_resource",
|
"schemeid": "dnet:publication_resource",
|
||||||
"schemename": "dnet:publication_resource"
|
"schemename": "dnet:publication_resource"
|
||||||
},
|
},
|
||||||
|
@ -317,8 +317,8 @@
|
||||||
"vol": ""
|
"vol": ""
|
||||||
},
|
},
|
||||||
"language": {
|
"language": {
|
||||||
"classid": "en",
|
"classid": "UNKNOWN",
|
||||||
"classname": "en",
|
"classname": "UNKNOWN",
|
||||||
"schemeid": "dnet:languages",
|
"schemeid": "dnet:languages",
|
||||||
"schemename": "dnet:languages"
|
"schemename": "dnet:languages"
|
||||||
},
|
},
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue