forked from antonis.lempesis/dnet-hadoop
introduced mapping for ExternalReferences, made urls defined within an instance unique
This commit is contained in:
parent
8c67073a07
commit
62ea19f1d3
|
@ -49,6 +49,9 @@ public class ModelConstants {
|
||||||
public static final String HAS_PARTICIPANT = "hasParticipant";
|
public static final String HAS_PARTICIPANT = "hasParticipant";
|
||||||
public static final String IS_PARTICIPANT = "isParticipant";
|
public static final String IS_PARTICIPANT = "isParticipant";
|
||||||
|
|
||||||
|
public static final String UNKNOWN = "UNKNOWN";
|
||||||
|
public static final String NOT_AVAILABLE = "not available";
|
||||||
|
|
||||||
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
|
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
|
||||||
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
|
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
|
||||||
DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES);
|
DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES);
|
||||||
|
|
|
@ -1,12 +1,10 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.migration;
|
package eu.dnetlib.dhp.actionmanager.migration;
|
||||||
|
|
||||||
import static eu.dnetlib.data.proto.KindProtos.Kind.entity;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import static eu.dnetlib.data.proto.KindProtos.Kind.relation;
|
|
||||||
import static eu.dnetlib.data.proto.TypeProtos.*;
|
|
||||||
import static eu.dnetlib.data.proto.TypeProtos.Type.*;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -21,10 +19,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class ProtoConverter implements Serializable {
|
public class ProtoConverter implements Serializable {
|
||||||
|
|
||||||
public static final String UNKNOWN = "UNKNOWN";
|
|
||||||
public static final String NOT_AVAILABLE = "not available";
|
|
||||||
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
|
|
||||||
|
|
||||||
public static Oaf convert(OafProtos.Oaf oaf) {
|
public static Oaf convert(OafProtos.Oaf oaf) {
|
||||||
try {
|
try {
|
||||||
switch (oaf.getKind()) {
|
switch (oaf.getKind()) {
|
||||||
|
@ -64,6 +58,7 @@ public class ProtoConverter implements Serializable {
|
||||||
case result:
|
case result:
|
||||||
final Result r = convertResult(oaf);
|
final Result r = convertResult(oaf);
|
||||||
r.setInstance(convertInstances(oaf));
|
r.setInstance(convertInstances(oaf));
|
||||||
|
r.setExternalReference(convertExternalRefs(oaf));
|
||||||
return r;
|
return r;
|
||||||
case project:
|
case project:
|
||||||
return convertProject(oaf);
|
return convertProject(oaf);
|
||||||
|
@ -94,13 +89,44 @@ public class ProtoConverter implements Serializable {
|
||||||
i.setHostedby(mapKV(ri.getHostedby()));
|
i.setHostedby(mapKV(ri.getHostedby()));
|
||||||
i.setInstancetype(mapQualifier(ri.getInstancetype()));
|
i.setInstancetype(mapQualifier(ri.getInstancetype()));
|
||||||
i.setLicense(mapStringField(ri.getLicense()));
|
i.setLicense(mapStringField(ri.getLicense()));
|
||||||
i.setUrl(ri.getUrlList());
|
i
|
||||||
|
.setUrl(
|
||||||
|
ri.getUrlList() != null ? ri
|
||||||
|
.getUrlList()
|
||||||
|
.stream()
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toCollection(ArrayList::new)) : null);
|
||||||
i.setRefereed(mapStringField(ri.getRefereed()));
|
i.setRefereed(mapStringField(ri.getRefereed()));
|
||||||
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
||||||
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
|
||||||
|
ResultProtos.Result r = oaf.getEntity().getResult();
|
||||||
|
if (r.getExternalReferenceCount() > 0) {
|
||||||
|
return r
|
||||||
|
.getExternalReferenceList()
|
||||||
|
.stream()
|
||||||
|
.map(e -> convertExtRef(e))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
return Lists.newArrayList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
|
||||||
|
ExternalReference ex = new ExternalReference();
|
||||||
|
ex.setUrl(e.getUrl());
|
||||||
|
ex.setSitename(e.getSitename());
|
||||||
|
ex.setRefidentifier(e.getRefidentifier());
|
||||||
|
ex.setQuery(e.getQuery());
|
||||||
|
ex.setQualifier(mapQualifier(e.getQualifier()));
|
||||||
|
ex.setLabel(e.getLabel());
|
||||||
|
ex.setDescription(e.getDescription());
|
||||||
|
ex.setDataInfo(ex.getDataInfo());
|
||||||
|
return ex;
|
||||||
|
}
|
||||||
|
|
||||||
private static Organization convertOrganization(OafProtos.Oaf oaf) {
|
private static Organization convertOrganization(OafProtos.Oaf oaf) {
|
||||||
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
|
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
|
||||||
final Organization org = setOaf(new Organization(), oaf);
|
final Organization org = setOaf(new Organization(), oaf);
|
||||||
|
|
|
@ -5,10 +5,8 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
import java.util.stream.Collectors;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
@ -115,12 +113,17 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
.setProcessingchargecurrency(
|
.setProcessingchargecurrency(
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//dc:identifier")) {
|
List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
|
||||||
final String url = ((Node) o).getText().trim();
|
instance
|
||||||
if (url.startsWith("http")) {
|
.setUrl(
|
||||||
instance.setUrl(Arrays.asList(url));
|
nodes
|
||||||
}
|
.stream()
|
||||||
}
|
.filter(n -> StringUtils.isNotBlank(n.getText()))
|
||||||
|
.map(n -> n.getText().trim())
|
||||||
|
.filter(u -> u.startsWith("http"))
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toCollection(ArrayList::new)));
|
||||||
|
|
||||||
return Lists.newArrayList(instance);
|
return Lists.newArrayList(instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,10 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
|
@ -80,6 +77,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
final KeyValue hostedby) {
|
final KeyValue hostedby) {
|
||||||
|
|
||||||
final Instance instance = new Instance();
|
final Instance instance = new Instance();
|
||||||
|
final Set<String> url = new HashSet<>();
|
||||||
instance.setUrl(new ArrayList<>());
|
instance.setUrl(new ArrayList<>());
|
||||||
instance
|
instance
|
||||||
.setInstancetype(
|
.setInstancetype(
|
||||||
|
@ -100,17 +98,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||||
instance.getUrl().add(((Node) o).getText().trim());
|
url.add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
||||||
instance.getUrl().add(((Node) o).getText().trim());
|
url.add(((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
||||||
instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
||||||
instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||||
}
|
}
|
||||||
|
instance.getUrl().addAll(url);
|
||||||
return Arrays.asList(instance);
|
return Arrays.asList(instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue