forked from antonis.lempesis/dnet-hadoop
introduced mapping for ExternalReferences, made urls defined within an instance unique
This commit is contained in:
parent
8c67073a07
commit
62ea19f1d3
|
@ -49,6 +49,9 @@ public class ModelConstants {
|
|||
public static final String HAS_PARTICIPANT = "hasParticipant";
|
||||
public static final String IS_PARTICIPANT = "isParticipant";
|
||||
|
||||
public static final String UNKNOWN = "UNKNOWN";
|
||||
public static final String NOT_AVAILABLE = "not available";
|
||||
|
||||
public static final Qualifier PUBLICATION_DEFAULT_RESULTTYPE = qualifier(
|
||||
PUBLICATION_RESULTTYPE_CLASSID, PUBLICATION_RESULTTYPE_CLASSID,
|
||||
DNET_RESULT_TYPOLOGIES, DNET_RESULT_TYPOLOGIES);
|
||||
|
|
|
@ -1,12 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.migration;
|
||||
|
||||
import static eu.dnetlib.data.proto.KindProtos.Kind.entity;
|
||||
import static eu.dnetlib.data.proto.KindProtos.Kind.relation;
|
||||
import static eu.dnetlib.data.proto.TypeProtos.*;
|
||||
import static eu.dnetlib.data.proto.TypeProtos.Type.*;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -21,10 +19,6 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
|||
|
||||
public class ProtoConverter implements Serializable {
|
||||
|
||||
public static final String UNKNOWN = "UNKNOWN";
|
||||
public static final String NOT_AVAILABLE = "not available";
|
||||
public static final String DNET_ACCESS_MODES = "dnet:access_modes";
|
||||
|
||||
public static Oaf convert(OafProtos.Oaf oaf) {
|
||||
try {
|
||||
switch (oaf.getKind()) {
|
||||
|
@ -64,6 +58,7 @@ public class ProtoConverter implements Serializable {
|
|||
case result:
|
||||
final Result r = convertResult(oaf);
|
||||
r.setInstance(convertInstances(oaf));
|
||||
r.setExternalReference(convertExternalRefs(oaf));
|
||||
return r;
|
||||
case project:
|
||||
return convertProject(oaf);
|
||||
|
@ -94,13 +89,44 @@ public class ProtoConverter implements Serializable {
|
|||
i.setHostedby(mapKV(ri.getHostedby()));
|
||||
i.setInstancetype(mapQualifier(ri.getInstancetype()));
|
||||
i.setLicense(mapStringField(ri.getLicense()));
|
||||
i.setUrl(ri.getUrlList());
|
||||
i
|
||||
.setUrl(
|
||||
ri.getUrlList() != null ? ri
|
||||
.getUrlList()
|
||||
.stream()
|
||||
.distinct()
|
||||
.collect(Collectors.toCollection(ArrayList::new)) : null);
|
||||
i.setRefereed(mapStringField(ri.getRefereed()));
|
||||
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
||||
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
||||
return i;
|
||||
}
|
||||
|
||||
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
|
||||
ResultProtos.Result r = oaf.getEntity().getResult();
|
||||
if (r.getExternalReferenceCount() > 0) {
|
||||
return r
|
||||
.getExternalReferenceList()
|
||||
.stream()
|
||||
.map(e -> convertExtRef(e))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
|
||||
ExternalReference ex = new ExternalReference();
|
||||
ex.setUrl(e.getUrl());
|
||||
ex.setSitename(e.getSitename());
|
||||
ex.setRefidentifier(e.getRefidentifier());
|
||||
ex.setQuery(e.getQuery());
|
||||
ex.setQualifier(mapQualifier(e.getQualifier()));
|
||||
ex.setLabel(e.getLabel());
|
||||
ex.setDescription(e.getDescription());
|
||||
ex.setDataInfo(ex.getDataInfo());
|
||||
return ex;
|
||||
}
|
||||
|
||||
private static Organization convertOrganization(OafProtos.Oaf oaf) {
|
||||
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
|
||||
final Organization org = setOaf(new Organization(), oaf);
|
||||
|
|
|
@ -5,10 +5,8 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId
|
|||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
|
@ -115,12 +113,17 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
.setProcessingchargecurrency(
|
||||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||
|
||||
for (final Object o : doc.selectNodes("//dc:identifier")) {
|
||||
final String url = ((Node) o).getText().trim();
|
||||
if (url.startsWith("http")) {
|
||||
instance.setUrl(Arrays.asList(url));
|
||||
}
|
||||
}
|
||||
List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
|
||||
instance
|
||||
.setUrl(
|
||||
nodes
|
||||
.stream()
|
||||
.filter(n -> StringUtils.isNotBlank(n.getText()))
|
||||
.map(n -> n.getText().trim())
|
||||
.filter(u -> u.startsWith("http"))
|
||||
.distinct()
|
||||
.collect(Collectors.toCollection(ArrayList::new)));
|
||||
|
||||
return Lists.newArrayList(instance);
|
||||
}
|
||||
|
||||
|
|
|
@ -6,10 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
|||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
|
@ -80,6 +77,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
final KeyValue hostedby) {
|
||||
|
||||
final Instance instance = new Instance();
|
||||
final Set<String> url = new HashSet<>();
|
||||
instance.setUrl(new ArrayList<>());
|
||||
instance
|
||||
.setInstancetype(
|
||||
|
@ -100,17 +98,18 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||
|
||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='URL']")) {
|
||||
instance.getUrl().add(((Node) o).getText().trim());
|
||||
url.add(((Node) o).getText().trim());
|
||||
}
|
||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='URL']")) {
|
||||
instance.getUrl().add(((Node) o).getText().trim());
|
||||
url.add(((Node) o).getText().trim());
|
||||
}
|
||||
for (final Object o : doc.selectNodes("//datacite:alternateIdentifier[@alternateIdentifierType='DOI']")) {
|
||||
instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||
}
|
||||
for (final Object o : doc.selectNodes("//datacite:identifier[@identifierType='DOI']")) {
|
||||
instance.getUrl().add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||
url.add(HTTP_DX_DOI_PREIFX + ((Node) o).getText().trim());
|
||||
}
|
||||
instance.getUrl().addAll(url);
|
||||
return Arrays.asList(instance);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue