From 52e2315ba270384704f6839c5c95c7cd6b68df95 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 10:23:00 +0200 Subject: [PATCH 1/6] removed trick for datasourcetypeui --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../dhp/oa/provision/XmlConverterJob.java | 73 +-- .../oa/provision/utils/XmlRecordFactory.java | 447 ++++++++---------- .../provision/input_params_xml_converter.json | 6 - .../dhp/oa/provision/oozie_app/workflow.xml | 5 - .../provision/IndexRecordTransformerTest.java | 36 +- .../oa/provision/XmlRecordFactoryTest.java | 70 ++- pom.xml | 2 +- 8 files changed, 278 insertions(+), 364 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b94422..58009bfcfc 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446f..518f411204 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -42,61 +41,51 @@ public class XmlConverterJob { public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( XmlConverterJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); + final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String isLookupUrl = parser.get("isLookupUrl"); + final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - String otherDsTypeId = parser.get("otherDsTypeId"); - log.info("otherDsTypeId: {}", otherDsTypeId); - - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - convertToXml( - spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + }); } private static void convertToXml( - SparkSession spark, - String inputPath, - String outputPath, - ContextMapper contextMapper, - String otherDsTypeId) { + final SparkSession spark, + final String inputPath, + final String outputPath, + final ContextMapper contextMapper) { final XmlRecordFactory recordFactory = new XmlRecordFactory( prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, - otherDsTypeId); + schemaLocation); final List paths = HdfsSupport .listFiles(inputPath, spark.sparkContext().hadoopConfiguration()); @@ -116,16 +105,15 @@ public class XmlConverterJob { .mapToPair( (PairFunction, Text, Text>) t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) - .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static Map prepareAccumulators(SparkContext sc) { - Map accumulators = Maps.newHashMap(); + private static Map prepareAccumulators(final SparkContext sc) { + final Map accumulators = Maps.newHashMap(); accumulators .put( "resultResult_similarity_isAmongTopNSimilarDocuments", @@ -136,15 +124,13 @@ public class XmlConverterJob { sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments")); accumulators .put( - "resultResult_supplement_isSupplementTo", - sc.longAccumulator("resultResult_supplement_isSupplementTo")); + "resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo")); accumulators .put( "resultResult_supplement_isSupplementedBy", sc.longAccumulator("resultResult_supplement_isSupplementedBy")); accumulators - .put( - "resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); + .put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges")); accumulators @@ -152,16 +138,11 @@ public class XmlConverterJob { "resultResult_publicationDataset_isRelatedTo", sc.longAccumulator("resultResult_publicationDataset_isRelatedTo")); accumulators - .put( - "resultResult_relationship_isRelatedTo", - sc.longAccumulator("resultResult_relationship_isRelatedTo")); + .put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo")); accumulators - .put( - "resultProject_outcome_isProducedBy", - sc.longAccumulator("resultProject_outcome_isProducedBy")); + .put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy")); accumulators - .put( - "resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); + .put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "resultOrganization_affiliation_isAuthorInstitutionOf", @@ -184,9 +165,7 @@ public class XmlConverterJob { "organizationOrganization_dedup_isMergedIn", sc.longAccumulator("organizationOrganization_dedup_isMergedIn")); accumulators - .put( - "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + .put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d23718..392d3cde6d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -9,14 +10,23 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; -import javax.xml.transform.*; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -36,19 +46,38 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.*; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.MainEntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.ExternalReference; +import eu.dnetlib.dhp.schema.oaf.ExtraInfo; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { - private final Map accumulators; + /** + * + */ + private static final long serialVersionUID = 2912912999272373172L; - private final Set specialDatasourceTypes; + private final Map accumulators; private final ContextMapper contextMapper; @@ -61,23 +90,20 @@ public class XmlRecordFactory implements Serializable { public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { - this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI); + this(Maps.newHashMap(), contextMapper, indent, schemaLocation); } public XmlRecordFactory( final Map accumulators, final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { this.accumulators = accumulators; this.contextMapper = contextMapper; this.schemaLocation = schemaLocation; - this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI)); this.indent = indent; } @@ -87,8 +113,8 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); // final OafEntity entity = toOafEntity(je.getEntity()); - OafEntity entity = je.getEntity(); - TemplateFactory templateFactory = new TemplateFactory(); + final OafEntity entity = je.getEntity(); + final TemplateFactory templateFactory = new TemplateFactory(); try { final EntityType type = EntityType.fromClass(entity.getClass()); @@ -110,11 +136,7 @@ public class XmlRecordFactory implements Serializable { final String body = templateFactory .buildBody( - mainType, - metadata, - relations, - listChildren(entity, je, templateFactory), - listExtraInfo(entity)); + mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity)); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); } catch (final Throwable e) { @@ -142,19 +164,19 @@ public class XmlRecordFactory implements Serializable { default: throw new IllegalArgumentException("invalid type: " + type); } - } catch (IOException e) { + } catch (final IOException e) { throw new IllegalArgumentException(e); } } - private String printXML(String xml, boolean indent) { + private String printXML(final String xml, final boolean indent) { try { final Document doc = new SAXReader().read(new StringReader(xml)); - OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); + final OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); - StringWriter sw = new StringWriter(); - XMLWriter writer = new XMLWriter(sw, format); + final StringWriter sw = new StringWriter(); + final XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); } catch (IOException | DocumentException e) { @@ -163,7 +185,9 @@ public class XmlRecordFactory implements Serializable { } private List metadata( - final EntityType type, final OafEntity entity, final Set contexts) { + final EntityType type, + final OafEntity entity, + final Set contexts) { final List metadata = Lists.newArrayList(); @@ -230,72 +254,63 @@ public class XmlRecordFactory implements Serializable { .getAuthor() .stream() .filter(Objects::nonNull) - .map( - a -> { - final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) - && isNotBlank(sp.getValue())) - .collect( - Collectors - .toMap( - p -> getAuthorPidType(p.getQualifier().getClassid()), - p -> p, - (p1, p2) -> p1)) - .values() - .stream() - .collect( - Collectors - .groupingBy( - p -> p.getValue(), - Collectors - .mapping( - p -> p, - Collectors.minBy(new AuthorPidTypeComparator())))) - .values() - .stream() - .map(op -> op.get()) - .forEach( - sp -> { - String pidType = getAuthorPidType(sp.getQualifier().getClassid()); - String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - - // ugly hack: some records provide swapped pidtype and pidvalue - if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { - sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); - } else { - if (isNotBlank(pidType)) { - sb - .append( - String - .format( - " %s=\"%s\"", - pidType, - pidValue - .toLowerCase() - .replaceAll("^.*orcid\\.org\\/", ""))); - } - } - }); - } + .map(a -> { + final StringBuilder sb = new StringBuilder("" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); - return sb.toString(); - }) + .append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\""); + } + if (a.getPid() != null) { + a + .getPid() + .stream() + .filter(Objects::nonNull) + .filter( + sp -> isNotBlank(sp.getQualifier().getClassid()) + && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p, + (p1, p2) -> p1)) + .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), Collectors + .mapping( + p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) + .forEach(sp -> { + final String pidType = getAuthorPidType(sp.getQualifier().getClassid()); + final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); + + // ugly hack: some records provide swapped pidtype and pidvalue + if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { + sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); + } else { + if (isNotBlank(pidType)) { + sb + .append( + String + .format( + " %s=\"%s\"", pidType, pidValue + .toLowerCase() + .replaceAll("^.*orcid\\.org\\/", ""))); + } + } + }); + } + sb + .append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); + return sb.toString(); + }) .collect(Collectors.toList())); } if (r.getContributor() != null) { @@ -332,8 +347,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", r.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", r.getDateofacceptance().getValue())); } if (r.getDescription() != null) { metadata @@ -347,8 +361,7 @@ public class XmlRecordFactory implements Serializable { } if (r.getEmbargoenddate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); + .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); } if (r.getSubject() != null) { metadata @@ -423,23 +436,20 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "lastmetadataupdate", d.getLastmetadataupdate().getValue())); + .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue())); } if (d.getMetadataversionnumber() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "metadataversionnumber", d.getMetadataversionnumber().getValue())); + .asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue())); } if (d.getSize() != null) { metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue())); } if (d.getStoragedate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); + .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); } if (d.getVersion() != null) { metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue())); @@ -509,98 +519,87 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); + .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); } if (s.getProgrammingLanguage() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "programmingLanguage", s.getProgrammingLanguage())); + .mapQualifier("programmingLanguage", s.getProgrammingLanguage())); } break; case datasource: final Datasource ds = (Datasource) entity; if (ds.getDatasourcetype() != null) { - mapDatasourceType(metadata, ds.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype())); + } + if (ds.getDatasourcetypeui() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); } if (ds.getOpenairecompatibility() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", ds.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", ds.getOpenairecompatibility())); } if (ds.getOfficialname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); + .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); } if (ds.getEnglishname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); + .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); } if (ds.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); } if (ds.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue())); } if (ds.getContactemail() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); + .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); } if (ds.getNamespaceprefix() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "namespaceprefix", ds.getNamespaceprefix().getValue())); + .asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue())); } if (ds.getLatitude() != null) { metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue())); } if (ds.getLongitude() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); + .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); } if (ds.getDateofvalidation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofvalidation", ds.getDateofvalidation().getValue())); + .asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue())); } if (ds.getDescription() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); + .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); } if (ds.getOdnumberofitems() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitems", ds.getOdnumberofitems().getValue())); + .asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue())); } if (ds.getOdnumberofitemsdate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); + .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); } if (ds.getOdpolicies() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); + .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); } if (ds.getOdlanguages() != null) { metadata @@ -635,50 +634,43 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "releasestartdate", ds.getReleaseenddate().getValue())); + .asXmlElement("releasestartdate", ds.getReleaseenddate().getValue())); } if (ds.getReleaseenddate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "releaseenddate", ds.getReleaseenddate().getValue())); + .asXmlElement("releaseenddate", ds.getReleaseenddate().getValue())); } if (ds.getMissionstatementurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "missionstatementurl", ds.getMissionstatementurl().getValue())); + .asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue())); } if (ds.getDataprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dataprovider", ds.getDataprovider().getValue().toString())); + .asXmlElement("dataprovider", ds.getDataprovider().getValue().toString())); } if (ds.getServiceprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "serviceprovider", ds.getServiceprovider().getValue().toString())); + .asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString())); } if (ds.getDatabaseaccesstype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "databaseaccesstype", ds.getDatabaseaccesstype().getValue())); + .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue())); } if (ds.getDatauploadtype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadtype", ds.getDatauploadtype().getValue())); + .asXmlElement("datauploadtype", ds.getDatauploadtype().getValue())); } if (ds.getDatabaseaccessrestriction() != null) { metadata @@ -691,39 +683,33 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadrestriction", ds.getDatauploadrestriction().getValue())); + .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue())); } if (ds.getVersioning() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "versioning", ds.getVersioning().getValue().toString())); + .asXmlElement("versioning", ds.getVersioning().getValue().toString())); } if (ds.getCitationguidelineurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "citationguidelineurl", ds.getCitationguidelineurl().getValue())); + .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); } if (ds.getQualitymanagementkind() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "qualitymanagementkind", ds.getQualitymanagementkind().getValue())); + .asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue())); } if (ds.getPidsystems() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); + .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); } if (ds.getCertificates() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); + .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); } if (ds.getPolicies() != null) { metadata @@ -757,13 +743,11 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "legalshortname", o.getLegalshortname().getValue())); + .asXmlElement("legalshortname", o.getLegalshortname().getValue())); } if (o.getLegalname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); + .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); } if (o.getAlternativeNames() != null) { metadata @@ -777,8 +761,7 @@ public class XmlRecordFactory implements Serializable { } if (o.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); @@ -786,32 +769,27 @@ public class XmlRecordFactory implements Serializable { if (o.getEclegalbody() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); } if (o.getEclegalperson() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); } if (o.getEcnonprofit() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); } if (o.getEcresearchorganization() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecresearchorganization", o.getEcresearchorganization().getValue())); + .asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue())); } if (o.getEchighereducation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "echighereducation", o.getEchighereducation().getValue())); + .asXmlElement("echighereducation", o.getEchighereducation().getValue())); } if (o.getEcinternationalorganizationeurinterests() != null) { metadata @@ -830,20 +808,17 @@ public class XmlRecordFactory implements Serializable { } if (o.getEcenterprise() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); } if (o.getEcsmevalidated() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecsmevalidated", o.getEcsmevalidated().getValue())); + .asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue())); } if (o.getEcnutscode() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); } if (o.getCountry() != null) { metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry())); @@ -855,8 +830,7 @@ public class XmlRecordFactory implements Serializable { if (p.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); } if (p.getCode() != null) { metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue())); @@ -869,8 +843,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getStartdate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); + .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); } if (p.getEnddate() != null) { metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue())); @@ -879,8 +852,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "callidentifier", p.getCallidentifier().getValue())); + .asXmlElement("callidentifier", p.getCallidentifier().getValue())); } if (p.getKeywords() != null) { metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue())); @@ -890,8 +862,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getEcarticle29_3() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); } if (p.getSubjects() != null) { metadata @@ -923,13 +894,11 @@ public class XmlRecordFactory implements Serializable { } if (p.getTotalcost() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); + .add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); } if (p.getFundedamount() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); + .add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); } if (p.getFundingtree() != null) { metadata @@ -950,28 +919,18 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String getAuthorPidType(String s) { + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) .replaceAll("\\W", "") .replaceAll("\\d", ""); } - private static boolean kvNotBlank(KeyValue kv) { + private static boolean kvNotBlank(final KeyValue kv) { return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); } - private void mapDatasourceType(List metadata, final Qualifier dsType) { - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); - - if (specialDatasourceTypes.contains(dsType.getClassid())) { - dsType.setClassid("other"); - dsType.setClassname("other"); - } - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); - } - - private List mapFields(RelatedEntityWrapper link, Set contexts) { + private List mapFields(final RelatedEntityWrapper link, final Set contexts) { final Relation rel = link.getRelation(); final RelatedEntity re = link.getTarget(); final String targetType = link.getTarget().getType(); @@ -987,16 +946,14 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getDateofacceptance())) { metadata - .add( - XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); + .add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); } if (isNotBlank(re.getPublisher())) { metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); } if (isNotBlank(re.getCodeRepositoryUrl())) { metadata - .add( - XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); + .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); @@ -1026,14 +983,16 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { - mapDatasourceType(metadata, re.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype())); + } + if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); } if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", re.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", re.getOpenairecompatibility())); } break; case organization: @@ -1042,8 +1001,7 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getLegalshortname())) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); + .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); @@ -1085,8 +1043,10 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String mapRelation(Set contexts, TemplateFactory templateFactory, EntityType type, - RelatedEntityWrapper link) { + private String mapRelation(final Set contexts, + final TemplateFactory templateFactory, + final EntityType type, + final RelatedEntityWrapper link) { final Relation rel = link.getRelation(); final String targetType = link.getTarget().getType(); final String scheme = ModelSupport.getScheme(type.toString(), targetType); @@ -1096,8 +1056,9 @@ public class XmlRecordFactory implements Serializable { String.format("missing scheme for: <%s - %s>", type, targetType)); } final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); - if (rel.getValidated() == null) + if (rel.getValidated() == null) { rel.setValidated(false); + } return templateFactory .getRel( targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), @@ -1105,12 +1066,14 @@ public class XmlRecordFactory implements Serializable { } private List listChildren( - final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) { + final OafEntity entity, + final JoinedEntity je, + final TemplateFactory templateFactory) { final EntityType entityType = EntityType.fromClass(je.getEntity().getClass()); final List links = je.getLinks(); - List children = links + final List children = links .stream() .filter(link -> isDuplicate(link)) .map(link -> { @@ -1131,13 +1094,11 @@ public class XmlRecordFactory implements Serializable { if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); + .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields - .add( - XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); @@ -1147,20 +1108,17 @@ public class XmlRecordFactory implements Serializable { fields .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", instance.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); } if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); + .add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); } if (isNotBlank(instance.getDistributionlocation())) { fields .add( XmlSerializationUtils - .asXmlElement( - "distributionlocation", instance.getDistributionlocation())); + .asXmlElement("distributionlocation", instance.getDistributionlocation())); } if (instance.getPid() != null) { fields @@ -1185,8 +1143,7 @@ public class XmlRecordFactory implements Serializable { if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) { @@ -1208,8 +1165,7 @@ public class XmlRecordFactory implements Serializable { children .add( templateFactory - .getInstance( - instance.getHostedby().getKey(), fields, instance.getUrl())); + .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); } } final List ext = ((Result) entity).getExternalReference(); @@ -1254,11 +1210,11 @@ public class XmlRecordFactory implements Serializable { return children; } - private boolean isDuplicate(RelatedEntityWrapper link) { + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } - private List listExtraInfo(OafEntity entity) { + private List listExtraInfo(final OafEntity entity) { final List extraInfo = entity.getExtraInfo(); return extraInfo != null ? extraInfo @@ -1271,7 +1227,7 @@ public class XmlRecordFactory implements Serializable { private List buildContexts(final String type, final Set contexts) { final List res = Lists.newArrayList(); - if ((contextMapper != null) + if (contextMapper != null && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) { @@ -1302,8 +1258,7 @@ public class XmlRecordFactory implements Serializable { if (def.getName().equals("category")) { final String rootId = substringBefore(def.getId(), "::"); document = addContextDef( - document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), - def); + document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def); } if (def.getName().equals("concept")) { @@ -1327,17 +1282,17 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; - } catch (TransformerConfigurationException e) { + } catch (final TransformerConfigurationException e) { throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e); } } private XMLTag addContextDef(final XMLTag tag, final ContextDef def) { tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel()); - if ((def.getType() != null) && !def.getType().isEmpty()) { + if (def.getType() != null && !def.getType().isEmpty()) { tag.addAttribute("type", def.getType()); } return tag; @@ -1374,16 +1329,14 @@ public class XmlRecordFactory implements Serializable { if (level0 != null) { final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name")); contextMapper - .put( - level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); + .put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); final Node level1 = fundingPath.selectSingleNode("//funding_level_1"); if (level1 == null) { contexts.add(level0Id); } else { final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name")); contextMapper - .put( - level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); + .put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); final Node level2 = fundingPath.selectSingleNode("//funding_level_2"); if (level2 == null) { contexts.add(level1Id); @@ -1391,8 +1344,7 @@ public class XmlRecordFactory implements Serializable { final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name")); contextMapper .put( - level2Id, - new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); + level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); contexts.add(level2Id); } } @@ -1413,8 +1365,7 @@ public class XmlRecordFactory implements Serializable { funding += getFunderElement(ftree); for (final Object o : Lists - .reverse( - ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { + .reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { final Element e = (Element) o; final String _id = e.valueOf("./id"); funding += "<" diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index 32720514e1..eda6154d7e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -16,11 +16,5 @@ "paramLongName": "isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true - }, - { - "paramName": "odt", - "paramLongName": "otherDsTypeId", - "paramDescription": "list of datasource types to populate field datasourcetypeui", - "paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 9280678c14..9f41805e21 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -25,10 +25,6 @@ targetMaxRelations maximum number of relations allowed for a each entity grouping by target - - otherDsTypeId - mapping used to populate datasourceTypeUi field - format metadata format name (DMF|TMF) @@ -582,7 +578,6 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml --isLookupUrl${isLookupUrl} - --otherDsTypeId${otherDsTypeId} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe48..cd07cfcb1d 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +15,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -49,7 +46,7 @@ public class IndexRecordTransformerTest { @Test public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); testRecordTransformation(record); } @@ -57,14 +54,14 @@ public class IndexRecordTransformerTest { @Test public void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - XmlRecordFactoryTest.otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = load("publication.json", Publication.class); - Project pj = load("project.json", Project.class); - Relation rel = load("relToValidatedProject.json", Relation.class); + final Publication p = load("publication.json", Publication.class); + final Project pj = load("project.json", Project.class); + final Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -72,24 +69,25 @@ public class IndexRecordTransformerTest { new RelatedEntityWrapper(rel, CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class)))); - String record = xmlRecordFactory.build(je); + final String record = xmlRecordFactory.build(je); assertNotNull(record); testRecordTransformation(record); } - private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); - String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); - Transformer tr = SaxonTransformerFactory.newInstance(transformer); + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); - String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); @@ -97,7 +95,7 @@ public class IndexRecordTransformerTest { System.out.println(xmlDoc); } - private T load(String fileName, Class clazz) throws IOException { + private T load(final String fileName, final Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da6..2b5e08e921 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -29,27 +30,25 @@ import eu.dnetlib.dhp.schema.oaf.Relation; public class XmlRecordFactoryTest { - public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; - public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test public void testXMLRecordFactory() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); @@ -72,30 +71,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue( - (IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); @@ -104,29 +102,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); diff --git a/pom.xml b/pom.xml index 6e4526e41c..1a3523f2bf 100644 --- a/pom.xml +++ b/pom.xml @@ -736,7 +736,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.7.15-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6] From 1fb572a33ae1fa374d3237aeefb977de8de6685e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 10:52:24 +0200 Subject: [PATCH 2/6] added eosc fields --- .../dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f0a4161ab9..f586771014 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -84,13 +84,18 @@ SELECT dc.id AS collectedfromid, dc.officialname AS collectedfromname, d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, + d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, - d.lissn AS issnLinking + d.lissn AS issnLinking, + de.jurisdiction AS jurisdiction, + de.thematic AS thematic, + de.knowledge_graph AS knowledgegraph, + de.content_policies AS contentpolicies FROM dsm_datasources d - +LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id) LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource) @@ -126,4 +131,8 @@ GROUP BY dc.officialname, d.issn, d.eissn, - d.lissn + d.lissn, + de.jurisdiction, + de.thematic, + de.knowledge_graph, + de.content_policies From c72c960ffb61bcd7db272d387d34fa4d044e25c3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:03:15 +0200 Subject: [PATCH 3/6] added eosc fields --- .../eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f586771014..98092e8825 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -89,10 +89,10 @@ SELECT d.issn AS issnPrinted, d.eissn AS issnOnline, d.lissn AS issnLinking, - de.jurisdiction AS jurisdiction, + de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, de.thematic AS thematic, de.knowledge_graph AS knowledgegraph, - de.content_policies AS contentpolicies + array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies FROM dsm_datasources d LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) From e6f1773d637cc02ca3424d1c76658156cfb78399 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:17:11 +0200 Subject: [PATCH 4/6] mapping of new eosc fields --- .../raw/MigrateDbEntitiesApplication.java | 97 ++++++++++++++----- 1 file changed, 74 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe2..d78732f9b9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -1,8 +1,41 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.Closeable; import java.io.IOException; @@ -53,8 +86,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); private static final DataInfo DATA_INFO_CLAIM = dataInfo( - false, null, false, false, - qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); + false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + "0.9"); private static final List COLLECTED_FROM_CLAIM = listKeyValues( createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); @@ -140,8 +173,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs_dedup: // generates organization entities and relations for openorgs dedup log.info("Processing Openorgs..."); smdbe - .execute( - "queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Sim Rels..."); smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels); @@ -150,8 +182,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs: // generates organization entities and relations for provision log.info("Processing Openorgs For Provision..."); smdbe - .execute( - "queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Merge Rels..."); smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels); @@ -229,6 +260,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setOaiprovenance(null); // Values not present in the DB ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); + ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setEnglishname(field(rs.getString("englishname"), info)); @@ -270,6 +302,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setDataInfo(info); ds.setLastupdatetimestamp(lastUpdateTimestamp); + ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); + ds.setThematic(rs.getBoolean("thematic")); + ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); + ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); + return Arrays.asList(ds); } catch (final Exception e) { throw new RuntimeException(e); @@ -495,8 +532,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i throw new IllegalStateException( String .format( - "invalid claim, sourceId: %s, targetId: %s, semantics: %s", - sourceId, targetId, semantics)); + "invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId, + semantics)); } r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES); r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY); @@ -516,8 +553,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - private Relation prepareRelation(String sourceId, String targetId, String validationDate) { - Relation r = new Relation(); + private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) { + final Relation r = new Relation(); if (StringUtils.isNotBlank(validationDate)) { r.setValidated(true); r.setValidationDate(validationDate); @@ -530,7 +567,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return r; } - private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) { + private Relation setRelationSemantic(final Relation r, final String relType, final String subRelType, + final String relClass) { r.setRelType(relType); r.setSubRelType(subRelType); r.setRelClass(relClass); @@ -603,6 +641,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return res; } + private List prepareListOfQualifiers(final Array array) throws SQLException { + final List res = new ArrayList<>(); + if (array != null) { + for (final String s : (String[]) array.getArray()) { + final Qualifier q = prepareQualifierSplitting(s); + if (q != null) { + res.add(q); + } + } + } + return res; + } + public List processOrgOrgMergeRels(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); // TODO @@ -660,16 +711,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setLastupdatetimestamp(lastUpdateTimestamp); // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); + // final Relation r2 = new Relation(); + // r2.setRelType(ORG_ORG_RELTYPE); + // r2.setSubRelType(ORG_ORG_SUBRELTYPE); + // r2.setRelClass(relClass); + // r2.setSource(orgId2); + // r2.setTarget(orgId1); + // r2.setCollectedfrom(collectedFrom); + // r2.setDataInfo(info); + // r2.setLastupdatetimestamp(lastUpdateTimestamp); + // return Arrays.asList(r1, r2); return Arrays.asList(r1); } catch (final Exception e) { From 9f1c7b8e177f479289fda28f933de5bab04fb7ce Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:32:34 +0200 Subject: [PATCH 5/6] tests --- .../raw/MigrateDbEntitiesApplicationTest.java | 192 ++++++++++-------- .../raw/datasources_resultset_entry.json | 27 +++ 2 files changed, 132 insertions(+), 87 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb28..2078735f37 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -28,7 +28,12 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) @@ -46,11 +51,8 @@ public class MigrateDbEntitiesApplicationTest { public void setUp() { lenient() .when(vocs.getTermAsQualifier(anyString(), anyString())) - .thenAnswer( - invocation -> OafMapperUtils - .qualifier( - invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), - invocation.getArgument(0))); + .thenAnswer(invocation -> OafMapperUtils + .qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0))); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); @@ -78,6 +80,23 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetype().getClassid()); + assertEquals("dnet:datasource_typologies", ds.getDatasourcetype().getSchemeid()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid()); + assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid()); + + assertEquals("National", ds.getJurisdiction().getClassid()); + assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); + + assertTrue(ds.getThematic()); + assertTrue(ds.getKnowledgegraph()); + + assertEquals(1, ds.getContentpolicies().size()); + assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); + assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); + } @Test @@ -119,7 +138,7 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid()); assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename()); assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue()); - List alternativenames = getValueAsList("alternativenames", fields); + final List alternativenames = getValueAsList("alternativenames", fields); assertEquals(2, alternativenames.size()); assertTrue(alternativenames.contains("Pippo")); assertTrue(alternativenames.contains("Foo")); @@ -216,73 +235,72 @@ public class MigrateDbEntitiesApplicationTest { private List prepareMocks(final String jsonFile) throws IOException, SQLException { final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile)); final ObjectMapper mapper = new ObjectMapper(); - final List list = mapper.readValue(json, new TypeReference>() { - }); + final List list = mapper.readValue(json, new TypeReference>() {}); for (final TypedField tf : list) { if (tf.getValue() == null) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); - break; - case "date": - Mockito.when(rs.getDate(tf.getField())).thenReturn(null); - break; - case "int": - Mockito.when(rs.getInt(tf.getField())).thenReturn(0); - break; - case "double": - Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); - break; - case "array": - Mockito.when(rs.getArray(tf.getField())).thenReturn(null); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(null); - break; + case "not_used": + break; + case "boolean": + Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); + break; + case "date": + Mockito.when(rs.getDate(tf.getField())).thenReturn(null); + break; + case "int": + Mockito.when(rs.getInt(tf.getField())).thenReturn(0); + break; + case "double": + Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); + break; + case "array": + Mockito.when(rs.getArray(tf.getField())).thenReturn(null); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(null); + break; } } else { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito - .when(rs.getBoolean(tf.getField())) - .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); - break; - case "date": - Mockito - .when(rs.getDate(tf.getField())) - .thenReturn(Date.valueOf(tf.getValue().toString())); - break; - case "int": - Mockito - .when(rs.getInt(tf.getField())) - .thenReturn(new Integer(tf.getValue().toString())); - break; - case "double": - Mockito - .when(rs.getDouble(tf.getField())) - .thenReturn(new Double(tf.getValue().toString())); - break; - case "array": - final Array arr = Mockito.mock(Array.class); - final String[] values = ((List) tf.getValue()) - .stream() - .filter(Objects::nonNull) - .map(o -> o.toString()) - .toArray(String[]::new); + case "not_used": + break; + case "boolean": + Mockito + .when(rs.getBoolean(tf.getField())) + .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); + break; + case "date": + Mockito + .when(rs.getDate(tf.getField())) + .thenReturn(Date.valueOf(tf.getValue().toString())); + break; + case "int": + Mockito + .when(rs.getInt(tf.getField())) + .thenReturn(new Integer(tf.getValue().toString())); + break; + case "double": + Mockito + .when(rs.getDouble(tf.getField())) + .thenReturn(new Double(tf.getValue().toString())); + break; + case "array": + final Array arr = Mockito.mock(Array.class); + final String[] values = ((List) tf.getValue()) + .stream() + .filter(Objects::nonNull) + .map(o -> o.toString()) + .toArray(String[]::new); - Mockito.when(arr.getArray()).thenReturn(values); - Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); - break; + Mockito.when(arr.getArray()).thenReturn(values); + Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); + break; } } } @@ -294,27 +312,27 @@ public class MigrateDbEntitiesApplicationTest { for (final TypedField tf : list) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); - break; - case "date": - Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); - break; - case "int": - Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); - break; - case "double": - Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); - break; - case "array": - Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); - break; - case "string": - default: - Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); - break; + case "not_used": + break; + case "boolean": + Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); + break; + case "date": + Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); + break; + case "int": + Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); + break; + case "double": + Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); + break; + case "array": + Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); + break; + case "string": + default: + Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); + break; } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index befa722e1d..42b140306e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -222,6 +222,11 @@ "type": "string", "value": "pubsrepository::journal@@@dnet:datasource_typologies" }, + { + "field": "datasourcetypeui", + "type": "string", + "value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" + }, { "field": "provenanceaction", "type": "not_used", @@ -241,5 +246,27 @@ "field": "issnLinking", "type": "string", "value": "2579-5447" + }, + { + "field": "jurisdiction", + "type": "string", + "value": "National@@@eosc:jurisdictions" + }, + { + "field": "thematic", + "type": "boolean", + "value": true + }, + { + "field": "knowledgegraph", + "type": "boolean", + "value": true + }, + { + "field": "contentpolicies", + "type": "array", + "value": [ + "Journal article@@@eosc:contentpolicies" + ] } ] From 3e2a2d6e71c4cb9820c858ed114cea9b9ac25021 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:56:55 +0200 Subject: [PATCH 6/6] added new fields in xml --- .../CreateRelatedEntitiesJob_phase1.java | 92 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 24 +++++ 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bdd..b2abbc1562 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; /** - * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type - * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join - * (R.target = T_i.id) save the tuples (R_i, T_i) + * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity + * T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i) */ public class CreateRelatedEntitiesJob_phase1 { @@ -42,68 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { - String jsonConfiguration = IOUtils + final String jsonConfiguration = IOUtils .toString( PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputRelationsPath = parser.get("inputRelationsPath"); + final String inputRelationsPath = parser.get("inputRelationsPath"); log.info("inputRelationsPath: {}", inputRelationsPath); - String inputEntityPath = parser.get("inputEntityPath"); + final String inputEntityPath = parser.get("inputEntityPath"); log.info("inputEntityPath: {}", inputEntityPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String graphTableClassName = parser.get("graphTableClassName"); + final String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + final Class entityClazz = (Class) Class.forName(graphTableClassName); - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); + }); } private static void joinRelationEntity( - SparkSession spark, - String inputRelationsPath, - String inputEntityPath, - Class clazz, - String outputPath) { + final SparkSession spark, + final String inputRelationsPath, + final String inputEntityPath, + final Class clazz, + final String outputPath) { - Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) + final Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) .map( (MapFunction>) r -> new Tuple2<>(r.getTarget(), r), Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) .cache(); relsByTarget @@ -118,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 { } private static Dataset readPathEntity( - SparkSession spark, String inputEntityPath, Class entityClazz) { + final SparkSession spark, + final String inputEntityPath, + final Class entityClazz) { log.info("Reading Graph table from: {}", inputEntityPath); return spark @@ -129,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.bean(entityClazz)); } - public static RelatedEntity asRelatedEntity(E entity, Class clazz) { + public static RelatedEntity asRelatedEntity(final E entity, final Class clazz) { final RelatedEntity re = new RelatedEntity(); re.setId(entity.getId()); @@ -143,7 +148,7 @@ public class CreateRelatedEntitiesJob_phase1 { case dataset: case otherresearchproduct: case software: - Result result = (Result) entity; + final Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { final StructuredProperty title = result.getTitle().stream().findFirst().get(); @@ -170,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 { break; case datasource: - Datasource d = (Datasource) entity; + final Datasource d = (Datasource) entity; re.setOfficialname(getValue(d.getOfficialname())); re.setWebsiteurl(getValue(d.getWebsiteurl())); re.setDatasourcetype(d.getDatasourcetype()); + re.setDatasourcetypeui(d.getDatasourcetypeui()); re.setOpenairecompatibility(d.getOpenairecompatibility()); break; case organization: - Organization o = (Organization) entity; + final Organization o = (Organization) entity; re.setLegalname(getValue(o.getLegalname())); re.setLegalshortname(getValue(o.getLegalshortname())); @@ -187,14 +193,14 @@ public class CreateRelatedEntitiesJob_phase1 { re.setWebsiteurl(getValue(o.getWebsiteurl())); break; case project: - Project p = (Project) entity; + final Project p = (Project) entity; re.setProjectTitle(getValue(p.getTitle())); re.setCode(getValue(p.getCode())); re.setAcronym(getValue(p.getAcronym())); re.setContracttype(p.getContracttype()); - List> f = p.getFundingtree(); + final List> f = p.getFundingtree(); if (!f.isEmpty()) { re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); } @@ -203,11 +209,11 @@ public class CreateRelatedEntitiesJob_phase1 { return re; } - private static String getValue(Field field) { + private static String getValue(final Field field) { return getFieldValueWithDefault(field, ""); } - private static T getFieldValueWithDefault(Field f, T defaultValue) { + private static T getFieldValueWithDefault(final Field f, final T defaultValue) { return Optional .ofNullable(f) .filter(Objects::nonNull) @@ -216,21 +222,21 @@ public class CreateRelatedEntitiesJob_phase1 { } /** - * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file, * * @param spark * @param relationPath * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( - SparkSession spark, final String relationPath) { + final SparkSession spark, + final String relationPath) { log.info("Reading relations from: {}", relationPath); return spark.read().load(relationPath).as(Encoders.bean(Relation.class)); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 392d3cde6d..19300d77db 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -735,6 +735,30 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } + if (ds.getJurisdiction() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); + } + + if (ds.getThematic() != null) { + metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); + } + + if (ds.getKnowledgegraph() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString())); + } + + if (ds.getContentpolicies() != null) { + metadata + .addAll( + ds + .getContentpolicies() + .stream() + .filter(Objects::nonNull) + .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) + .collect(Collectors.toList())); + } + break; case organization: final Organization o = (Organization) entity;