diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..518f41120 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -42,61 +41,51 @@ public class XmlConverterJob { public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( XmlConverterJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); + final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String isLookupUrl = parser.get("isLookupUrl"); + final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - String otherDsTypeId = parser.get("otherDsTypeId"); - log.info("otherDsTypeId: {}", otherDsTypeId); - - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - convertToXml( - spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + }); } private static void convertToXml( - SparkSession spark, - String inputPath, - String outputPath, - ContextMapper contextMapper, - String otherDsTypeId) { + final SparkSession spark, + final String inputPath, + final String outputPath, + final ContextMapper contextMapper) { final XmlRecordFactory recordFactory = new XmlRecordFactory( prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, - otherDsTypeId); + schemaLocation); final List paths = HdfsSupport .listFiles(inputPath, spark.sparkContext().hadoopConfiguration()); @@ -116,16 +105,15 @@ public class XmlConverterJob { .mapToPair( (PairFunction, Text, Text>) t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) - .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static Map prepareAccumulators(SparkContext sc) { - Map accumulators = Maps.newHashMap(); + private static Map prepareAccumulators(final SparkContext sc) { + final Map accumulators = Maps.newHashMap(); accumulators .put( "resultResult_similarity_isAmongTopNSimilarDocuments", @@ -136,15 +124,13 @@ public class XmlConverterJob { sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments")); accumulators .put( - "resultResult_supplement_isSupplementTo", - sc.longAccumulator("resultResult_supplement_isSupplementTo")); + "resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo")); accumulators .put( "resultResult_supplement_isSupplementedBy", sc.longAccumulator("resultResult_supplement_isSupplementedBy")); accumulators - .put( - "resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); + .put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges")); accumulators @@ -152,16 +138,11 @@ public class XmlConverterJob { "resultResult_publicationDataset_isRelatedTo", sc.longAccumulator("resultResult_publicationDataset_isRelatedTo")); accumulators - .put( - "resultResult_relationship_isRelatedTo", - sc.longAccumulator("resultResult_relationship_isRelatedTo")); + .put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo")); accumulators - .put( - "resultProject_outcome_isProducedBy", - sc.longAccumulator("resultProject_outcome_isProducedBy")); + .put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy")); accumulators - .put( - "resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); + .put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "resultOrganization_affiliation_isAuthorInstitutionOf", @@ -184,9 +165,7 @@ public class XmlConverterJob { "organizationOrganization_dedup_isMergedIn", sc.longAccumulator("organizationOrganization_dedup_isMergedIn")); accumulators - .put( - "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + .put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..392d3cde6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -9,14 +10,23 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; -import javax.xml.transform.*; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -36,19 +46,38 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.*; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.MainEntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.ExternalReference; +import eu.dnetlib.dhp.schema.oaf.ExtraInfo; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { - private final Map accumulators; + /** + * + */ + private static final long serialVersionUID = 2912912999272373172L; - private final Set specialDatasourceTypes; + private final Map accumulators; private final ContextMapper contextMapper; @@ -61,23 +90,20 @@ public class XmlRecordFactory implements Serializable { public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { - this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI); + this(Maps.newHashMap(), contextMapper, indent, schemaLocation); } public XmlRecordFactory( final Map accumulators, final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { this.accumulators = accumulators; this.contextMapper = contextMapper; this.schemaLocation = schemaLocation; - this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI)); this.indent = indent; } @@ -87,8 +113,8 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); // final OafEntity entity = toOafEntity(je.getEntity()); - OafEntity entity = je.getEntity(); - TemplateFactory templateFactory = new TemplateFactory(); + final OafEntity entity = je.getEntity(); + final TemplateFactory templateFactory = new TemplateFactory(); try { final EntityType type = EntityType.fromClass(entity.getClass()); @@ -110,11 +136,7 @@ public class XmlRecordFactory implements Serializable { final String body = templateFactory .buildBody( - mainType, - metadata, - relations, - listChildren(entity, je, templateFactory), - listExtraInfo(entity)); + mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity)); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); } catch (final Throwable e) { @@ -142,19 +164,19 @@ public class XmlRecordFactory implements Serializable { default: throw new IllegalArgumentException("invalid type: " + type); } - } catch (IOException e) { + } catch (final IOException e) { throw new IllegalArgumentException(e); } } - private String printXML(String xml, boolean indent) { + private String printXML(final String xml, final boolean indent) { try { final Document doc = new SAXReader().read(new StringReader(xml)); - OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); + final OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); - StringWriter sw = new StringWriter(); - XMLWriter writer = new XMLWriter(sw, format); + final StringWriter sw = new StringWriter(); + final XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); } catch (IOException | DocumentException e) { @@ -163,7 +185,9 @@ public class XmlRecordFactory implements Serializable { } private List metadata( - final EntityType type, final OafEntity entity, final Set contexts) { + final EntityType type, + final OafEntity entity, + final Set contexts) { final List metadata = Lists.newArrayList(); @@ -230,72 +254,63 @@ public class XmlRecordFactory implements Serializable { .getAuthor() .stream() .filter(Objects::nonNull) - .map( - a -> { - final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) - && isNotBlank(sp.getValue())) - .collect( - Collectors - .toMap( - p -> getAuthorPidType(p.getQualifier().getClassid()), - p -> p, - (p1, p2) -> p1)) - .values() - .stream() - .collect( - Collectors - .groupingBy( - p -> p.getValue(), - Collectors - .mapping( - p -> p, - Collectors.minBy(new AuthorPidTypeComparator())))) - .values() - .stream() - .map(op -> op.get()) - .forEach( - sp -> { - String pidType = getAuthorPidType(sp.getQualifier().getClassid()); - String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - - // ugly hack: some records provide swapped pidtype and pidvalue - if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { - sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); - } else { - if (isNotBlank(pidType)) { - sb - .append( - String - .format( - " %s=\"%s\"", - pidType, - pidValue - .toLowerCase() - .replaceAll("^.*orcid\\.org\\/", ""))); - } - } - }); - } + .map(a -> { + final StringBuilder sb = new StringBuilder("" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); - return sb.toString(); - }) + .append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\""); + } + if (a.getPid() != null) { + a + .getPid() + .stream() + .filter(Objects::nonNull) + .filter( + sp -> isNotBlank(sp.getQualifier().getClassid()) + && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p, + (p1, p2) -> p1)) + .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), Collectors + .mapping( + p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) + .forEach(sp -> { + final String pidType = getAuthorPidType(sp.getQualifier().getClassid()); + final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); + + // ugly hack: some records provide swapped pidtype and pidvalue + if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { + sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); + } else { + if (isNotBlank(pidType)) { + sb + .append( + String + .format( + " %s=\"%s\"", pidType, pidValue + .toLowerCase() + .replaceAll("^.*orcid\\.org\\/", ""))); + } + } + }); + } + sb + .append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); + return sb.toString(); + }) .collect(Collectors.toList())); } if (r.getContributor() != null) { @@ -332,8 +347,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", r.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", r.getDateofacceptance().getValue())); } if (r.getDescription() != null) { metadata @@ -347,8 +361,7 @@ public class XmlRecordFactory implements Serializable { } if (r.getEmbargoenddate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); + .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); } if (r.getSubject() != null) { metadata @@ -423,23 +436,20 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "lastmetadataupdate", d.getLastmetadataupdate().getValue())); + .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue())); } if (d.getMetadataversionnumber() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "metadataversionnumber", d.getMetadataversionnumber().getValue())); + .asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue())); } if (d.getSize() != null) { metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue())); } if (d.getStoragedate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); + .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); } if (d.getVersion() != null) { metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue())); @@ -509,98 +519,87 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); + .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); } if (s.getProgrammingLanguage() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "programmingLanguage", s.getProgrammingLanguage())); + .mapQualifier("programmingLanguage", s.getProgrammingLanguage())); } break; case datasource: final Datasource ds = (Datasource) entity; if (ds.getDatasourcetype() != null) { - mapDatasourceType(metadata, ds.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype())); + } + if (ds.getDatasourcetypeui() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); } if (ds.getOpenairecompatibility() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", ds.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", ds.getOpenairecompatibility())); } if (ds.getOfficialname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); + .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); } if (ds.getEnglishname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); + .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); } if (ds.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); } if (ds.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue())); } if (ds.getContactemail() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); + .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); } if (ds.getNamespaceprefix() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "namespaceprefix", ds.getNamespaceprefix().getValue())); + .asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue())); } if (ds.getLatitude() != null) { metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue())); } if (ds.getLongitude() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); + .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); } if (ds.getDateofvalidation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofvalidation", ds.getDateofvalidation().getValue())); + .asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue())); } if (ds.getDescription() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); + .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); } if (ds.getOdnumberofitems() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitems", ds.getOdnumberofitems().getValue())); + .asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue())); } if (ds.getOdnumberofitemsdate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); + .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); } if (ds.getOdpolicies() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); + .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); } if (ds.getOdlanguages() != null) { metadata @@ -635,50 +634,43 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "releasestartdate", ds.getReleaseenddate().getValue())); + .asXmlElement("releasestartdate", ds.getReleaseenddate().getValue())); } if (ds.getReleaseenddate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "releaseenddate", ds.getReleaseenddate().getValue())); + .asXmlElement("releaseenddate", ds.getReleaseenddate().getValue())); } if (ds.getMissionstatementurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "missionstatementurl", ds.getMissionstatementurl().getValue())); + .asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue())); } if (ds.getDataprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dataprovider", ds.getDataprovider().getValue().toString())); + .asXmlElement("dataprovider", ds.getDataprovider().getValue().toString())); } if (ds.getServiceprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "serviceprovider", ds.getServiceprovider().getValue().toString())); + .asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString())); } if (ds.getDatabaseaccesstype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "databaseaccesstype", ds.getDatabaseaccesstype().getValue())); + .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue())); } if (ds.getDatauploadtype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadtype", ds.getDatauploadtype().getValue())); + .asXmlElement("datauploadtype", ds.getDatauploadtype().getValue())); } if (ds.getDatabaseaccessrestriction() != null) { metadata @@ -691,39 +683,33 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadrestriction", ds.getDatauploadrestriction().getValue())); + .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue())); } if (ds.getVersioning() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "versioning", ds.getVersioning().getValue().toString())); + .asXmlElement("versioning", ds.getVersioning().getValue().toString())); } if (ds.getCitationguidelineurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "citationguidelineurl", ds.getCitationguidelineurl().getValue())); + .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); } if (ds.getQualitymanagementkind() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "qualitymanagementkind", ds.getQualitymanagementkind().getValue())); + .asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue())); } if (ds.getPidsystems() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); + .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); } if (ds.getCertificates() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); + .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); } if (ds.getPolicies() != null) { metadata @@ -757,13 +743,11 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "legalshortname", o.getLegalshortname().getValue())); + .asXmlElement("legalshortname", o.getLegalshortname().getValue())); } if (o.getLegalname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); + .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); } if (o.getAlternativeNames() != null) { metadata @@ -777,8 +761,7 @@ public class XmlRecordFactory implements Serializable { } if (o.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); @@ -786,32 +769,27 @@ public class XmlRecordFactory implements Serializable { if (o.getEclegalbody() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); } if (o.getEclegalperson() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); } if (o.getEcnonprofit() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); } if (o.getEcresearchorganization() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecresearchorganization", o.getEcresearchorganization().getValue())); + .asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue())); } if (o.getEchighereducation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "echighereducation", o.getEchighereducation().getValue())); + .asXmlElement("echighereducation", o.getEchighereducation().getValue())); } if (o.getEcinternationalorganizationeurinterests() != null) { metadata @@ -830,20 +808,17 @@ public class XmlRecordFactory implements Serializable { } if (o.getEcenterprise() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); } if (o.getEcsmevalidated() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecsmevalidated", o.getEcsmevalidated().getValue())); + .asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue())); } if (o.getEcnutscode() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); } if (o.getCountry() != null) { metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry())); @@ -855,8 +830,7 @@ public class XmlRecordFactory implements Serializable { if (p.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); } if (p.getCode() != null) { metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue())); @@ -869,8 +843,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getStartdate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); + .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); } if (p.getEnddate() != null) { metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue())); @@ -879,8 +852,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "callidentifier", p.getCallidentifier().getValue())); + .asXmlElement("callidentifier", p.getCallidentifier().getValue())); } if (p.getKeywords() != null) { metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue())); @@ -890,8 +862,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getEcarticle29_3() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); } if (p.getSubjects() != null) { metadata @@ -923,13 +894,11 @@ public class XmlRecordFactory implements Serializable { } if (p.getTotalcost() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); + .add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); } if (p.getFundedamount() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); + .add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); } if (p.getFundingtree() != null) { metadata @@ -950,28 +919,18 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String getAuthorPidType(String s) { + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) .replaceAll("\\W", "") .replaceAll("\\d", ""); } - private static boolean kvNotBlank(KeyValue kv) { + private static boolean kvNotBlank(final KeyValue kv) { return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); } - private void mapDatasourceType(List metadata, final Qualifier dsType) { - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); - - if (specialDatasourceTypes.contains(dsType.getClassid())) { - dsType.setClassid("other"); - dsType.setClassname("other"); - } - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); - } - - private List mapFields(RelatedEntityWrapper link, Set contexts) { + private List mapFields(final RelatedEntityWrapper link, final Set contexts) { final Relation rel = link.getRelation(); final RelatedEntity re = link.getTarget(); final String targetType = link.getTarget().getType(); @@ -987,16 +946,14 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getDateofacceptance())) { metadata - .add( - XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); + .add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); } if (isNotBlank(re.getPublisher())) { metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); } if (isNotBlank(re.getCodeRepositoryUrl())) { metadata - .add( - XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); + .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); @@ -1026,14 +983,16 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { - mapDatasourceType(metadata, re.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype())); + } + if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); } if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", re.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", re.getOpenairecompatibility())); } break; case organization: @@ -1042,8 +1001,7 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getLegalshortname())) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); + .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); @@ -1085,8 +1043,10 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String mapRelation(Set contexts, TemplateFactory templateFactory, EntityType type, - RelatedEntityWrapper link) { + private String mapRelation(final Set contexts, + final TemplateFactory templateFactory, + final EntityType type, + final RelatedEntityWrapper link) { final Relation rel = link.getRelation(); final String targetType = link.getTarget().getType(); final String scheme = ModelSupport.getScheme(type.toString(), targetType); @@ -1096,8 +1056,9 @@ public class XmlRecordFactory implements Serializable { String.format("missing scheme for: <%s - %s>", type, targetType)); } final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); - if (rel.getValidated() == null) + if (rel.getValidated() == null) { rel.setValidated(false); + } return templateFactory .getRel( targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), @@ -1105,12 +1066,14 @@ public class XmlRecordFactory implements Serializable { } private List listChildren( - final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) { + final OafEntity entity, + final JoinedEntity je, + final TemplateFactory templateFactory) { final EntityType entityType = EntityType.fromClass(je.getEntity().getClass()); final List links = je.getLinks(); - List children = links + final List children = links .stream() .filter(link -> isDuplicate(link)) .map(link -> { @@ -1131,13 +1094,11 @@ public class XmlRecordFactory implements Serializable { if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); + .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields - .add( - XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); @@ -1147,20 +1108,17 @@ public class XmlRecordFactory implements Serializable { fields .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", instance.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); } if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); + .add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); } if (isNotBlank(instance.getDistributionlocation())) { fields .add( XmlSerializationUtils - .asXmlElement( - "distributionlocation", instance.getDistributionlocation())); + .asXmlElement("distributionlocation", instance.getDistributionlocation())); } if (instance.getPid() != null) { fields @@ -1185,8 +1143,7 @@ public class XmlRecordFactory implements Serializable { if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) { @@ -1208,8 +1165,7 @@ public class XmlRecordFactory implements Serializable { children .add( templateFactory - .getInstance( - instance.getHostedby().getKey(), fields, instance.getUrl())); + .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); } } final List ext = ((Result) entity).getExternalReference(); @@ -1254,11 +1210,11 @@ public class XmlRecordFactory implements Serializable { return children; } - private boolean isDuplicate(RelatedEntityWrapper link) { + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } - private List listExtraInfo(OafEntity entity) { + private List listExtraInfo(final OafEntity entity) { final List extraInfo = entity.getExtraInfo(); return extraInfo != null ? extraInfo @@ -1271,7 +1227,7 @@ public class XmlRecordFactory implements Serializable { private List buildContexts(final String type, final Set contexts) { final List res = Lists.newArrayList(); - if ((contextMapper != null) + if (contextMapper != null && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) { @@ -1302,8 +1258,7 @@ public class XmlRecordFactory implements Serializable { if (def.getName().equals("category")) { final String rootId = substringBefore(def.getId(), "::"); document = addContextDef( - document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), - def); + document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def); } if (def.getName().equals("concept")) { @@ -1327,17 +1282,17 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; - } catch (TransformerConfigurationException e) { + } catch (final TransformerConfigurationException e) { throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e); } } private XMLTag addContextDef(final XMLTag tag, final ContextDef def) { tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel()); - if ((def.getType() != null) && !def.getType().isEmpty()) { + if (def.getType() != null && !def.getType().isEmpty()) { tag.addAttribute("type", def.getType()); } return tag; @@ -1374,16 +1329,14 @@ public class XmlRecordFactory implements Serializable { if (level0 != null) { final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name")); contextMapper - .put( - level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); + .put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); final Node level1 = fundingPath.selectSingleNode("//funding_level_1"); if (level1 == null) { contexts.add(level0Id); } else { final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name")); contextMapper - .put( - level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); + .put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); final Node level2 = fundingPath.selectSingleNode("//funding_level_2"); if (level2 == null) { contexts.add(level1Id); @@ -1391,8 +1344,7 @@ public class XmlRecordFactory implements Serializable { final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name")); contextMapper .put( - level2Id, - new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); + level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); contexts.add(level2Id); } } @@ -1413,8 +1365,7 @@ public class XmlRecordFactory implements Serializable { funding += getFunderElement(ftree); for (final Object o : Lists - .reverse( - ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { + .reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { final Element e = (Element) o; final String _id = e.valueOf("./id"); funding += "<" diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index 32720514e..eda6154d7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -16,11 +16,5 @@ "paramLongName": "isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true - }, - { - "paramName": "odt", - "paramLongName": "otherDsTypeId", - "paramDescription": "list of datasource types to populate field datasourcetypeui", - "paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 9280678c1..9f41805e2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -25,10 +25,6 @@ targetMaxRelations maximum number of relations allowed for a each entity grouping by target - - otherDsTypeId - mapping used to populate datasourceTypeUi field - format metadata format name (DMF|TMF) @@ -582,7 +578,6 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml --isLookupUrl${isLookupUrl} - --otherDsTypeId${otherDsTypeId} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..cd07cfcb1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +15,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -49,7 +46,7 @@ public class IndexRecordTransformerTest { @Test public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); testRecordTransformation(record); } @@ -57,14 +54,14 @@ public class IndexRecordTransformerTest { @Test public void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - XmlRecordFactoryTest.otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = load("publication.json", Publication.class); - Project pj = load("project.json", Project.class); - Relation rel = load("relToValidatedProject.json", Relation.class); + final Publication p = load("publication.json", Publication.class); + final Project pj = load("project.json", Project.class); + final Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -72,24 +69,25 @@ public class IndexRecordTransformerTest { new RelatedEntityWrapper(rel, CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class)))); - String record = xmlRecordFactory.build(je); + final String record = xmlRecordFactory.build(je); assertNotNull(record); testRecordTransformation(record); } - private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); - String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); - Transformer tr = SaxonTransformerFactory.newInstance(transformer); + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); - String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); @@ -97,7 +95,7 @@ public class IndexRecordTransformerTest { System.out.println(xmlDoc); } - private T load(String fileName, Class clazz) throws IOException { + private T load(final String fileName, final Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da..2b5e08e92 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -29,27 +30,25 @@ import eu.dnetlib.dhp.schema.oaf.Relation; public class XmlRecordFactoryTest { - public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; - public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test public void testXMLRecordFactory() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); @@ -72,30 +71,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue( - (IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); @@ -104,29 +102,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); diff --git a/pom.xml b/pom.xml index 6e4526e41..1a3523f2b 100644 --- a/pom.xml +++ b/pom.xml @@ -736,7 +736,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.14] + [2.7.15-SNAPSHOT] [4.0.3] [6.0.5] [3.1.6]