From 86cdb7a38f52209398db510455488c4331d4db08 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 23 Feb 2022 15:54:18 +0100 Subject: [PATCH] [provision] serialize measures defined on the result level --- .../oa/provision/utils/XmlRecordFactory.java | 23 +++++++- .../utils/XmlSerializationUtils.java | 14 +++++ .../oa/provision/XmlRecordFactoryTest.java | 7 +++ .../dnetlib/dhp/oa/provision/publication.json | 58 +++++++++++++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index e51a84e02..cb29dc121 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -23,7 +23,6 @@ import javax.xml.transform.stream.StreamResult; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.apache.http.protocol.HTTP; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -50,6 +49,7 @@ import eu.dnetlib.dhp.schema.common.*; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import scala.Tuple2; public class XmlRecordFactory implements Serializable { @@ -209,6 +209,10 @@ public class XmlRecordFactory implements Serializable { if (ModelSupport.isResult(type)) { final Result r = (Result) entity; + if (r.getMeasures() != null) { + metadata.addAll(measuresAsXml(r.getMeasures())); + } + if (r.getContext() != null) { contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); /* FIXME: Workaround for CLARIN mining issue: 
#3670#note-29 */ @@ -936,6 +940,23 @@ public class XmlRecordFactory implements Serializable { return metadata; } + private List<String> measuresAsXml(List<Measure> measures) { + return measures + .stream() + .flatMap( + m -> m + .getUnit() + .stream() + .map( + u -> Lists + .newArrayList( + new Tuple2<>("id", m.getId()), + new Tuple2<>("key", u.getKey()), + new Tuple2<>("value", u.getValue()))) + .map(l -> XmlSerializationUtils.asXmlElement("measure", l))) + .collect(Collectors.toList()); + } + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 213a62b32..73667e056 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -5,7 +5,10 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; +import java.util.List; + import eu.dnetlib.dhp.schema.oaf.*; +import scala.Tuple2; public class XmlSerializationUtils { @@ -147,4 +150,15 @@ public class XmlSerializationUtils { .append(attr("schemename", q.getSchemename())) .toString(); } + + public static String asXmlElement(String name, List<Tuple2<String, String>> attributes) { + StringBuilder sb = new StringBuilder(); + sb.append("<"); + sb.append(name); + for (Tuple2<String, String> attr : attributes) { + sb.append(" ").append(attr(attr._1(), attr._2())); + } + sb.append("/>"); + return sb.toString(); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java 
b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index c1e0567e6..1df980643 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -69,6 +69,13 @@ public class XmlRecordFactoryTest { assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()")); assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()")); + + assertEquals( + "1.00889953098e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@value")); + assertEquals( + "30.6576853333", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@value")); + assertEquals( + "4.62970429725e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@value")); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json index 3b5b54bbf..d0c8ea4ed 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/publication.json @@ -1,4 +1,33 @@ { + "measures": [ + { + "id": "influence", + "unit": [ + { + "key": "score", + "value": "1.00889953098e-08" + } + ] + }, + { + "id": "popularity_alt", + "unit": [ + { + "key": "score", + "value": "30.6576853333" + } + ] + }, + { + "id": "popularity", + "unit": [ + { + "key": "score", + "value": "4.62970429725e-08" + } + ] + } + ], "author": [ { "affiliation": [], @@ -284,6 +313,35 @@ "id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", "instance": [ { + "measures": [ + { + "id": "influence", + "unit": [ + { + "key": "score", + "value": "1.00889953098e-08" + } + ] + }, + { + "id": 
"popularity_alt", + "unit": [ + { + "key": "score", + "value": "30.6576853333" + } + ] + }, + { + "id": "popularity", + "unit": [ + { + "key": "score", + "value": "4.62970429725e-08" + } + ] + } + ], "pid": [ { "dataInfo": {