Merge pull request '[provision wf] serialize measures defined on the result level' (#196) from xml_measures into beta

Reviewed-on: D-Net/dnet-hadoop#196
This commit is contained in:
Claudio Atzori 2022-02-23 15:56:28 +01:00
commit fbf192d6ba
4 changed files with 101 additions and 1 deletions

View File

@ -23,7 +23,6 @@ import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.http.protocol.HTTP;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.DocumentException;
@ -50,6 +49,7 @@ import eu.dnetlib.dhp.schema.common.*;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import scala.Tuple2;
public class XmlRecordFactory implements Serializable {
@ -209,6 +209,10 @@ public class XmlRecordFactory implements Serializable {
if (ModelSupport.isResult(type)) {
final Result r = (Result) entity;
if (r.getMeasures() != null) {
metadata.addAll(measuresAsXml(r.getMeasures()));
}
if (r.getContext() != null) {
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
@ -936,6 +940,23 @@ public class XmlRecordFactory implements Serializable {
return metadata;
}
/**
 * Serializes the given measures as self-closing {@code <measure/>} XML elements.
 *
 * <p>One element is produced for every (measure, unit) pair. Each element carries three attributes, in this
 * order: {@code id} (taken from the measure), {@code key} and {@code value} (taken from the unit).
 *
 * <p>Null measures, measures whose unit list is null, and null units are skipped instead of raising a
 * {@link NullPointerException}, so that a partially populated record does not abort the serialization.
 *
 * @param measures the measures to serialize; may be {@code null}
 * @return the serialized elements, in encounter order; an empty list when there is nothing to serialize
 */
private List<String> measuresAsXml(List<Measure> measures) {
    if (measures == null) {
        return Lists.newArrayList();
    }
    return measures
        .stream()
        // a measure without units contributes no element; guard before dereferencing getUnit()
        .filter(m -> m != null && m.getUnit() != null)
        .flatMap(
            m -> m
                .getUnit()
                .stream()
                .filter(u -> u != null)
                .map(
                    u -> Lists
                        .newArrayList(
                            new Tuple2<>("id", m.getId()),
                            new Tuple2<>("key", u.getKey()),
                            new Tuple2<>("value", u.getValue())))
                .map(l -> XmlSerializationUtils.asXmlElement("measure", l)))
        .collect(Collectors.toList());
}
private String getAuthorPidType(final String s) {
return XmlSerializationUtils
.escapeXml(s)

View File

@ -5,7 +5,10 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
public class XmlSerializationUtils {
@ -147,4 +150,15 @@ public class XmlSerializationUtils {
.append(attr("schemename", q.getSchemename()))
.toString();
}
/**
 * Builds a self-closing XML element of the form {@code <name attr1 attr2 .../>}.
 *
 * <p>Every (name, value) pair in {@code attributes} is rendered through the {@code attr} helper of this class and
 * appended, preceded by a single space, in the order in which the pairs appear in the list.
 *
 * @param name       the element name, written right after the opening {@code <}
 * @param attributes the attribute (name, value) pairs to render
 * @return the serialized element
 */
public static String asXmlElement(String name, List<Tuple2<String, String>> attributes) {
    final StringBuilder element = new StringBuilder("<").append(name);
    attributes.forEach(pair -> element.append(" ").append(attr(pair._1(), pair._2())));
    return element.append("/>").toString();
}
}

View File

@ -69,6 +69,13 @@ public class XmlRecordFactoryTest {
assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()"));
assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()"));
assertEquals(
"1.00889953098e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@value"));
assertEquals(
"30.6576853333", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@value"));
assertEquals(
"4.62970429725e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@value"));
}
@Test

View File

@ -1,4 +1,33 @@
{
"measures": [
{
"id": "influence",
"unit": [
{
"key": "score",
"value": "1.00889953098e-08"
}
]
},
{
"id": "popularity_alt",
"unit": [
{
"key": "score",
"value": "30.6576853333"
}
]
},
{
"id": "popularity",
"unit": [
{
"key": "score",
"value": "4.62970429725e-08"
}
]
}
],
"author": [
{
"affiliation": [],
@ -284,6 +313,35 @@
"id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
"instance": [
{
"measures": [
{
"id": "influence",
"unit": [
{
"key": "score",
"value": "1.00889953098e-08"
}
]
},
{
"id": "popularity_alt",
"unit": [
{
"key": "score",
"value": "30.6576853333"
}
]
},
{
"id": "popularity",
"unit": [
{
"key": "score",
"value": "4.62970429725e-08"
}
]
}
],
"pid": [
{
"dataInfo": {