forked from D-Net/dnet-hadoop
[provision] serialize measures defined on the result level
This commit is contained in:
parent
5226d0a100
commit
86cdb7a38f
|
@ -23,7 +23,6 @@ import javax.xml.transform.stream.StreamResult;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.lang3.tuple.ImmutablePair;
|
import org.apache.commons.lang3.tuple.ImmutablePair;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.apache.http.protocol.HTTP;
|
|
||||||
import org.apache.spark.util.LongAccumulator;
|
import org.apache.spark.util.LongAccumulator;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
|
@ -50,6 +49,7 @@ import eu.dnetlib.dhp.schema.common.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class XmlRecordFactory implements Serializable {
|
public class XmlRecordFactory implements Serializable {
|
||||||
|
|
||||||
|
@ -209,6 +209,10 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (ModelSupport.isResult(type)) {
|
if (ModelSupport.isResult(type)) {
|
||||||
final Result r = (Result) entity;
|
final Result r = (Result) entity;
|
||||||
|
|
||||||
|
if (r.getMeasures() != null) {
|
||||||
|
metadata.addAll(measuresAsXml(r.getMeasures()));
|
||||||
|
}
|
||||||
|
|
||||||
if (r.getContext() != null) {
|
if (r.getContext() != null) {
|
||||||
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
|
contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()));
|
||||||
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
||||||
|
@ -936,6 +940,23 @@ public class XmlRecordFactory implements Serializable {
|
||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<String> measuresAsXml(List<Measure> measures) {
|
||||||
|
return measures
|
||||||
|
.stream()
|
||||||
|
.flatMap(
|
||||||
|
m -> m
|
||||||
|
.getUnit()
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
u -> Lists
|
||||||
|
.newArrayList(
|
||||||
|
new Tuple2<>("id", m.getId()),
|
||||||
|
new Tuple2<>("key", u.getKey()),
|
||||||
|
new Tuple2<>("value", u.getValue())))
|
||||||
|
.map(l -> XmlSerializationUtils.asXmlElement("measure", l)))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
private String getAuthorPidType(final String s) {
|
private String getAuthorPidType(final String s) {
|
||||||
return XmlSerializationUtils
|
return XmlSerializationUtils
|
||||||
.escapeXml(s)
|
.escapeXml(s)
|
||||||
|
|
|
@ -5,7 +5,10 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix;
|
||||||
import static org.apache.commons.lang3.StringUtils.isBlank;
|
import static org.apache.commons.lang3.StringUtils.isBlank;
|
||||||
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class XmlSerializationUtils {
|
public class XmlSerializationUtils {
|
||||||
|
|
||||||
|
@ -147,4 +150,15 @@ public class XmlSerializationUtils {
|
||||||
.append(attr("schemename", q.getSchemename()))
|
.append(attr("schemename", q.getSchemename()))
|
||||||
.toString();
|
.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String asXmlElement(String name, List<Tuple2<String, String>> attributes) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("<");
|
||||||
|
sb.append(name);
|
||||||
|
for (Tuple2<String, String> attr : attributes) {
|
||||||
|
sb.append(" ").append(attr(attr._1(), attr._2()));
|
||||||
|
}
|
||||||
|
sb.append("/>");
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,6 +69,13 @@ public class XmlRecordFactoryTest {
|
||||||
|
|
||||||
assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()"));
|
assertEquals("1721.47", doc.valueOf("//processingchargeamount/text()"));
|
||||||
assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()"));
|
assertEquals("EUR", doc.valueOf("//processingchargecurrency/text()"));
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"1.00889953098e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'influence']/@value"));
|
||||||
|
assertEquals(
|
||||||
|
"30.6576853333", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity_alt']/@value"));
|
||||||
|
assertEquals(
|
||||||
|
"4.62970429725e-08", doc.valueOf("//*[local-name() = 'result']/measure[./@id = 'popularity']/@value"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -1,4 +1,33 @@
|
||||||
{
|
{
|
||||||
|
"measures": [
|
||||||
|
{
|
||||||
|
"id": "influence",
|
||||||
|
"unit": [
|
||||||
|
{
|
||||||
|
"key": "score",
|
||||||
|
"value": "1.00889953098e-08"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "popularity_alt",
|
||||||
|
"unit": [
|
||||||
|
{
|
||||||
|
"key": "score",
|
||||||
|
"value": "30.6576853333"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "popularity",
|
||||||
|
"unit": [
|
||||||
|
{
|
||||||
|
"key": "score",
|
||||||
|
"value": "4.62970429725e-08"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"author": [
|
"author": [
|
||||||
{
|
{
|
||||||
"affiliation": [],
|
"affiliation": [],
|
||||||
|
@ -284,6 +313,35 @@
|
||||||
"id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
|
"id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
|
||||||
"instance": [
|
"instance": [
|
||||||
{
|
{
|
||||||
|
"measures": [
|
||||||
|
{
|
||||||
|
"id": "influence",
|
||||||
|
"unit": [
|
||||||
|
{
|
||||||
|
"key": "score",
|
||||||
|
"value": "1.00889953098e-08"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "popularity_alt",
|
||||||
|
"unit": [
|
||||||
|
{
|
||||||
|
"key": "score",
|
||||||
|
"value": "30.6576853333"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "popularity",
|
||||||
|
"unit": [
|
||||||
|
{
|
||||||
|
"key": "score",
|
||||||
|
"value": "4.62970429725e-08"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"pid": [
|
"pid": [
|
||||||
{
|
{
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
|
|
Loading…
Reference in New Issue