[graph provision] align serialisation of the usage count measures to the agreed specifications

This commit is contained in:
Claudio Atzori 2024-06-05 16:34:40 +02:00
commit 91b49366c6
4 changed files with 68 additions and 31 deletions

View File

@ -172,7 +172,7 @@ public class SparkBulkTagJob {
.option("compression", "gzip") .option("compression", "gzip")
.json(outputPath + "project"); .json(outputPath + "project");
readPath(spark, outputPath + "project", Datasource.class) readPath(spark, outputPath + "project", Project.class)
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression", "gzip") .option("compression", "gzip")

View File

@ -170,18 +170,19 @@ public class XmlSerializationUtils {
return sb.toString(); return sb.toString();
} }
// <measure views="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" /> // <measure id="views" count="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" />
// <measure downloads="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" /> // <measure id="downloads" count="0" datasource="infrastruct_::f66f1bd369679b5b077dcdf006089556||OpenAIRE" />
public static String usageMeasureAsXmlElement(String name, Measure measure) { public static String usageMeasureAsXmlElement(String name, Measure measure) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (KeyValue kv : measure.getUnit()) { for (KeyValue kv : measure.getUnit()) {
sb sb
.append("<") .append("<")
.append(name) .append(name)
.append(" ") .append(" ")
.append(attr(measure.getId(), kv.getValue())) .append(attr("id", measure.getId()))
.append(attr("datasource", kv.getKey())) .append(attr("count", kv.getValue()))
.append(" />"); .append(attr("datasource", kv.getKey()))
.append(" />");
} }
return sb.toString(); return sb.toString();
} }

View File

@ -9,6 +9,7 @@ import java.io.StringReader;
import java.util.List; import java.util.List;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.math3.optim.AbstractOptimizationProblem;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.Element; import org.dom4j.Element;
@ -42,6 +43,7 @@ public class XmlRecordFactoryTest {
final Publication p = OBJECT_MAPPER final Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
final String xml = xmlRecordFactory.build(new JoinedEntity(p)); final String xml = xmlRecordFactory.build(new JoinedEntity(p));
assertNotNull(xml); assertNotNull(xml);
@ -51,7 +53,12 @@ public class XmlRecordFactoryTest {
assertNotNull(doc); assertNotNull(doc);
// System.out.println(doc.asXML()); System.out.println(doc.asXML());
assertEquals("10", doc.valueOf("//measure[@id = 'downloads']/@count"));
assertEquals("fakeds", doc.valueOf("//measure[@id = 'downloads']/@datasource"));
assertEquals(0, doc.selectNodes("//measure[@id = 'views']").size());
assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid"));
assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending"));

View File

@ -8,6 +8,26 @@
} }
], ],
"measures": [ "measures": [
{
"id": "downloads",
"unit": [
{
"key": "fakeds",
"value": "10",
"dataInfo": null
}
]
},
{
"id": "views",
"unit": [
{
"key": "fakedss",
"value": "0",
"dataInfo": null
}
]
},
{ {
"id": "influence", "id": "influence",
"unit": [ "unit": [
@ -505,13 +525,13 @@
"extraInfo": [], "extraInfo": [],
"format": [], "format": [],
"fulltext": [ "fulltext": [
{ "value" : "https://osf.io/preprints/socarxiv/7vgtu/download" }, {"value": "https://osf.io/preprints/socarxiv/7vgtu/download"},
{ "value" : "https://osf.io/preprints/socarxiv/7vgtu/download2" } {"value": "https://osf.io/preprints/socarxiv/7vgtu/download2"}
], ],
"id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c", "id": "50|CSC_________::0000ec4dd9df012feaafa77e71a0fb4c",
"instance": [ "instance": [
{ {
"fulltext" : "https://osf.io/preprints/socarxiv/7vgtu/download", "fulltext": "https://osf.io/preprints/socarxiv/7vgtu/download",
"measures": [ "measures": [
{ {
"id": "influence", "id": "influence",
@ -539,6 +559,24 @@
"value": "4.62970429725e-08" "value": "4.62970429725e-08"
} }
] ]
},
{
"id": "downloads",
"unit": [
{
"key": "fakeds",
"value": "10"
}
]
},
{
"id": "views",
"unit": [
{
"key": "fakeds",
"value": "10"
}
]
} }
], ],
"pid": [ "pid": [
@ -674,9 +712,7 @@
}, },
"value": "" "value": ""
}, },
"url": [ "url": ["http://dx.doi.org/10.1109/TED.2018.2853550"]
"http://dx.doi.org/10.1109/TED.2018.2853550"
]
}, },
{ {
"pid": [ "pid": [
@ -812,9 +848,7 @@
}, },
"value": "CC-BY" "value": "CC-BY"
}, },
"url": [ "url": ["http://dx.doi.org/10.1109/TED.2018.2853550"]
"http://dx.doi.org/10.1109/TED.2018.2853550"
]
}, },
{ {
"pid": [ "pid": [
@ -950,9 +984,7 @@
}, },
"value": "" "value": ""
}, },
"url": [ "url": ["http://dx.doi.org/10.1109/TED.2018.2853551"]
"http://dx.doi.org/10.1109/TED.2018.2853551"
]
}, },
{ {
"pid": [ "pid": [
@ -1221,8 +1253,7 @@
}, },
"value": "" "value": ""
}, },
"url": [ "url": []
]
}, },
{ {
"pid": [ "pid": [
@ -1352,9 +1383,7 @@
}, },
"value": "" "value": ""
}, },
"url": [ "url": [""]
""
]
}, },
{ {
"pid": [ "pid": [
@ -1484,9 +1513,7 @@
}, },
"value": "" "value": ""
}, },
"url": [ "url": ["asdasd://not a URL"]
"asdasd://not a URL"
]
} }
], ],
"journal": { "journal": {
@ -1580,7 +1607,9 @@
"schemeid": "dnet:result_typologies", "schemeid": "dnet:result_typologies",
"schemename": "dnet:result_typologies" "schemename": "dnet:result_typologies"
}, },
"source": [ { "value" : "Bulletin of the National Technical University \"KhPI\" A series of \"Information and Modeling\"; № 1 (3) (2020):" } ], "source": [
{"value": "Bulletin of the National Technical University \"KhPI\" A series of \"Information and Modeling\"; № 1 (3) (2020):"}
],
"subject": [ "subject": [
{ {
"dataInfo": { "dataInfo": {