forked from D-Net/dnet-hadoop
merging with branch beta
This commit is contained in:
commit
bd1108f98b
|
@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
|
|
||||||
|
import java.sql.Array;
|
||||||
|
import java.sql.SQLException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
@ -118,6 +120,17 @@ public class OafMapperUtils {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static <T> List<T> listValues(Array values) throws SQLException {
|
||||||
|
if (Objects.isNull(values)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return Arrays
|
||||||
|
.stream((T[]) values.getArray())
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
|
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
|
||||||
return values
|
return values
|
||||||
.stream()
|
.stream()
|
||||||
|
|
|
@ -44,105 +44,104 @@ class OafMapperUtilsTest {
|
||||||
@Test
|
@Test
|
||||||
void testDateValidation() {
|
void testDateValidation() {
|
||||||
|
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
|
assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));
|
||||||
|
|
||||||
assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
|
assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));
|
||||||
|
|
||||||
assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
|
assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
|
||||||
assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
|
assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
|
||||||
assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
|
assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
|
||||||
assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
|
assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"2015-07-03",
|
"2015-07-03",
|
||||||
GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
|
GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
|
||||||
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
|
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
|
||||||
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
|
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
|
||||||
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
|
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
|
||||||
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
|
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
|
||||||
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
|
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
|
||||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
|
||||||
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
|
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
|
||||||
assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
|
assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
|
||||||
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
|
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
|
||||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
|
||||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
|
||||||
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
|
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
|
||||||
assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
|
assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
|
||||||
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
|
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
|
||||||
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
|
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
|
||||||
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
|
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
|
||||||
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
|
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
|
||||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
|
||||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
|
||||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
|
||||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
|
||||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
|
||||||
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
|
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
|
||||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
|
||||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
|
||||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
|
||||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
|
||||||
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
|
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
|
||||||
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
|
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
|
||||||
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
|
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
|
||||||
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
|
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
|
||||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
|
||||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
|
||||||
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
|
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
|
||||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
|
||||||
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
|
assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
|
||||||
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
|
assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
|
||||||
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
|
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
|
||||||
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
|
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
|
||||||
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
|
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
|
||||||
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
|
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
|
||||||
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
|
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
|
||||||
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
|
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
|
||||||
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
|
assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
|
||||||
assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
|
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
|
||||||
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
|
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
|
||||||
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
|
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
|
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
|
||||||
assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
|
assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
|
||||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
|
||||||
assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
|
assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
|
||||||
assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
|
assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
|
||||||
assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
|
assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
|
||||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
|
||||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
|
||||||
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
|
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
|
||||||
assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
|
assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
|
||||||
assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
|
assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
|
||||||
assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
|
assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
|
||||||
assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
|
assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
|
||||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
|
||||||
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
|
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
|
||||||
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
|
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
|
||||||
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
|
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -284,4 +284,4 @@ object SparkGenerateDoiBoost {
|
||||||
.save(s"$workingDirPath/doiBoostOrganization")
|
.save(s"$workingDirPath/doiBoostOrganization")
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -280,10 +280,10 @@ case object Crossref2Oaf {
|
||||||
instance.setDateofacceptance(asField(createdDate.getValue))
|
instance.setDateofacceptance(asField(createdDate.getValue))
|
||||||
}
|
}
|
||||||
val s: List[String] = List("https://doi.org/" + doi)
|
val s: List[String] = List("https://doi.org/" + doi)
|
||||||
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
||||||
// if (links.nonEmpty) {
|
// if (links.nonEmpty) {
|
||||||
// instance.setUrl(links.asJava)
|
// instance.setUrl(links.asJava)
|
||||||
// }
|
// }
|
||||||
if (s.nonEmpty) {
|
if (s.nonEmpty) {
|
||||||
instance.setUrl(s.asJava)
|
instance.setUrl(s.asJava)
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,15 +27,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -143,8 +135,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
smdbe.execute("queryClaims.sql", smdbe::processClaims);
|
smdbe.execute("queryClaims.sql", smdbe::processClaims);
|
||||||
break;
|
break;
|
||||||
case openaire:
|
case openaire:
|
||||||
log.info("Processing datasources...");
|
log.info("Processing services...");
|
||||||
smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix);
|
smdbe.execute("queryServices.sql", smdbe::processService, verifyNamespacePrefix);
|
||||||
|
|
||||||
log.info("Processing projects...");
|
log.info("Processing projects...");
|
||||||
if (dbSchema.equalsIgnoreCase("beta")) {
|
if (dbSchema.equalsIgnoreCase("beta")) {
|
||||||
|
@ -156,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
log.info("Processing Organizations...");
|
log.info("Processing Organizations...");
|
||||||
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||||
|
|
||||||
log.info("Processing relationsNoRemoval ds <-> orgs ...");
|
log.info("Processing relations services <-> orgs ...");
|
||||||
smdbe
|
smdbe
|
||||||
.execute(
|
.execute(
|
||||||
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
|
"queryServiceOrganization.sql", smdbe::processServiceOrganization,
|
||||||
verifyNamespacePrefix);
|
verifyNamespacePrefix);
|
||||||
|
|
||||||
log.info("Processing projects <-> orgs ...");
|
log.info("Processing projects <-> orgs ...");
|
||||||
|
@ -235,32 +227,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
dbClient.processResults(sql, consumer);
|
dbClient.processResults(sql, consumer);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processDatasource(final ResultSet rs) {
|
public List<Oaf> processService(final ResultSet rs) {
|
||||||
try {
|
try {
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
|
|
||||||
final Datasource ds = new Datasource();
|
final Datasource ds = new Datasource();
|
||||||
|
|
||||||
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
|
ds.setId(createOpenaireId(10, rs.getString("id"), true));
|
||||||
ds
|
ds
|
||||||
.setOriginalId(
|
.setOriginalId(
|
||||||
Arrays
|
Arrays
|
||||||
.asList((String[]) rs.getArray("identities").getArray())
|
.asList((String[]) rs.getArray("originalid").getArray())
|
||||||
.stream()
|
.stream()
|
||||||
.filter(StringUtils::isNotBlank)
|
.filter(StringUtils::isNotBlank)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
ds
|
ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom")));
|
||||||
.setCollectedfrom(
|
ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
|
||||||
listKeyValues(
|
|
||||||
createOpenaireId(10, rs.getString("collectedfromid"), true),
|
|
||||||
rs.getString("collectedfromname")));
|
|
||||||
ds.setPid(new ArrayList<>());
|
|
||||||
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
||||||
ds.setDateoftransformation(null); // Value not returned by the SQL query
|
ds.setDateoftransformation(null); // Value not returned by the SQL query
|
||||||
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
||||||
ds.setOaiprovenance(null); // Values not present in the DB
|
ds.setOaiprovenance(null); // Values not present in the DB
|
||||||
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
||||||
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
|
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
|
||||||
|
ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype")));
|
||||||
|
ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype")));
|
||||||
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
||||||
ds.setOfficialname(field(rs.getString("officialname"), info));
|
ds.setOfficialname(field(rs.getString("officialname"), info));
|
||||||
ds.setEnglishname(field(rs.getString("englishname"), info));
|
ds.setEnglishname(field(rs.getString("englishname"), info));
|
||||||
|
@ -277,20 +267,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
|
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
|
||||||
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
||||||
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
||||||
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
|
ds.setLanguages(listValues(rs.getArray("languages")));
|
||||||
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
||||||
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
||||||
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
||||||
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
||||||
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
|
|
||||||
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
|
|
||||||
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
||||||
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
||||||
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
||||||
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
|
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
|
||||||
ds.setVersioning(field(rs.getBoolean("versioning"), info));
|
ds.setVersioning(field(rs.getBoolean("versioning"), info));
|
||||||
|
ds.setVersioncontrol(rs.getBoolean("versioncontrol"));
|
||||||
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
|
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
|
||||||
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
|
|
||||||
ds.setPidsystems(field(rs.getString("pidsystems"), info));
|
ds.setPidsystems(field(rs.getString("pidsystems"), info));
|
||||||
ds.setCertificates(field(rs.getString("certificates"), info));
|
ds.setCertificates(field(rs.getString("certificates"), info));
|
||||||
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
|
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
|
||||||
|
@ -299,13 +288,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
journal(
|
journal(
|
||||||
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
|
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
|
||||||
rs.getString("issnLinking"), info)); // Journal
|
rs.getString("issnLinking"), info)); // Journal
|
||||||
ds.setDataInfo(info);
|
|
||||||
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
|
||||||
|
|
||||||
|
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
|
||||||
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
||||||
ds.setThematic(rs.getBoolean("thematic"));
|
ds.setThematic(rs.getBoolean("thematic"));
|
||||||
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
|
|
||||||
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
||||||
|
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
|
||||||
|
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
|
||||||
|
ds.setResearchproductaccesspolicies(listValues(rs.getArray("researchproductaccesspolicies")));
|
||||||
|
ds
|
||||||
|
.setResearchproductmetadataaccesspolicies(
|
||||||
|
listValues(rs.getArray("researchproductmetadataaccesspolicies")));
|
||||||
|
|
||||||
ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
|
ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
|
||||||
ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
|
ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
|
||||||
ds
|
ds
|
||||||
|
@ -313,8 +307,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(
|
.ofNullable(
|
||||||
rs.getDate("consenttermsofusedate"))
|
rs.getDate("consenttermsofusedate"))
|
||||||
.map(c -> c.toString())
|
.map(java.sql.Date::toString)
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
|
ds
|
||||||
|
.setLastconsenttermsofusedate(
|
||||||
|
Optional
|
||||||
|
.ofNullable(
|
||||||
|
rs.getDate("lastconsenttermsofusedate"))
|
||||||
|
.map(java.sql.Date::toString)
|
||||||
|
.orElse(null));
|
||||||
|
|
||||||
|
ds.setDataInfo(info);
|
||||||
|
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
|
|
||||||
return Arrays.asList(ds);
|
return Arrays.asList(ds);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
|
@ -425,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
|
public List<Oaf> processServiceOrganization(final ResultSet rs) {
|
||||||
try {
|
try {
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
||||||
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
|
final String dsId = createOpenaireId(10, rs.getString("service"), true);
|
||||||
final List<KeyValue> collectedFrom = listKeyValues(
|
final List<KeyValue> collectedFrom = listKeyValues(
|
||||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||||
|
|
||||||
|
@ -603,6 +607,32 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
String.format("%.3f", trust));
|
String.format("%.3f", trust));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
|
||||||
|
if (Objects.isNull(values)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return Arrays
|
||||||
|
.stream((String[]) values.getArray())
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.distinct()
|
||||||
|
.map(s -> keyValueSplitting(s, "@@@"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static KeyValue keyValueSplitting(final String s, String separator) {
|
||||||
|
if (StringUtils.isBlank(s)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final String[] arr = s.split(separator);
|
||||||
|
if (arr.length != 2) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
KeyValue kv = new KeyValue();
|
||||||
|
kv.setKey(createOpenaireId(10, arr[0], true));
|
||||||
|
kv.setValue(arr[1]);
|
||||||
|
return kv;
|
||||||
|
}
|
||||||
|
|
||||||
private Qualifier prepareQualifierSplitting(final String s) {
|
private Qualifier prepareQualifierSplitting(final String s) {
|
||||||
if (StringUtils.isBlank(s)) {
|
if (StringUtils.isBlank(s)) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -30,6 +30,11 @@
|
||||||
<value></value>
|
<value></value>
|
||||||
<description>a blacklist of nsprefixes (comma separated)</description>
|
<description>a blacklist of nsprefixes (comma separated)</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>reuseContent</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>reuse content in the aggregator database</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -85,12 +90,20 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="ImportDB"/>
|
<start to="reuse_db"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
|
<!-- Routes the workflow on the 'reuseContent' configuration property: false goes to ImportDB, true goes to GenerateEntities; any other value falls back to ImportDB. -->
<decision name="reuse_db">
|
||||||
|
<switch>
|
||||||
|
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
|
||||||
|
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
|
||||||
|
<default to="ImportDB"/>
|
||||||
|
</switch>
|
||||||
|
</decision>
|
||||||
|
|
||||||
<action name="ImportDB">
|
<action name="ImportDB">
|
||||||
<java>
|
<java>
|
||||||
<prepare>
|
<prepare>
|
||||||
|
@ -102,6 +115,7 @@
|
||||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--action</arg><arg>openaire</arg>
|
||||||
<arg>--dbschema</arg><arg>${dbSchema}</arg>
|
<arg>--dbschema</arg><arg>${dbSchema}</arg>
|
||||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||||
</java>
|
</java>
|
||||||
|
@ -124,6 +138,55 @@
|
||||||
<arg>--action</arg><arg>claims</arg>
|
<arg>--action</arg><arg>claims</arg>
|
||||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||||
</java>
|
</java>
|
||||||
|
<ok to="GenerateEntities"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<!-- Spark action running GenerateEntitiesApplication on ${contentPath}/db_records and ${contentPath}/db_claims, writing to ${workingDir}/entities; continues to GenerateGraph on success and to Kill on error. -->
<action name="GenerateEntities">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>GenerateEntities</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory ${sparkExecutorMemory}
|
||||||
|
--executor-cores ${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
|
||||||
|
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--shouldHashId</arg><arg>true</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="GenerateGraph"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="GenerateGraph">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>GenerateGraph</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory ${sparkExecutorMemory}
|
||||||
|
--executor-cores ${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
|
||||||
|
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
|
||||||
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
|
@ -27,7 +27,7 @@ SELECT
|
||||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||||
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
|
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
|
||||||
FROM dsm_organizations o
|
FROM dsm_organizations o
|
||||||
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
|
LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
|
||||||
LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
|
LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
|
||||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = p.pid)
|
LEFT OUTER JOIN dsm_identities i ON (i.pid = p.pid)
|
||||||
GROUP BY
|
GROUP BY
|
||||||
|
|
|
@ -16,4 +16,4 @@ SELECT
|
||||||
|
|
||||||
FROM project_organization po
|
FROM project_organization po
|
||||||
LEFT OUTER JOIN projects p ON (p.id = po.project)
|
LEFT OUTER JOIN projects p ON (p.id = po.project)
|
||||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom);
|
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom);
|
|
@ -42,7 +42,7 @@ SELECT
|
||||||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||||
|
|
||||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
|
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
|
||||||
|
|
||||||
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
||||||
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
||||||
|
|
|
@ -40,7 +40,7 @@ SELECT
|
||||||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||||
|
|
||||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
|
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
|
||||||
|
|
||||||
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
||||||
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
SELECT
|
SELECT
|
||||||
dor.datasource AS datasource,
|
dor.service AS service,
|
||||||
dor.organization AS organization,
|
dor.organization AS organization,
|
||||||
NULL AS startdate,
|
NULL AS startdate,
|
||||||
NULL AS enddate,
|
NULL AS enddate,
|
||||||
|
@ -11,6 +11,6 @@ SELECT
|
||||||
dc.officialname AS collectedfromname,
|
dc.officialname AS collectedfromname,
|
||||||
'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
|
'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
|
||||||
d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
|
d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
|
||||||
FROM dsm_datasource_organization dor
|
FROM dsm_service_organization dor
|
||||||
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
|
LEFT OUTER JOIN dsm_services d ON (dor.service = d.id)
|
||||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom);
|
LEFT OUTER JOIN dsm_services dc ON (dc.id = d.collectedfrom);
|
|
@ -1,6 +1,7 @@
|
||||||
SELECT
|
SELECT
|
||||||
d.id AS datasourceid,
|
d.id AS id,
|
||||||
d.id || array_agg(distinct di.pid) AS identities,
|
array_remove(d.id || array_agg(distinct CASE WHEN dp.pid like 'piwik%' THEN di.pid ELSE NULL END) || array_agg(distinct dds.duplicate), NULL) AS originalid,
|
||||||
|
array_remove(array_agg(distinct CASE WHEN di.pid NOT LIKE 'piwik%' THEN di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types' ELSE NULL END), NULL) as pid,
|
||||||
d.officialname AS officialname,
|
d.officialname AS officialname,
|
||||||
d.englishname AS englishname,
|
d.englishname AS englishname,
|
||||||
d.contactemail AS contactemail,
|
d.contactemail AS contactemail,
|
||||||
|
@ -9,8 +10,8 @@ SELECT
|
||||||
THEN
|
THEN
|
||||||
'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
|
'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
|
||||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
|
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
|
||||||
THEN
|
THEN
|
||||||
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
|
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
|
||||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
||||||
THEN
|
THEN
|
||||||
'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
|
'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
|
||||||
|
@ -40,25 +41,29 @@ SELECT
|
||||||
END AS openairecompatibility,
|
END AS openairecompatibility,
|
||||||
d.websiteurl AS websiteurl,
|
d.websiteurl AS websiteurl,
|
||||||
d.logourl AS logourl,
|
d.logourl AS logourl,
|
||||||
array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage,
|
array_remove(array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END), NULL) AS accessinfopackage,
|
||||||
d.latitude AS latitude,
|
d.latitude AS latitude,
|
||||||
d.longitude AS longitude,
|
d.longitude AS longitude,
|
||||||
d.namespaceprefix AS namespaceprefix,
|
d.namespaceprefix AS namespaceprefix,
|
||||||
NULL AS odnumberofitems,
|
NULL AS odnumberofitems,
|
||||||
NULL AS odnumberofitemsdate,
|
NULL AS odnumberofitemsdate,
|
||||||
|
|
||||||
(SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
|
(SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
|
||||||
FROM UNNEST(
|
FROM UNNEST(
|
||||||
ARRAY(
|
ARRAY(
|
||||||
SELECT trim(s)
|
SELECT trim(s)
|
||||||
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
|
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
|
||||||
|
|
||||||
d.description AS description,
|
d.description AS description,
|
||||||
NULL AS odpolicies,
|
NULL AS odpolicies,
|
||||||
ARRAY(SELECT trim(s)
|
array_remove(ARRAY(SELECT trim(s)
|
||||||
FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
|
FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS odlanguages,
|
||||||
ARRAY(SELECT trim(s)
|
array_remove(ARRAY(SELECT trim(s)
|
||||||
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
|
FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS languages,
|
||||||
|
-- Term provided only by OpenDOAR:
|
||||||
|
-- probably updating the TR it could be replaced by research_entity_types[]
|
||||||
|
-- But a study on the vocabulary terms is needed
|
||||||
|
-- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
|
||||||
|
|
||||||
false AS inferred,
|
false AS inferred,
|
||||||
false AS deletedbyinference,
|
false AS deletedbyinference,
|
||||||
0.9 AS trust,
|
0.9 AS trust,
|
||||||
|
@ -69,39 +74,59 @@ SELECT
|
||||||
d.releasestartdate AS releasestartdate,
|
d.releasestartdate AS releasestartdate,
|
||||||
d.releaseenddate AS releaseenddate,
|
d.releaseenddate AS releaseenddate,
|
||||||
d.missionstatementurl AS missionstatementurl,
|
d.missionstatementurl AS missionstatementurl,
|
||||||
d.dataprovider AS dataprovider,
|
-- the following 2 fields (provided by re3data) have been replaced by research_entity_types[]
|
||||||
d.serviceprovider AS serviceprovider,
|
-- VALUE 'Research Data' : d.dataprovider AS dataprovider,
|
||||||
|
-- VALUE 'Services' : d.serviceprovider AS serviceprovider,
|
||||||
d.databaseaccesstype AS databaseaccesstype,
|
d.databaseaccesstype AS databaseaccesstype,
|
||||||
d.datauploadtype AS datauploadtype,
|
d.datauploadtype AS datauploadtype,
|
||||||
d.databaseaccessrestriction AS databaseaccessrestriction,
|
d.databaseaccessrestriction AS databaseaccessrestriction,
|
||||||
d.datauploadrestriction AS datauploadrestriction,
|
d.datauploadrestriction AS datauploadrestriction,
|
||||||
d.versioning AS versioning,
|
-- REPLACED BY version_control : d.versioning AS versioning,
|
||||||
|
d.version_control AS versioning,
|
||||||
|
d.version_control AS versioncontrol,
|
||||||
d.citationguidelineurl AS citationguidelineurl,
|
d.citationguidelineurl AS citationguidelineurl,
|
||||||
d.qualitymanagementkind AS qualitymanagementkind,
|
array_to_string(array_agg(distinct dps.scheme), ' ') AS pidsystems,
|
||||||
d.pidsystems AS pidsystems,
|
|
||||||
d.certificates AS certificates,
|
d.certificates AS certificates,
|
||||||
ARRAY[]::text[] AS policies,
|
ARRAY[]::text[] AS policies,
|
||||||
dc.id AS collectedfromid,
|
array_remove(
|
||||||
dc.officialname AS collectedfromname,
|
array(
|
||||||
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
select distinct cf
|
||||||
d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
|
from unnest(
|
||||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname)
|
||||||
|
) as cf),
|
||||||
|
NULL) AS collectedfrom,
|
||||||
|
d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||||
|
d._typology_to_remove_||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
|
||||||
|
d.eosc_type||'@@@dnet:eosc_types' AS eosctype,
|
||||||
|
d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eoscdatasourcetype,
|
||||||
d.issn AS issnPrinted,
|
d.issn AS issnPrinted,
|
||||||
d.eissn AS issnOnline,
|
d.eissn AS issnOnline,
|
||||||
d.lissn AS issnLinking,
|
d.lissn AS issnLinking,
|
||||||
|
d.research_entity_types AS researchentitytypes,
|
||||||
d.consenttermsofuse AS consenttermsofuse,
|
d.consenttermsofuse AS consenttermsofuse,
|
||||||
d.fulltextdownload AS fulltextdownload,
|
d.fulltextdownload AS fulltextdownload,
|
||||||
d.consenttermsofusedate AS consenttermsofusedate,
|
d.consenttermsofusedate AS consenttermsofusedate,
|
||||||
de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
|
||||||
de.thematic AS thematic,
|
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||||
de.knowledge_graph AS knowledgegraph,
|
d.thematic AS thematic,
|
||||||
array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies
|
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
|
||||||
|
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
|
||||||
|
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
|
||||||
|
array_remove(d.research_product_access_policies, '') AS researchproductaccesspolicies,
|
||||||
|
array_remove(d.research_product_metadata_access_policies, '') AS researchproductmetadataaccesspolicies
|
||||||
|
|
||||||
FROM dsm_datasources d
|
FROM dsm_services d
|
||||||
LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id)
|
LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id)
|
||||||
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
|
LEFT OUTER JOIN dsm_api a ON (d.id = a.service)
|
||||||
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
|
LEFT OUTER JOIN dsm_servicepids dp ON (d.id = dp.service)
|
||||||
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
|
LEFT OUTER JOIN dsm_identities di ON (dp.pid = di.pid)
|
||||||
|
LEFT OUTER JOIN dsm_dedup_services dds ON (d.id = dds.id)
|
||||||
|
LEFT OUTER JOIN dsm_services dds_dup ON (dds.duplicate = dds_dup.id)
|
||||||
|
LEFT OUTER JOIN dsm_services dds_cf ON (dds_dup.collectedfrom = dds_cf.id)
|
||||||
|
LEFT OUTER JOIN dsm_pid_systems dps ON (d.id = dps.service)
|
||||||
|
|
||||||
|
WHERE
|
||||||
|
d.dedup_main_service = true
|
||||||
|
|
||||||
GROUP BY
|
GROUP BY
|
||||||
d.id,
|
d.id,
|
||||||
|
@ -119,23 +144,27 @@ GROUP BY
|
||||||
d.releasestartdate,
|
d.releasestartdate,
|
||||||
d.releaseenddate,
|
d.releaseenddate,
|
||||||
d.missionstatementurl,
|
d.missionstatementurl,
|
||||||
d.dataprovider,
|
-- TODO REMOVED ???: d.dataprovider,
|
||||||
d.serviceprovider,
|
-- TODO REMOVED ???: d.serviceprovider,
|
||||||
d.databaseaccesstype,
|
d.databaseaccesstype,
|
||||||
d.datauploadtype,
|
d.datauploadtype,
|
||||||
d.databaseaccessrestriction,
|
d.databaseaccessrestriction,
|
||||||
d.datauploadrestriction,
|
d.datauploadrestriction,
|
||||||
d.versioning,
|
-- REPLACED BY version_control : d.versioning,
|
||||||
|
d.version_control,
|
||||||
d.citationguidelineurl,
|
d.citationguidelineurl,
|
||||||
d.qualitymanagementkind,
|
-- REMOVED: d.qualitymanagementkind,
|
||||||
d.pidsystems,
|
|
||||||
d.certificates,
|
d.certificates,
|
||||||
dc.id,
|
dc.id,
|
||||||
dc.officialname,
|
dc.officialname,
|
||||||
d.issn,
|
d.issn,
|
||||||
d.eissn,
|
d.eissn,
|
||||||
d.lissn,
|
d.lissn,
|
||||||
de.jurisdiction,
|
d.jurisdiction,
|
||||||
de.thematic,
|
d.thematic,
|
||||||
de.knowledge_graph,
|
-- REMOVED ???: de.knowledge_graph,
|
||||||
de.content_policies
|
d.content_policies,
|
||||||
|
d.submission_policy_url,
|
||||||
|
d.preservation_policy_url,
|
||||||
|
d.research_product_access_policies,
|
||||||
|
d.research_product_metadata_access_policies
|
|
@ -12,8 +12,11 @@ import java.sql.Array;
|
||||||
import java.sql.Date;
|
import java.sql.Date;
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -28,12 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
@ -63,22 +61,32 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testProcessDatasource() throws Exception {
|
public void testProcessService() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("services_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processDatasource(rs);
|
final List<Oaf> list = app.processService(rs);
|
||||||
assertEquals(1, list.size());
|
assertEquals(1, list.size());
|
||||||
verifyMocks(fields);
|
verifyMocks(fields);
|
||||||
|
|
||||||
final Datasource ds = (Datasource) list.get(0);
|
final Datasource ds = (Datasource) list.get(0);
|
||||||
assertValidId(ds.getId());
|
assertValidId(ds.getId());
|
||||||
assertValidId(ds.getCollectedfrom().get(0).getKey());
|
ds
|
||||||
|
.getCollectedfrom()
|
||||||
|
.stream()
|
||||||
|
.map(KeyValue::getKey)
|
||||||
|
.forEach(dsId -> assertValidId(dsId));
|
||||||
|
|
||||||
|
assertEquals(1, ds.getPid().size());
|
||||||
|
assertEquals("r3d100010218", ds.getPid().get(0).getValue());
|
||||||
|
assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid());
|
||||||
|
assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid());
|
||||||
|
|
||||||
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
|
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
|
||||||
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
|
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
|
||||||
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
|
|
||||||
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
|
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
|
||||||
|
assertEquals(getValueAsString("logourl", fields), ds.getLogourl());
|
||||||
|
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
|
||||||
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
|
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
|
||||||
assertEquals(getValueAsString("collectedfromname", fields), ds.getCollectedfrom().get(0).getValue());
|
|
||||||
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
|
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
|
||||||
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
|
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
|
||||||
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
|
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
|
||||||
|
@ -90,19 +98,98 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
|
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
|
||||||
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
|
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
|
||||||
|
|
||||||
|
assertEquals("Data Source", ds.getEosctype().getClassid());
|
||||||
|
assertEquals("Data Source", ds.getEosctype().getClassname());
|
||||||
|
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid());
|
||||||
|
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename());
|
||||||
|
|
||||||
|
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid());
|
||||||
|
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname());
|
||||||
|
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid());
|
||||||
|
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename());
|
||||||
|
|
||||||
|
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid());
|
||||||
|
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname());
|
||||||
|
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid());
|
||||||
|
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename());
|
||||||
|
|
||||||
|
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue());
|
||||||
|
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue());
|
||||||
|
assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation());
|
||||||
|
|
||||||
|
assertEquals(getValueAsString("description", fields), ds.getDescription().getValue());
|
||||||
|
|
||||||
|
// TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects());
|
||||||
|
|
||||||
|
assertEquals("0.0", ds.getOdnumberofitems().getValue());
|
||||||
|
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate());
|
||||||
|
assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
getValueAsList("odlanguages", fields),
|
||||||
|
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||||
|
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
|
||||||
|
assertEquals(
|
||||||
|
getValueAsList("accessinfopackage", fields),
|
||||||
|
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||||
|
assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate());
|
||||||
|
assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate());
|
||||||
|
assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl());
|
||||||
|
|
||||||
|
assertEquals(false, ds.getDataprovider().getValue());
|
||||||
|
assertEquals(false, ds.getServiceprovider().getValue());
|
||||||
|
|
||||||
|
assertEquals(getValueAsString("databaseaccesstype", fields), ds.getDatabaseaccesstype());
|
||||||
|
assertEquals(getValueAsString("datauploadtype", fields), ds.getDatauploadtype());
|
||||||
|
assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction());
|
||||||
|
assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction());
|
||||||
|
|
||||||
|
assertEquals(false, ds.getVersioning().getValue());
|
||||||
|
assertEquals(false, ds.getVersioncontrol());
|
||||||
|
|
||||||
|
assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl());
|
||||||
|
assertEquals(getValueAsString("pidsystems", fields), ds.getPidsystems());
|
||||||
|
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
|
||||||
|
|
||||||
|
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
|
||||||
|
|
||||||
assertEquals("National", ds.getJurisdiction().getClassid());
|
assertEquals("National", ds.getJurisdiction().getClassid());
|
||||||
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
||||||
|
|
||||||
assertTrue(ds.getThematic());
|
assertTrue(ds.getThematic());
|
||||||
assertTrue(ds.getKnowledgegraph());
|
|
||||||
|
|
||||||
assertEquals(1, ds.getContentpolicies().size());
|
HashSet<String> cpSchemeId = ds
|
||||||
assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid());
|
.getContentpolicies()
|
||||||
assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid());
|
.stream()
|
||||||
|
.map(Qualifier::getSchemeid)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
assertTrue(cpSchemeId.size() == 1);
|
||||||
|
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
|
||||||
|
HashSet<String> cpSchemeName = ds
|
||||||
|
.getContentpolicies()
|
||||||
|
.stream()
|
||||||
|
.map(Qualifier::getSchemename)
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
assertTrue(cpSchemeName.size() == 1);
|
||||||
|
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
|
||||||
|
assertEquals(2, ds.getContentpolicies().size());
|
||||||
|
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
|
||||||
|
assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid());
|
||||||
|
|
||||||
|
assertEquals(getValueAsString("submissionpolicyurl", fields), ds.getSubmissionpolicyurl());
|
||||||
|
assertEquals(getValueAsString("preservationpolicyurl", fields), ds.getPreservationpolicyurl());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
getValueAsList("researchproductaccesspolicies", fields),
|
||||||
|
ds.getResearchproductaccesspolicies());
|
||||||
|
assertEquals(
|
||||||
|
getValueAsList("researchproductmetadataaccesspolicies", fields),
|
||||||
|
ds.getResearchproductmetadataaccesspolicies());
|
||||||
|
|
||||||
assertEquals(true, ds.getConsenttermsofuse());
|
assertEquals(true, ds.getConsenttermsofuse());
|
||||||
assertEquals(true, ds.getFulltextdownload());
|
assertEquals(true, ds.getFulltextdownload());
|
||||||
assertEquals("2022-03-11", ds.getConsenttermsofusedate());
|
assertEquals("2022-03-11", ds.getConsenttermsofusedate());
|
||||||
|
assertEquals("2022-03-11", ds.getLastconsenttermsofusedate());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -154,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
public void testProcessDatasourceOrganization() throws Exception {
|
public void testProcessDatasourceOrganization() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
final List<Oaf> list = app.processServiceOrganization(rs);
|
||||||
|
|
||||||
assertEquals(2, list.size());
|
assertEquals(2, list.size());
|
||||||
verifyMocks(fields);
|
verifyMocks(fields);
|
||||||
|
@ -356,18 +443,31 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private Float getValueAsFloat(final String name, final List<TypedField> fields) {
|
private Float getValueAsFloat(final String name, final List<TypedField> fields) {
|
||||||
return new Float(getValueAs(name, fields).toString());
|
final Object value = getValueAs(name, fields);
|
||||||
|
return value != null ? new Float(value.toString()) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Double getValueAsDouble(final String name, final List<TypedField> fields) {
|
||||||
|
final Object value = getValueAs(name, fields);
|
||||||
|
return value != null ? new Double(value.toString()) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Integer getValueAsInt(final String name, final List<TypedField> fields) {
|
||||||
|
final Object value = getValueAs(name, fields);
|
||||||
|
return value != null ? new Integer(value.toString()) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
||||||
return fields
|
final Optional<T> field = fields
|
||||||
.stream()
|
.stream()
|
||||||
.filter(f -> f.getField().equals(name))
|
.filter(f -> f.getField().equals(name))
|
||||||
.map(TypedField::getValue)
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.map(o -> (T) o)
|
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.get();
|
.map(TypedField::getValue)
|
||||||
|
.map(o -> (T) o);
|
||||||
|
if (!field.isPresent()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return field.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> getValueAsList(final String name, final List<TypedField> fields) {
|
private List<String> getValueAsList(final String name, final List<TypedField> fields) {
|
||||||
|
|
|
@ -5,14 +5,47 @@
|
||||||
"value": "274269ac6f3b::2579-5449"
|
"value": "274269ac6f3b::2579-5449"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "identities",
|
"field": "originalid",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"value": [
|
"value": [
|
||||||
"274269ac6f3b::2579-5449",
|
"fairsharing_::1562",
|
||||||
"piwik:13",
|
"piwik:13",
|
||||||
null
|
null,
|
||||||
|
"re3data_____::r3d100010213"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"field": "pid",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
"r3d100010218###re3data@@@dnet:pid_types"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "datasourcetype",
|
||||||
|
"type": "string",
|
||||||
|
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "datasourcetypeui",
|
||||||
|
"type": "string",
|
||||||
|
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "eosctype",
|
||||||
|
"type": "string",
|
||||||
|
"value": "Data Source@@@dnet:eosc_types"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "eoscdatasourcetype",
|
||||||
|
"type": "string",
|
||||||
|
"value": "Journal archive@@@dnet:eosc_datasource_types"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "openairecompatibility",
|
||||||
|
"type": "string",
|
||||||
|
"value": "openaire4.0@@@dnet:datasourceCompatibilityLevel"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"field": "officialname",
|
"field": "officialname",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -23,16 +56,6 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": "Jurnal Ilmiah Pendidikan Scholastic"
|
"value": "Jurnal Ilmiah Pendidikan Scholastic"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"field": "contactemail",
|
|
||||||
"type": "string",
|
|
||||||
"value": "test@test.it"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "openairecompatibility",
|
|
||||||
"type": "string",
|
|
||||||
"value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"field": "websiteurl",
|
"field": "websiteurl",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -44,11 +67,14 @@
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "accessinfopackage",
|
"field": "contactemail",
|
||||||
"type": "array",
|
"type": "string",
|
||||||
"value": [
|
"value": "test@test.it"
|
||||||
null
|
},
|
||||||
]
|
{
|
||||||
|
"field": "namespaceprefix",
|
||||||
|
"type": "string",
|
||||||
|
"value": "ojs_25795449"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "latitude",
|
"field": "latitude",
|
||||||
|
@ -61,9 +87,19 @@
|
||||||
"value": 0
|
"value": 0
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "namespaceprefix",
|
"field": "dateofvalidation",
|
||||||
|
"type": "date",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "description",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": "ojs_25795449"
|
"value": "veterinary medicine"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "subjects",
|
||||||
|
"type": "array",
|
||||||
|
"value": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "odnumberofitems",
|
"field": "odnumberofitems",
|
||||||
|
@ -75,16 +111,6 @@
|
||||||
"type": "date",
|
"type": "date",
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"field": "subjects",
|
|
||||||
"type": "array",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "description",
|
|
||||||
"type": "string",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"field": "odpolicies",
|
"field": "odpolicies",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -93,44 +119,33 @@
|
||||||
{
|
{
|
||||||
"field": "odlanguages",
|
"field": "odlanguages",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"value": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "odcontenttypes",
|
|
||||||
"type": "array",
|
|
||||||
"value": [
|
"value": [
|
||||||
"Journal articles"
|
"English",
|
||||||
|
"German",
|
||||||
|
"French",
|
||||||
|
"Danish",
|
||||||
|
"Norwegian",
|
||||||
|
"Swedish"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "inferred",
|
"field": "languages",
|
||||||
"type": "boolean",
|
"type": "array",
|
||||||
"value": false
|
"value": [
|
||||||
|
"English",
|
||||||
|
"German",
|
||||||
|
"French",
|
||||||
|
"Danish",
|
||||||
|
"Norwegian",
|
||||||
|
"Swedish"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "deletedbyinference",
|
"field": "accessinfopackage",
|
||||||
"type": "boolean",
|
"type": "array",
|
||||||
"value": false
|
"value": [
|
||||||
},
|
"http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai"
|
||||||
{
|
]
|
||||||
"field": "trust",
|
|
||||||
"type": "double",
|
|
||||||
"value": 0.9
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "inferenceprovenance",
|
|
||||||
"type": "string",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "dateofcollection",
|
|
||||||
"type": "date",
|
|
||||||
"value": "2020-01-21"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "dateofvalidation",
|
|
||||||
"type": "date",
|
|
||||||
"value": null
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "releasestartdate",
|
"field": "releasestartdate",
|
||||||
|
@ -147,16 +162,6 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"field": "dataprovider",
|
|
||||||
"type": "boolean",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "serviceprovider",
|
|
||||||
"type": "boolean",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"field": "databaseaccesstype",
|
"field": "databaseaccesstype",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -183,12 +188,12 @@
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "citationguidelineurl",
|
"field": "versioncontrol",
|
||||||
"type": "string",
|
"type": "boolean",
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "qualitymanagementkind",
|
"field": "citationguidelineurl",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
|
@ -208,29 +213,38 @@
|
||||||
"value": []
|
"value": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "collectedfromid",
|
"field": "inferred",
|
||||||
"type": "string",
|
"type": "boolean",
|
||||||
"value": "openaire____::SnVybmFsIEZha3VsdGFzIFNhc3RyYSBVbml2ZXJzaXRhcyBFa2FzYWt0aQ=="
|
"value": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "collectedfromname",
|
"field": "deletedbyinference",
|
||||||
"type": "string",
|
"type": "boolean",
|
||||||
"value": "Jurnal Fakultas Sastra Universitas Ekasakti"
|
"value": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "datasourcetype",
|
"field": "trust",
|
||||||
"type": "string",
|
"type": "double",
|
||||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
"value": 0.9
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "datasourcetypeui",
|
"field": "inferenceprovenance",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "provenanceaction",
|
"field": "dateofcollection",
|
||||||
"type": "not_used",
|
"type": "date",
|
||||||
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
|
"value": "2020-01-21"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "collectedfrom",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
"openaire____::fairsharing@@@FAIRsharing.org",
|
||||||
|
"openaire____::opendoar@@@OpenDOAR",
|
||||||
|
"openaire____::re3data@@@Registry of Research Data Repository"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"field": "issnPrinted",
|
"field": "issnPrinted",
|
||||||
|
@ -247,6 +261,20 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": "2579-5447"
|
"value": "2579-5447"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"field": "researchentitytypes",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
"Research Data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "providedproducttypes",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"field": "jurisdiction",
|
"field": "jurisdiction",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -266,7 +294,32 @@
|
||||||
"field": "contentpolicies",
|
"field": "contentpolicies",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"value": [
|
"value": [
|
||||||
"Journal article@@@eosc:contentpolicies"
|
"Taxonomic classification@@@eosc:contentpolicies",
|
||||||
|
"Resource collection@@@eosc:contentpolicies"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "submissionpolicyurl",
|
||||||
|
"type": "string",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "preservationpolicyurl",
|
||||||
|
"type": "string",
|
||||||
|
"value": "Permanent Archiving https://datadryad.org/stash/faq"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "researchproductaccesspolicies",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
"https://100percentit.com/legal/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "researchproductmetadataaccesspolicies",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
"https://wenmr.science.uu.nl/conditions"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -283,5 +336,10 @@
|
||||||
"field": "consenttermsofusedate",
|
"field": "consenttermsofusedate",
|
||||||
"type": "date",
|
"type": "date",
|
||||||
"value": "2022-03-11"
|
"value": "2022-03-11"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "lastconsenttermsofusedate",
|
||||||
|
"type": "date",
|
||||||
|
"value": "2022-03-11"
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -535,6 +535,12 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (ds.getDatasourcetypeui() != null) {
|
if (ds.getDatasourcetypeui() != null) {
|
||||||
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
|
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
|
||||||
}
|
}
|
||||||
|
if (ds.getEosctype() != null) {
|
||||||
|
metadata.add(XmlSerializationUtils.mapQualifier("eosctype", ds.getEosctype()));
|
||||||
|
}
|
||||||
|
if (ds.getEoscdatasourcetype() != null) {
|
||||||
|
metadata.add(XmlSerializationUtils.mapQualifier("eoscdatasourcetype", ds.getEoscdatasourcetype()));
|
||||||
|
}
|
||||||
if (ds.getOpenairecompatibility() != null) {
|
if (ds.getOpenairecompatibility() != null) {
|
||||||
metadata
|
metadata
|
||||||
.add(
|
.add(
|
||||||
|
@ -583,6 +589,16 @@ public class XmlRecordFactory implements Serializable {
|
||||||
metadata
|
metadata
|
||||||
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
|
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
|
||||||
}
|
}
|
||||||
|
if (ds.getSubjects() != null) {
|
||||||
|
metadata
|
||||||
|
.addAll(
|
||||||
|
ds
|
||||||
|
.getSubjects()
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
if (ds.getOdnumberofitems() != null) {
|
if (ds.getOdnumberofitems() != null) {
|
||||||
metadata
|
metadata
|
||||||
.add(
|
.add(
|
||||||
|
@ -609,6 +625,16 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
|
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
if (ds.getLanguages() != null) {
|
||||||
|
metadata
|
||||||
|
.addAll(
|
||||||
|
ds
|
||||||
|
.getLanguages()
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.map(c -> XmlSerializationUtils.asXmlElement("languages", c))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
if (ds.getOdcontenttypes() != null) {
|
if (ds.getOdcontenttypes() != null) {
|
||||||
metadata
|
metadata
|
||||||
.addAll(
|
.addAll(
|
||||||
|
@ -689,18 +715,18 @@ public class XmlRecordFactory implements Serializable {
|
||||||
XmlSerializationUtils
|
XmlSerializationUtils
|
||||||
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
|
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
|
||||||
}
|
}
|
||||||
|
if (ds.getVersioncontrol() != null) {
|
||||||
|
metadata
|
||||||
|
.add(
|
||||||
|
XmlSerializationUtils
|
||||||
|
.asXmlElement("versioncontrol", ds.getVersioncontrol().toString()));
|
||||||
|
}
|
||||||
if (ds.getCitationguidelineurl() != null) {
|
if (ds.getCitationguidelineurl() != null) {
|
||||||
metadata
|
metadata
|
||||||
.add(
|
.add(
|
||||||
XmlSerializationUtils
|
XmlSerializationUtils
|
||||||
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
|
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
|
||||||
}
|
}
|
||||||
if (ds.getQualitymanagementkind() != null) {
|
|
||||||
metadata
|
|
||||||
.add(
|
|
||||||
XmlSerializationUtils
|
|
||||||
.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
|
|
||||||
}
|
|
||||||
if (ds.getPidsystems() != null) {
|
if (ds.getPidsystems() != null) {
|
||||||
metadata
|
metadata
|
||||||
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
|
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
|
||||||
|
@ -722,17 +748,24 @@ public class XmlRecordFactory implements Serializable {
|
||||||
if (ds.getJournal() != null) {
|
if (ds.getJournal() != null) {
|
||||||
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
|
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
|
||||||
}
|
}
|
||||||
if (ds.getSubjects() != null) {
|
if (ds.getResearchentitytypes() != null) {
|
||||||
metadata
|
metadata
|
||||||
.addAll(
|
.addAll(
|
||||||
ds
|
ds
|
||||||
.getSubjects()
|
.getResearchentitytypes()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.map(c -> XmlSerializationUtils.asXmlElement("researchentitytypes", c))
|
||||||
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
if (ds.getProvidedproducttypes() != null) {
|
||||||
|
metadata
|
||||||
|
.addAll(
|
||||||
|
ds
|
||||||
|
.getProvidedproducttypes()
|
||||||
|
.stream()
|
||||||
|
.map(c -> XmlSerializationUtils.asXmlElement("providedproducttypes", c))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ds.getJurisdiction() != null) {
|
if (ds.getJurisdiction() != null) {
|
||||||
metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
|
metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
|
||||||
}
|
}
|
||||||
|
@ -741,11 +774,6 @@ public class XmlRecordFactory implements Serializable {
|
||||||
metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
|
metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ds.getKnowledgegraph() != null) {
|
|
||||||
metadata
|
|
||||||
.add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ds.getContentpolicies() != null) {
|
if (ds.getContentpolicies() != null) {
|
||||||
metadata
|
metadata
|
||||||
.addAll(
|
.addAll(
|
||||||
|
@ -756,7 +784,34 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
|
.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
if (ds.getSubmissionpolicyurl() != null) {
|
||||||
|
metadata
|
||||||
|
.add(XmlSerializationUtils.asXmlElement("submissionpolicyurl", ds.getSubmissionpolicyurl()));
|
||||||
|
}
|
||||||
|
if (ds.getPreservationpolicyurl() != null) {
|
||||||
|
metadata
|
||||||
|
.add(
|
||||||
|
XmlSerializationUtils.asXmlElement("preservationpolicyurl", ds.getPreservationpolicyurl()));
|
||||||
|
}
|
||||||
|
if (ds.getResearchproductaccesspolicies() != null) {
|
||||||
|
metadata
|
||||||
|
.addAll(
|
||||||
|
ds
|
||||||
|
.getResearchproductaccesspolicies()
|
||||||
|
.stream()
|
||||||
|
.map(c -> XmlSerializationUtils.asXmlElement("researchproductaccesspolicies", c))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
|
if (ds.getResearchproductmetadataaccesspolicies() != null) {
|
||||||
|
metadata
|
||||||
|
.addAll(
|
||||||
|
ds
|
||||||
|
.getResearchproductmetadataaccesspolicies()
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
c -> XmlSerializationUtils.asXmlElement("researchproductmetadataaccesspolicies", c))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case organization:
|
case organization:
|
||||||
final Organization o = (Organization) entity;
|
final Organization o = (Organization) entity;
|
||||||
|
|
|
@ -11,6 +11,7 @@ import java.util.List;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
|
import org.dom4j.Element;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
@ -142,7 +143,7 @@ public class XmlRecordFactoryTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDatasource() throws IOException, DocumentException {
|
public void testService() throws IOException, DocumentException {
|
||||||
final ContextMapper contextMapper = new ContextMapper();
|
final ContextMapper contextMapper = new ContextMapper();
|
||||||
|
|
||||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||||
|
@ -167,6 +168,14 @@ public class XmlRecordFactoryTest {
|
||||||
assertEquals("true", doc.valueOf("//thematic"));
|
assertEquals("true", doc.valueOf("//thematic"));
|
||||||
assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname"));
|
assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname"));
|
||||||
assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname"));
|
assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname"));
|
||||||
|
assertEquals("Data Source", doc.valueOf("//eosctype/@classname"));
|
||||||
|
|
||||||
|
final List pids = doc.selectNodes("//pid");
|
||||||
|
assertEquals(1, pids.size());
|
||||||
|
assertEquals("re3data", ((Element) pids.get(0)).attribute("classid").getValue());
|
||||||
|
assertEquals(
|
||||||
|
"Registry of research data repositories", ((Element) pids.get(0)).attribute("classname").getValue());
|
||||||
|
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemeid").getValue());
|
||||||
|
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemename").getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because one or more lines are too long
2
pom.xml
2
pom.xml
|
@ -801,7 +801,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[2.10.32]</dhp-schemas.version>
|
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue