forked from D-Net/dnet-hadoop
[eosc_services] resolved merge conflicts
This commit is contained in:
commit
da611cfbbd
|
@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
|||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
import java.sql.Array;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.function.Function;
|
||||
|
@ -118,6 +120,17 @@ public class OafMapperUtils {
|
|||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static <T> List<T> listValues(Array values) throws SQLException {
|
||||
if (Objects.isNull(values)) {
|
||||
return null;
|
||||
}
|
||||
return Arrays
|
||||
.stream((T[]) values.getArray())
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
|
||||
return values
|
||||
.stream()
|
||||
|
|
|
@ -44,105 +44,104 @@ class OafMapperUtilsTest {
|
|||
@Test
|
||||
void testDateValidation() {
|
||||
|
||||
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
||||
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
||||
assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
|
||||
assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
|
||||
assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
|
||||
assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));
|
||||
|
||||
assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
|
||||
assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));
|
||||
|
||||
assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
|
||||
assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
|
||||
assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
|
||||
assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
|
||||
assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
|
||||
assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
|
||||
assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
|
||||
assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
|
||||
assertEquals(
|
||||
"2015-07-03",
|
||||
GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
|
||||
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
|
||||
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
|
||||
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
|
||||
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
|
||||
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
|
||||
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
|
||||
assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
|
||||
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
|
||||
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
|
||||
assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
|
||||
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
|
||||
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
|
||||
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
|
||||
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
|
||||
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
|
||||
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
|
||||
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
|
||||
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
|
||||
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
|
||||
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
|
||||
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
|
||||
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
|
||||
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
|
||||
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
|
||||
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
|
||||
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
|
||||
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
|
||||
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
|
||||
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
|
||||
assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
|
||||
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
|
||||
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
|
||||
GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
|
||||
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
|
||||
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
|
||||
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
|
||||
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
|
||||
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
|
||||
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
|
||||
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
|
||||
assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
|
||||
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
|
||||
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
|
||||
assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
|
||||
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
|
||||
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
|
||||
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
|
||||
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
|
||||
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
|
||||
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
|
||||
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
|
||||
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
|
||||
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
|
||||
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
|
||||
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
|
||||
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
|
||||
assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
|
||||
assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
|
||||
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
|
||||
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
|
||||
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
|
||||
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
|
||||
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
|
||||
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
|
||||
assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
|
||||
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
|
||||
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
|
||||
assertEquals(
|
||||
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
|
||||
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
|
||||
assertEquals(
|
||||
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
|
||||
assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
|
||||
assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
|
||||
assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
|
||||
assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
|
||||
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
|
||||
assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
|
||||
assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
|
||||
assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
|
||||
assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
|
||||
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
|
||||
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
|
||||
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
|
||||
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
|
||||
assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
|
||||
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
|
||||
assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
|
||||
assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
|
||||
assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
|
||||
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
|
||||
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
|
||||
assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
|
||||
assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
|
||||
assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
|
||||
assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
|
||||
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
|
||||
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
|
||||
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
|
||||
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -284,4 +284,4 @@ object SparkGenerateDoiBoost {
|
|||
.save(s"$workingDirPath/doiBoostOrganization")
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -280,10 +280,10 @@ case object Crossref2Oaf {
|
|||
instance.setDateofacceptance(asField(createdDate.getValue))
|
||||
}
|
||||
val s: List[String] = List("https://doi.org/" + doi)
|
||||
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
||||
// if (links.nonEmpty) {
|
||||
// instance.setUrl(links.asJava)
|
||||
// }
|
||||
// val links: List[String] = ((for {JString(url) <- json \ "link" \ "URL"} yield url) ::: List(s)).filter(p => p != null && p.toLowerCase().contains(doi.toLowerCase())).distinct
|
||||
// if (links.nonEmpty) {
|
||||
// instance.setUrl(links.asJava)
|
||||
// }
|
||||
if (s.nonEmpty) {
|
||||
instance.setUrl(s.asJava)
|
||||
}
|
||||
|
@ -584,12 +584,10 @@ case object Crossref2Oaf {
|
|||
if (dp.length == 10) {
|
||||
return GraphCleaningFunctions.cleanDate(dp)
|
||||
}
|
||||
}
|
||||
else if (res.size ==2) {
|
||||
} else if (res.size == 2) {
|
||||
val dp = f"${res.head}-${res(1)}%02d-01"
|
||||
return GraphCleaningFunctions.cleanDate(dp)
|
||||
}
|
||||
else if (res.size ==1) {
|
||||
} else if (res.size == 1) {
|
||||
return GraphCleaningFunctions.cleanDate(s"${res.head}-01-01")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,10 +73,10 @@ class CrossrefMappingTest {
|
|||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
def crossrefIssueDateTest(): Unit = {
|
||||
val json = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
|
||||
val json =
|
||||
Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/issue_date.json")).mkString
|
||||
assertNotNull(json)
|
||||
assertFalse(json.isEmpty)
|
||||
val resultList: List[Oaf] = Crossref2Oaf.convert(json)
|
||||
|
|
|
@ -27,15 +27,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
|||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
@ -143,8 +135,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
smdbe.execute("queryClaims.sql", smdbe::processClaims);
|
||||
break;
|
||||
case openaire:
|
||||
log.info("Processing datasources...");
|
||||
smdbe.execute("queryDatasources.sql", smdbe::processDatasource, verifyNamespacePrefix);
|
||||
log.info("Processing services...");
|
||||
smdbe.execute("queryServices.sql", smdbe::processService, verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing projects...");
|
||||
if (dbSchema.equalsIgnoreCase("beta")) {
|
||||
|
@ -156,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
log.info("Processing Organizations...");
|
||||
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing relationsNoRemoval ds <-> orgs ...");
|
||||
log.info("Processing relations services <-> orgs ...");
|
||||
smdbe
|
||||
.execute(
|
||||
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
|
||||
"queryServiceOrganization.sql", smdbe::processServiceOrganization,
|
||||
verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing projects <-> orgs ...");
|
||||
|
@ -235,32 +227,30 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
dbClient.processResults(sql, consumer);
|
||||
}
|
||||
|
||||
public List<Oaf> processDatasource(final ResultSet rs) {
|
||||
public List<Oaf> processService(final ResultSet rs) {
|
||||
try {
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Datasource ds = new Datasource();
|
||||
|
||||
ds.setId(createOpenaireId(10, rs.getString("datasourceid"), true));
|
||||
ds.setId(createOpenaireId(10, rs.getString("id"), true));
|
||||
ds
|
||||
.setOriginalId(
|
||||
Arrays
|
||||
.asList((String[]) rs.getArray("identities").getArray())
|
||||
.asList((String[]) rs.getArray("originalid").getArray())
|
||||
.stream()
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.collect(Collectors.toList()));
|
||||
ds
|
||||
.setCollectedfrom(
|
||||
listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true),
|
||||
rs.getString("collectedfromname")));
|
||||
ds.setPid(new ArrayList<>());
|
||||
ds.setCollectedfrom(prepareCollectedfrom(rs.getArray("collectedfrom")));
|
||||
ds.setPid(prepareListOfStructProps(rs.getArray("pid"), info));
|
||||
ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
|
||||
ds.setDateoftransformation(null); // Value not returned by the SQL query
|
||||
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
||||
ds.setOaiprovenance(null); // Values not present in the DB
|
||||
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
||||
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
|
||||
ds.setEosctype(prepareQualifierSplitting(rs.getString("eosctype")));
|
||||
ds.setEoscdatasourcetype(prepareQualifierSplitting(rs.getString("eoscdatasourcetype")));
|
||||
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
||||
ds.setOfficialname(field(rs.getString("officialname"), info));
|
||||
ds.setEnglishname(field(rs.getString("englishname"), info));
|
||||
|
@ -277,20 +267,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
ds.setOdnumberofitemsdate(field(asString(rs.getDate("odnumberofitemsdate")), info));
|
||||
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
||||
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
||||
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
|
||||
ds.setLanguages(listValues(rs.getArray("languages")));
|
||||
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
||||
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
||||
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
||||
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
||||
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
|
||||
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
|
||||
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
||||
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
||||
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
||||
ds.setDatauploadrestriction(field(rs.getString("datauploadrestriction"), info));
|
||||
ds.setVersioning(field(rs.getBoolean("versioning"), info));
|
||||
ds.setVersioncontrol(rs.getBoolean("versioncontrol"));
|
||||
ds.setCitationguidelineurl(field(rs.getString("citationguidelineurl"), info));
|
||||
ds.setQualitymanagementkind(field(rs.getString("qualitymanagementkind"), info));
|
||||
|
||||
ds.setPidsystems(field(rs.getString("pidsystems"), info));
|
||||
ds.setCertificates(field(rs.getString("certificates"), info));
|
||||
ds.setPolicies(new ArrayList<>()); // The sql query returns an empty array
|
||||
|
@ -299,13 +288,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
journal(
|
||||
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
|
||||
rs.getString("issnLinking"), info)); // Journal
|
||||
ds.setDataInfo(info);
|
||||
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
|
||||
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
||||
ds.setThematic(rs.getBoolean("thematic"));
|
||||
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
|
||||
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
||||
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
|
||||
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
|
||||
ds.setResearchproductaccesspolicies(listValues(rs.getArray("researchproductaccesspolicies")));
|
||||
ds
|
||||
.setResearchproductmetadataaccesspolicies(
|
||||
listValues(rs.getArray("researchproductmetadataaccesspolicies")));
|
||||
|
||||
ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
|
||||
ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
|
||||
ds
|
||||
|
@ -313,8 +307,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
Optional
|
||||
.ofNullable(
|
||||
rs.getDate("consenttermsofusedate"))
|
||||
.map(c -> c.toString())
|
||||
.map(java.sql.Date::toString)
|
||||
.orElse(null));
|
||||
ds
|
||||
.setLastconsenttermsofusedate(
|
||||
Optional
|
||||
.ofNullable(
|
||||
rs.getDate("lastconsenttermsofusedate"))
|
||||
.map(java.sql.Date::toString)
|
||||
.orElse(null));
|
||||
|
||||
ds.setDataInfo(info);
|
||||
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
return Arrays.asList(ds);
|
||||
} catch (final Exception e) {
|
||||
|
@ -425,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
|
||||
}
|
||||
|
||||
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
|
||||
public List<Oaf> processServiceOrganization(final ResultSet rs) {
|
||||
try {
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
||||
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
|
||||
final String dsId = createOpenaireId(10, rs.getString("service"), true);
|
||||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
|
@ -603,6 +607,32 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
String.format("%.3f", trust));
|
||||
}
|
||||
|
||||
private List<KeyValue> prepareCollectedfrom(Array values) throws SQLException {
|
||||
if (Objects.isNull(values)) {
|
||||
return null;
|
||||
}
|
||||
return Arrays
|
||||
.stream((String[]) values.getArray())
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.map(s -> keyValueSplitting(s, "@@@"))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static KeyValue keyValueSplitting(final String s, String separator) {
|
||||
if (StringUtils.isBlank(s)) {
|
||||
return null;
|
||||
}
|
||||
final String[] arr = s.split(separator);
|
||||
if (arr.length != 2) {
|
||||
return null;
|
||||
}
|
||||
KeyValue kv = new KeyValue();
|
||||
kv.setKey(createOpenaireId(10, arr[0], true));
|
||||
kv.setValue(arr[1]);
|
||||
return kv;
|
||||
}
|
||||
|
||||
private Qualifier prepareQualifierSplitting(final String s) {
|
||||
if (StringUtils.isBlank(s)) {
|
||||
return null;
|
||||
|
|
|
@ -30,6 +30,11 @@
|
|||
<value></value>
|
||||
<description>a blacklist of nsprefixes (comma separeted)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>reuseContent</name>
|
||||
<value>false</value>
|
||||
<description>reuse content in the aggregator database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
|
@ -85,12 +90,20 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="ImportDB"/>
|
||||
<start to="reuse_db"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<decision name="reuse_db">
|
||||
<switch>
|
||||
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
|
||||
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
|
||||
<default to="ImportDB"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="ImportDB">
|
||||
<java>
|
||||
<prepare>
|
||||
|
@ -102,6 +115,7 @@
|
|||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--action</arg><arg>openaire</arg>
|
||||
<arg>--dbschema</arg><arg>${dbSchema}</arg>
|
||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||
</java>
|
||||
|
@ -124,6 +138,55 @@
|
|||
<arg>--action</arg><arg>claims</arg>
|
||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||
</java>
|
||||
<ok to="GenerateEntities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateEntities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateEntities</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
|
||||
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--shouldHashId</arg><arg>true</arg>
|
||||
</spark>
|
||||
<ok to="GenerateGraph"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateGraph">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateGraph</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
|
|
@ -27,7 +27,7 @@ SELECT
|
|||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
array_remove(array_agg(DISTINCT i.pid || '###' || i.issuertype || '@@@' || i.issuertype), NULL) AS pid
|
||||
FROM dsm_organizations o
|
||||
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
|
||||
LEFT OUTER JOIN dsm_services d ON (d.id = o.collectedfrom)
|
||||
LEFT OUTER JOIN dsm_organizationpids p ON (p.organization = o.id)
|
||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = p.pid)
|
||||
GROUP BY
|
||||
|
|
|
@ -16,4 +16,4 @@ SELECT
|
|||
|
||||
FROM project_organization po
|
||||
LEFT OUTER JOIN projects p ON (p.id = po.project)
|
||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom);
|
||||
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom);
|
|
@ -42,7 +42,7 @@ SELECT
|
|||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||
|
||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
|
||||
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
|
||||
|
||||
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
||||
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
||||
|
|
|
@ -40,7 +40,7 @@ SELECT
|
|||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||
|
||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = p.collectedfrom)
|
||||
LEFT OUTER JOIN dsm_services dc ON (dc.id = p.collectedfrom)
|
||||
|
||||
LEFT OUTER JOIN project_fundingpath pf ON (pf.project = p.id)
|
||||
LEFT OUTER JOIN fundingpaths fp ON (fp.id = pf.funding)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
SELECT
|
||||
dor.datasource AS datasource,
|
||||
dor.service AS service,
|
||||
dor.organization AS organization,
|
||||
NULL AS startdate,
|
||||
NULL AS enddate,
|
||||
|
@ -11,6 +11,6 @@ SELECT
|
|||
dc.officialname AS collectedfromname,
|
||||
'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
|
||||
d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
|
||||
FROM dsm_datasource_organization dor
|
||||
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
|
||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom);
|
||||
FROM dsm_service_organization dor
|
||||
LEFT OUTER JOIN dsm_services d ON (dor.service = d.id)
|
||||
LEFT OUTER JOIN dsm_services dc ON (dc.id = d.collectedfrom);
|
|
@ -1,6 +1,7 @@
|
|||
SELECT
|
||||
d.id AS datasourceid,
|
||||
d.id || array_agg(distinct di.pid) AS identities,
|
||||
d.id AS id,
|
||||
array_remove(d.id || array_agg(distinct CASE WHEN dp.pid like 'piwik%' THEN di.pid ELSE NULL END) || array_agg(distinct dds.duplicate), NULL) AS originalid,
|
||||
array_remove(array_agg(distinct CASE WHEN di.pid NOT LIKE 'piwik%' THEN di.pid||'###'||di.issuertype||'@@@'||'dnet:pid_types' ELSE NULL END), NULL) as pid,
|
||||
d.officialname AS officialname,
|
||||
d.englishname AS englishname,
|
||||
d.contactemail AS contactemail,
|
||||
|
@ -9,8 +10,8 @@ SELECT
|
|||
THEN
|
||||
'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
|
||||
THEN
|
||||
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
|
||||
THEN
|
||||
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
||||
THEN
|
||||
'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
|
||||
|
@ -40,25 +41,29 @@ SELECT
|
|||
END AS openairecompatibility,
|
||||
d.websiteurl AS websiteurl,
|
||||
d.logourl AS logourl,
|
||||
array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END) AS accessinfopackage,
|
||||
array_remove(array_agg(DISTINCT CASE WHEN a.protocol = 'oai' and last_aggregation_date is not null THEN a.baseurl ELSE NULL END), NULL) AS accessinfopackage,
|
||||
d.latitude AS latitude,
|
||||
d.longitude AS longitude,
|
||||
d.namespaceprefix AS namespaceprefix,
|
||||
NULL AS odnumberofitems,
|
||||
NULL AS odnumberofitemsdate,
|
||||
|
||||
(SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
|
||||
FROM UNNEST(
|
||||
ARRAY(
|
||||
SELECT trim(s)
|
||||
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
|
||||
FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,
|
||||
|
||||
d.description AS description,
|
||||
NULL AS odpolicies,
|
||||
ARRAY(SELECT trim(s)
|
||||
FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
|
||||
ARRAY(SELECT trim(s)
|
||||
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
|
||||
array_remove(ARRAY(SELECT trim(s)
|
||||
FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS odlanguages,
|
||||
array_remove(ARRAY(SELECT trim(s)
|
||||
FROM unnest(string_to_array(regexp_replace(d.languages, '{|}|"', '', 'g'), ',')) AS s), '{}') AS languages,
|
||||
-- Term provided only by OpenDOAR:
|
||||
-- probably updating the TR it could be replaced by research_entity_types[]
|
||||
-- But a study on the vocabulary terms is needed
|
||||
-- REMOVED: ARRAY(SELECT trim(s) FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes,
|
||||
|
||||
false AS inferred,
|
||||
false AS deletedbyinference,
|
||||
0.9 AS trust,
|
||||
|
@ -69,39 +74,59 @@ SELECT
|
|||
d.releasestartdate AS releasestartdate,
|
||||
d.releaseenddate AS releaseenddate,
|
||||
d.missionstatementurl AS missionstatementurl,
|
||||
d.dataprovider AS dataprovider,
|
||||
d.serviceprovider AS serviceprovider,
|
||||
-- the following 2 fields (provided by re3data) have been replaced by research_entity_types[]
|
||||
-- VALUE 'Research Data' : d.dataprovider AS dataprovider,
|
||||
-- VALUE 'Services' : d.serviceprovider AS serviceprovider,
|
||||
d.databaseaccesstype AS databaseaccesstype,
|
||||
d.datauploadtype AS datauploadtype,
|
||||
d.databaseaccessrestriction AS databaseaccessrestriction,
|
||||
d.datauploadrestriction AS datauploadrestriction,
|
||||
d.versioning AS versioning,
|
||||
-- REPLACED BY version_control : d.versioning AS versioning,
|
||||
d.version_control AS versioning,
|
||||
d.version_control AS versioncontrol,
|
||||
d.citationguidelineurl AS citationguidelineurl,
|
||||
d.qualitymanagementkind AS qualitymanagementkind,
|
||||
d.pidsystems AS pidsystems,
|
||||
array_to_string(array_agg(distinct dps.scheme), ' ') AS pidsystems,
|
||||
d.certificates AS certificates,
|
||||
ARRAY[]::text[] AS policies,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||
d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
array_remove(
|
||||
array(
|
||||
select distinct cf
|
||||
from unnest(
|
||||
dc.id||'@@@'||dc.officialname || array_agg(distinct dds_cf.id||'@@@'||dds_cf.officialname)
|
||||
) as cf),
|
||||
NULL) AS collectedfrom,
|
||||
d._typology_to_remove_||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||
d._typology_to_remove_||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
|
||||
d.eosc_type||'@@@dnet:eosc_types' AS eosctype,
|
||||
d.eosc_datasource_type||'@@@dnet:eosc_datasource_types' AS eoscdatasourcetype,
|
||||
d.issn AS issnPrinted,
|
||||
d.eissn AS issnOnline,
|
||||
d.lissn AS issnLinking,
|
||||
d.research_entity_types AS researchentitytypes,
|
||||
d.consenttermsofuse AS consenttermsofuse,
|
||||
d.fulltextdownload AS fulltextdownload,
|
||||
d.consenttermsofusedate AS consenttermsofusedate,
|
||||
de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||
de.thematic AS thematic,
|
||||
de.knowledge_graph AS knowledgegraph,
|
||||
array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies
|
||||
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
|
||||
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||
d.thematic AS thematic,
|
||||
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
|
||||
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
|
||||
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
|
||||
array_remove(d.research_product_access_policies, '') AS researchproductaccesspolicies,
|
||||
array_remove(d.research_product_metadata_access_policies, '') AS researchproductmetadataaccesspolicies
|
||||
|
||||
FROM dsm_datasources d
|
||||
LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id)
|
||||
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
|
||||
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
|
||||
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
|
||||
FROM dsm_services d
|
||||
LEFT OUTER JOIN dsm_services dc on (d.collectedfrom = dc.id)
|
||||
LEFT OUTER JOIN dsm_api a ON (d.id = a.service)
|
||||
LEFT OUTER JOIN dsm_servicepids dp ON (d.id = dp.service)
|
||||
LEFT OUTER JOIN dsm_identities di ON (dp.pid = di.pid)
|
||||
LEFT OUTER JOIN dsm_dedup_services dds ON (d.id = dds.id)
|
||||
LEFT OUTER JOIN dsm_services dds_dup ON (dds.duplicate = dds_dup.id)
|
||||
LEFT OUTER JOIN dsm_services dds_cf ON (dds_dup.collectedfrom = dds_cf.id)
|
||||
LEFT OUTER JOIN dsm_pid_systems dps ON (d.id = dps.service)
|
||||
|
||||
WHERE
|
||||
d.dedup_main_service = true
|
||||
|
||||
GROUP BY
|
||||
d.id,
|
||||
|
@ -119,23 +144,27 @@ GROUP BY
|
|||
d.releasestartdate,
|
||||
d.releaseenddate,
|
||||
d.missionstatementurl,
|
||||
d.dataprovider,
|
||||
d.serviceprovider,
|
||||
-- TODO REMOVED ???: d.dataprovider,
|
||||
-- TODO REMOVED ???: d.serviceprovider,
|
||||
d.databaseaccesstype,
|
||||
d.datauploadtype,
|
||||
d.databaseaccessrestriction,
|
||||
d.datauploadrestriction,
|
||||
d.versioning,
|
||||
-- REPLACED BY version_control : d.versioning,
|
||||
d.version_control,
|
||||
d.citationguidelineurl,
|
||||
d.qualitymanagementkind,
|
||||
d.pidsystems,
|
||||
-- REMOVED: d.qualitymanagementkind,
|
||||
d.certificates,
|
||||
dc.id,
|
||||
dc.officialname,
|
||||
d.issn,
|
||||
d.eissn,
|
||||
d.lissn,
|
||||
de.jurisdiction,
|
||||
de.thematic,
|
||||
de.knowledge_graph,
|
||||
de.content_policies
|
||||
d.jurisdiction,
|
||||
d.thematic,
|
||||
-- REMOVED ???: de.knowledge_graph,
|
||||
d.content_policies,
|
||||
d.submission_policy_url,
|
||||
d.preservation_policy_url,
|
||||
d.research_product_access_policies,
|
||||
d.research_product_metadata_access_policies
|
|
@ -12,8 +12,11 @@ import java.sql.Array;
|
|||
import java.sql.Date;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
@ -28,12 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
|
@ -63,22 +61,32 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testProcessDatasource() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json");
|
||||
public void testProcessService() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("services_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processDatasource(rs);
|
||||
final List<Oaf> list = app.processService(rs);
|
||||
assertEquals(1, list.size());
|
||||
verifyMocks(fields);
|
||||
|
||||
final Datasource ds = (Datasource) list.get(0);
|
||||
assertValidId(ds.getId());
|
||||
assertValidId(ds.getCollectedfrom().get(0).getKey());
|
||||
ds
|
||||
.getCollectedfrom()
|
||||
.stream()
|
||||
.map(KeyValue::getKey)
|
||||
.forEach(dsId -> assertValidId(dsId));
|
||||
|
||||
assertEquals(1, ds.getPid().size());
|
||||
assertEquals("r3d100010218", ds.getPid().get(0).getValue());
|
||||
assertEquals("re3data", ds.getPid().get(0).getQualifier().getClassid());
|
||||
assertEquals("dnet:pid_types", ds.getPid().get(0).getQualifier().getSchemeid());
|
||||
|
||||
assertEquals(getValueAsString("officialname", fields), ds.getOfficialname().getValue());
|
||||
assertEquals(getValueAsString("englishname", fields), ds.getEnglishname().getValue());
|
||||
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
|
||||
assertEquals(getValueAsString("websiteurl", fields), ds.getWebsiteurl().getValue());
|
||||
assertEquals(getValueAsString("logourl", fields), ds.getLogourl());
|
||||
assertEquals(getValueAsString("contactemail", fields), ds.getContactemail().getValue());
|
||||
assertEquals(getValueAsString("namespaceprefix", fields), ds.getNamespaceprefix().getValue());
|
||||
assertEquals(getValueAsString("collectedfromname", fields), ds.getCollectedfrom().get(0).getValue());
|
||||
assertEquals(getValueAsString("officialname", fields), ds.getJournal().getName());
|
||||
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
|
||||
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
|
||||
|
@ -90,19 +98,98 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
|
||||
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
|
||||
|
||||
assertEquals("Data Source", ds.getEosctype().getClassid());
|
||||
assertEquals("Data Source", ds.getEosctype().getClassname());
|
||||
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemeid());
|
||||
assertEquals("dnet:eosc_types", ds.getEosctype().getSchemename());
|
||||
|
||||
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassid());
|
||||
assertEquals("Journal archive", ds.getEoscdatasourcetype().getClassname());
|
||||
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemeid());
|
||||
assertEquals("dnet:eosc_datasource_types", ds.getEoscdatasourcetype().getSchemename());
|
||||
|
||||
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassid());
|
||||
assertEquals("openaire4.0", ds.getOpenairecompatibility().getClassname());
|
||||
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemeid());
|
||||
assertEquals("dnet:datasourceCompatibilityLevel", ds.getOpenairecompatibility().getSchemename());
|
||||
|
||||
assertEquals(getValueAsDouble("latitude", fields).toString(), ds.getLatitude().getValue());
|
||||
assertEquals(getValueAsDouble("longitude", fields).toString(), ds.getLongitude().getValue());
|
||||
assertEquals(getValueAsString("dateofvalidation", fields), ds.getDateofvalidation());
|
||||
|
||||
assertEquals(getValueAsString("description", fields), ds.getDescription().getValue());
|
||||
|
||||
// TODO assertEquals(getValueAsString("subjects", fields), ds.getSubjects());
|
||||
|
||||
assertEquals("0.0", ds.getOdnumberofitems().getValue());
|
||||
assertEquals(getValueAsString("odnumberofitemsdate", fields), ds.getOdnumberofitemsdate());
|
||||
assertEquals(getValueAsString("odpolicies", fields), ds.getOdpolicies());
|
||||
|
||||
assertEquals(
|
||||
getValueAsList("odlanguages", fields),
|
||||
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
|
||||
assertEquals(
|
||||
getValueAsList("accessinfopackage", fields),
|
||||
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||
assertEquals(getValueAsString("releasestartdate", fields), ds.getReleasestartdate());
|
||||
assertEquals(getValueAsString("releaseenddate", fields), ds.getReleasestartdate());
|
||||
assertEquals(getValueAsString("missionstatementurl", fields), ds.getMissionstatementurl());
|
||||
|
||||
assertEquals(false, ds.getDataprovider().getValue());
|
||||
assertEquals(false, ds.getServiceprovider().getValue());
|
||||
|
||||
assertEquals(getValueAsString("databaseaccesstype", fields), ds.getDatabaseaccesstype());
|
||||
assertEquals(getValueAsString("datauploadtype", fields), ds.getDatauploadtype());
|
||||
assertEquals(getValueAsString("databaseaccessrestriction", fields), ds.getDatabaseaccessrestriction());
|
||||
assertEquals(getValueAsString("datauploadrestriction", fields), ds.getDatauploadrestriction());
|
||||
|
||||
assertEquals(false, ds.getVersioning().getValue());
|
||||
assertEquals(false, ds.getVersioncontrol());
|
||||
|
||||
assertEquals(getValueAsString("citationguidelineurl", fields), ds.getCitationguidelineurl());
|
||||
assertEquals(getValueAsString("pidsystems", fields), ds.getPidsystems());
|
||||
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
|
||||
|
||||
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
|
||||
|
||||
assertEquals("National", ds.getJurisdiction().getClassid());
|
||||
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
||||
|
||||
assertTrue(ds.getThematic());
|
||||
assertTrue(ds.getKnowledgegraph());
|
||||
|
||||
assertEquals(1, ds.getContentpolicies().size());
|
||||
assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid());
|
||||
assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid());
|
||||
HashSet<String> cpSchemeId = ds
|
||||
.getContentpolicies()
|
||||
.stream()
|
||||
.map(Qualifier::getSchemeid)
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
assertTrue(cpSchemeId.size() == 1);
|
||||
assertTrue(cpSchemeId.contains("eosc:contentpolicies"));
|
||||
HashSet<String> cpSchemeName = ds
|
||||
.getContentpolicies()
|
||||
.stream()
|
||||
.map(Qualifier::getSchemename)
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
assertTrue(cpSchemeName.size() == 1);
|
||||
assertTrue(cpSchemeName.contains("eosc:contentpolicies"));
|
||||
assertEquals(2, ds.getContentpolicies().size());
|
||||
assertEquals("Taxonomic classification", ds.getContentpolicies().get(0).getClassid());
|
||||
assertEquals("Resource collection", ds.getContentpolicies().get(1).getClassid());
|
||||
|
||||
assertEquals(getValueAsString("submissionpolicyurl", fields), ds.getSubmissionpolicyurl());
|
||||
assertEquals(getValueAsString("preservationpolicyurl", fields), ds.getPreservationpolicyurl());
|
||||
|
||||
assertEquals(
|
||||
getValueAsList("researchproductaccesspolicies", fields),
|
||||
ds.getResearchproductaccesspolicies());
|
||||
assertEquals(
|
||||
getValueAsList("researchproductmetadataaccesspolicies", fields),
|
||||
ds.getResearchproductmetadataaccesspolicies());
|
||||
|
||||
assertEquals(true, ds.getConsenttermsofuse());
|
||||
assertEquals(true, ds.getFulltextdownload());
|
||||
assertEquals("2022-03-11", ds.getConsenttermsofusedate());
|
||||
assertEquals("2022-03-11", ds.getLastconsenttermsofusedate());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -154,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
public void testProcessDatasourceOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
||||
final List<Oaf> list = app.processServiceOrganization(rs);
|
||||
|
||||
assertEquals(2, list.size());
|
||||
verifyMocks(fields);
|
||||
|
@ -356,18 +443,31 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
private Float getValueAsFloat(final String name, final List<TypedField> fields) {
|
||||
return new Float(getValueAs(name, fields).toString());
|
||||
final Object value = getValueAs(name, fields);
|
||||
return value != null ? new Float(value.toString()) : null;
|
||||
}
|
||||
|
||||
private Double getValueAsDouble(final String name, final List<TypedField> fields) {
|
||||
final Object value = getValueAs(name, fields);
|
||||
return value != null ? new Double(value.toString()) : null;
|
||||
}
|
||||
|
||||
private Integer getValueAsInt(final String name, final List<TypedField> fields) {
|
||||
final Object value = getValueAs(name, fields);
|
||||
return value != null ? new Integer(value.toString()) : null;
|
||||
}
|
||||
|
||||
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
||||
return fields
|
||||
final Optional<T> field = fields
|
||||
.stream()
|
||||
.filter(f -> f.getField().equals(name))
|
||||
.map(TypedField::getValue)
|
||||
.filter(Objects::nonNull)
|
||||
.map(o -> (T) o)
|
||||
.findFirst()
|
||||
.get();
|
||||
.map(TypedField::getValue)
|
||||
.map(o -> (T) o);
|
||||
if (!field.isPresent()) {
|
||||
return null;
|
||||
}
|
||||
return field.get();
|
||||
}
|
||||
|
||||
private List<String> getValueAsList(final String name, final List<TypedField> fields) {
|
||||
|
|
|
@ -5,14 +5,47 @@
|
|||
"value": "274269ac6f3b::2579-5449"
|
||||
},
|
||||
{
|
||||
"field": "identities",
|
||||
"field": "originalid",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"274269ac6f3b::2579-5449",
|
||||
"fairsharing_::1562",
|
||||
"piwik:13",
|
||||
null
|
||||
null,
|
||||
"re3data_____::r3d100010213"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "pid",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"r3d100010218###re3data@@@dnet:pid_types"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "datasourcetype",
|
||||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
||||
},
|
||||
{
|
||||
"field": "datasourcetypeui",
|
||||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
|
||||
},
|
||||
{
|
||||
"field": "eosctype",
|
||||
"type": "string",
|
||||
"value": "Data Source@@@dnet:eosc_types"
|
||||
},
|
||||
{
|
||||
"field": "eoscdatasourcetype",
|
||||
"type": "string",
|
||||
"value": "Journal archive@@@dnet:eosc_datasource_types"
|
||||
},
|
||||
{
|
||||
"field": "openairecompatibility",
|
||||
"type": "string",
|
||||
"value": "openaire4.0@@@dnet:datasourceCompatibilityLevel"
|
||||
},
|
||||
{
|
||||
"field": "officialname",
|
||||
"type": "string",
|
||||
|
@ -23,16 +56,6 @@
|
|||
"type": "string",
|
||||
"value": "Jurnal Ilmiah Pendidikan Scholastic"
|
||||
},
|
||||
{
|
||||
"field": "contactemail",
|
||||
"type": "string",
|
||||
"value": "test@test.it"
|
||||
},
|
||||
{
|
||||
"field": "openairecompatibility",
|
||||
"type": "string",
|
||||
"value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
|
||||
},
|
||||
{
|
||||
"field": "websiteurl",
|
||||
"type": "string",
|
||||
|
@ -44,11 +67,14 @@
|
|||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "accessinfopackage",
|
||||
"type": "array",
|
||||
"value": [
|
||||
null
|
||||
]
|
||||
"field": "contactemail",
|
||||
"type": "string",
|
||||
"value": "test@test.it"
|
||||
},
|
||||
{
|
||||
"field": "namespaceprefix",
|
||||
"type": "string",
|
||||
"value": "ojs_25795449"
|
||||
},
|
||||
{
|
||||
"field": "latitude",
|
||||
|
@ -61,9 +87,19 @@
|
|||
"value": 0
|
||||
},
|
||||
{
|
||||
"field": "namespaceprefix",
|
||||
"field": "dateofvalidation",
|
||||
"type": "date",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "description",
|
||||
"type": "string",
|
||||
"value": "ojs_25795449"
|
||||
"value": "veterinary medicine"
|
||||
},
|
||||
{
|
||||
"field": "subjects",
|
||||
"type": "array",
|
||||
"value": []
|
||||
},
|
||||
{
|
||||
"field": "odnumberofitems",
|
||||
|
@ -75,16 +111,6 @@
|
|||
"type": "date",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "subjects",
|
||||
"type": "array",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "description",
|
||||
"type": "string",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "odpolicies",
|
||||
"type": "string",
|
||||
|
@ -93,44 +119,33 @@
|
|||
{
|
||||
"field": "odlanguages",
|
||||
"type": "array",
|
||||
"value": []
|
||||
},
|
||||
{
|
||||
"field": "odcontenttypes",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"Journal articles"
|
||||
"English",
|
||||
"German",
|
||||
"French",
|
||||
"Danish",
|
||||
"Norwegian",
|
||||
"Swedish"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "inferred",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
"field": "languages",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"English",
|
||||
"German",
|
||||
"French",
|
||||
"Danish",
|
||||
"Norwegian",
|
||||
"Swedish"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "deletedbyinference",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
},
|
||||
{
|
||||
"field": "trust",
|
||||
"type": "double",
|
||||
"value": 0.9
|
||||
},
|
||||
{
|
||||
"field": "inferenceprovenance",
|
||||
"type": "string",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "dateofcollection",
|
||||
"type": "date",
|
||||
"value": "2020-01-21"
|
||||
},
|
||||
{
|
||||
"field": "dateofvalidation",
|
||||
"type": "date",
|
||||
"value": null
|
||||
"field": "accessinfopackage",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "releasestartdate",
|
||||
|
@ -147,16 +162,6 @@
|
|||
"type": "string",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "dataprovider",
|
||||
"type": "boolean",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "serviceprovider",
|
||||
"type": "boolean",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "databaseaccesstype",
|
||||
"type": "string",
|
||||
|
@ -183,12 +188,12 @@
|
|||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "citationguidelineurl",
|
||||
"type": "string",
|
||||
"field": "versioncontrol",
|
||||
"type": "boolean",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "qualitymanagementkind",
|
||||
"field": "citationguidelineurl",
|
||||
"type": "string",
|
||||
"value": null
|
||||
},
|
||||
|
@ -208,29 +213,38 @@
|
|||
"value": []
|
||||
},
|
||||
{
|
||||
"field": "collectedfromid",
|
||||
"type": "string",
|
||||
"value": "openaire____::SnVybmFsIEZha3VsdGFzIFNhc3RyYSBVbml2ZXJzaXRhcyBFa2FzYWt0aQ=="
|
||||
"field": "inferred",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
},
|
||||
{
|
||||
"field": "collectedfromname",
|
||||
"type": "string",
|
||||
"value": "Jurnal Fakultas Sastra Universitas Ekasakti"
|
||||
"field": "deletedbyinference",
|
||||
"type": "boolean",
|
||||
"value": false
|
||||
},
|
||||
{
|
||||
"field": "datasourcetype",
|
||||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
||||
"field": "trust",
|
||||
"type": "double",
|
||||
"value": 0.9
|
||||
},
|
||||
{
|
||||
"field": "datasourcetypeui",
|
||||
"field": "inferenceprovenance",
|
||||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "provenanceaction",
|
||||
"type": "not_used",
|
||||
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
|
||||
"field": "dateofcollection",
|
||||
"type": "date",
|
||||
"value": "2020-01-21"
|
||||
},
|
||||
{
|
||||
"field": "collectedfrom",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"openaire____::fairsharing@@@FAIRsharing.org",
|
||||
"openaire____::opendoar@@@OpenDOAR",
|
||||
"openaire____::re3data@@@Registry of Research Data Repository"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "issnPrinted",
|
||||
|
@ -247,6 +261,20 @@
|
|||
"type": "string",
|
||||
"value": "2579-5447"
|
||||
},
|
||||
{
|
||||
"field": "researchentitytypes",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"Research Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "providedproducttypes",
|
||||
"type": "array",
|
||||
"value": [
|
||||
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "jurisdiction",
|
||||
"type": "string",
|
||||
|
@ -266,7 +294,32 @@
|
|||
"field": "contentpolicies",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"Journal article@@@eosc:contentpolicies"
|
||||
"Taxonomic classification@@@eosc:contentpolicies",
|
||||
"Resource collection@@@eosc:contentpolicies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "submissionpolicyurl",
|
||||
"type": "string",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "preservationpolicyurl",
|
||||
"type": "string",
|
||||
"value": "Permanent Archiving https://datadryad.org/stash/faq"
|
||||
},
|
||||
{
|
||||
"field": "researchproductaccesspolicies",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"https://100percentit.com/legal/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "researchproductmetadataaccesspolicies",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"https://wenmr.science.uu.nl/conditions"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -283,5 +336,10 @@
|
|||
"field": "consenttermsofusedate",
|
||||
"type": "date",
|
||||
"value": "2022-03-11"
|
||||
},
|
||||
{
|
||||
"field": "lastconsenttermsofusedate",
|
||||
"type": "date",
|
||||
"value": "2022-03-11"
|
||||
}
|
||||
]
|
|
@ -535,6 +535,12 @@ public class XmlRecordFactory implements Serializable {
|
|||
if (ds.getDatasourcetypeui() != null) {
|
||||
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
|
||||
}
|
||||
if (ds.getEosctype() != null) {
|
||||
metadata.add(XmlSerializationUtils.mapQualifier("eosctype", ds.getEosctype()));
|
||||
}
|
||||
if (ds.getEoscdatasourcetype() != null) {
|
||||
metadata.add(XmlSerializationUtils.mapQualifier("eoscdatasourcetype", ds.getEoscdatasourcetype()));
|
||||
}
|
||||
if (ds.getOpenairecompatibility() != null) {
|
||||
metadata
|
||||
.add(
|
||||
|
@ -583,6 +589,16 @@ public class XmlRecordFactory implements Serializable {
|
|||
metadata
|
||||
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
|
||||
}
|
||||
if (ds.getSubjects() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
ds
|
||||
.getSubjects()
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
if (ds.getOdnumberofitems() != null) {
|
||||
metadata
|
||||
.add(
|
||||
|
@ -609,6 +625,16 @@ public class XmlRecordFactory implements Serializable {
|
|||
.map(c -> XmlSerializationUtils.asXmlElement("odlanguages", c.getValue()))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
if (ds.getLanguages() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
ds
|
||||
.getLanguages()
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(c -> XmlSerializationUtils.asXmlElement("languages", c))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
if (ds.getOdcontenttypes() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
|
@ -689,18 +715,18 @@ public class XmlRecordFactory implements Serializable {
|
|||
XmlSerializationUtils
|
||||
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
|
||||
}
|
||||
if (ds.getVersioncontrol() != null) {
|
||||
metadata
|
||||
.add(
|
||||
XmlSerializationUtils
|
||||
.asXmlElement("versioncontrol", ds.getVersioncontrol().toString()));
|
||||
}
|
||||
if (ds.getCitationguidelineurl() != null) {
|
||||
metadata
|
||||
.add(
|
||||
XmlSerializationUtils
|
||||
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
|
||||
}
|
||||
if (ds.getQualitymanagementkind() != null) {
|
||||
metadata
|
||||
.add(
|
||||
XmlSerializationUtils
|
||||
.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
|
||||
}
|
||||
if (ds.getPidsystems() != null) {
|
||||
metadata
|
||||
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
|
||||
|
@ -722,17 +748,24 @@ public class XmlRecordFactory implements Serializable {
|
|||
if (ds.getJournal() != null) {
|
||||
metadata.add(XmlSerializationUtils.mapJournal(ds.getJournal()));
|
||||
}
|
||||
if (ds.getSubjects() != null) {
|
||||
if (ds.getResearchentitytypes() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
ds
|
||||
.getSubjects()
|
||||
.getResearchentitytypes()
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(sp -> XmlSerializationUtils.mapStructuredProperty("subjects", sp))
|
||||
.map(c -> XmlSerializationUtils.asXmlElement("researchentitytypes", c))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
if (ds.getProvidedproducttypes() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
ds
|
||||
.getProvidedproducttypes()
|
||||
.stream()
|
||||
.map(c -> XmlSerializationUtils.asXmlElement("providedproducttypes", c))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
if (ds.getJurisdiction() != null) {
|
||||
metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
|
||||
}
|
||||
|
@ -741,11 +774,6 @@ public class XmlRecordFactory implements Serializable {
|
|||
metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
|
||||
}
|
||||
|
||||
if (ds.getKnowledgegraph() != null) {
|
||||
metadata
|
||||
.add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString()));
|
||||
}
|
||||
|
||||
if (ds.getContentpolicies() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
|
@ -756,7 +784,34 @@ public class XmlRecordFactory implements Serializable {
|
|||
.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
if (ds.getSubmissionpolicyurl() != null) {
|
||||
metadata
|
||||
.add(XmlSerializationUtils.asXmlElement("submissionpolicyurl", ds.getSubmissionpolicyurl()));
|
||||
}
|
||||
if (ds.getPreservationpolicyurl() != null) {
|
||||
metadata
|
||||
.add(
|
||||
XmlSerializationUtils.asXmlElement("preservationpolicyurl", ds.getPreservationpolicyurl()));
|
||||
}
|
||||
if (ds.getResearchproductaccesspolicies() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
ds
|
||||
.getResearchproductaccesspolicies()
|
||||
.stream()
|
||||
.map(c -> XmlSerializationUtils.asXmlElement("researchproductaccesspolicies", c))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
if (ds.getResearchproductmetadataaccesspolicies() != null) {
|
||||
metadata
|
||||
.addAll(
|
||||
ds
|
||||
.getResearchproductmetadataaccesspolicies()
|
||||
.stream()
|
||||
.map(
|
||||
c -> XmlSerializationUtils.asXmlElement("researchproductmetadataaccesspolicies", c))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
break;
|
||||
case organization:
|
||||
final Organization o = (Organization) entity;
|
||||
|
|
|
@ -11,6 +11,7 @@ import java.util.List;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
|
@ -142,7 +143,7 @@ public class XmlRecordFactoryTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testDatasource() throws IOException, DocumentException {
|
||||
public void testService() throws IOException, DocumentException {
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
|
@ -167,6 +168,14 @@ public class XmlRecordFactoryTest {
|
|||
assertEquals("true", doc.valueOf("//thematic"));
|
||||
assertEquals("Journal article", doc.valueOf("//contentpolicy/@classname"));
|
||||
assertEquals("Journal archive", doc.valueOf("//datasourcetypeui/@classname"));
|
||||
assertEquals("Data Source", doc.valueOf("//eosctype/@classname"));
|
||||
|
||||
final List pids = doc.selectNodes("//pid");
|
||||
assertEquals(1, pids.size());
|
||||
assertEquals("re3data", ((Element) pids.get(0)).attribute("classid").getValue());
|
||||
assertEquals(
|
||||
"Registry of research data repositories", ((Element) pids.get(0)).attribute("classname").getValue());
|
||||
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemeid").getValue());
|
||||
assertEquals("dnet:pid_types", ((Element) pids.get(0)).attribute("schemename").getValue());
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
2
pom.xml
2
pom.xml
|
@ -801,7 +801,7 @@
|
|||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[2.10.32]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
|
Loading…
Reference in New Issue