2021-05-11 11:08:54 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
|
|
|
|
|
|
|
import static org.junit.jupiter.api.Assertions.*;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import me.xuender.unidecode.Unidecode;
|
2021-05-11 11:08:54 +02:00
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
class OafMapperUtilsTest {
|
2021-05-11 11:08:54 +02:00
|
|
|
|
2021-05-14 10:58:12 +02:00
|
|
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
2021-05-11 11:08:54 +02:00
|
|
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
|
|
|
|
2021-07-28 16:21:29 +02:00
|
|
|
@Test
|
|
|
|
public void testUnidecode() {
|
|
|
|
|
|
|
|
assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ"));
|
|
|
|
assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛"));
|
|
|
|
assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼"));
|
|
|
|
assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい"));
|
|
|
|
assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի"));
|
|
|
|
assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики"));
|
|
|
|
assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ"));
|
|
|
|
assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης"));
|
|
|
|
assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية"));
|
|
|
|
assertEquals("abc def ghi", Unidecode.decode("abc def ghi"));
|
|
|
|
}
|
|
|
|
|
2021-05-28 10:50:44 +02:00
|
|
|
@Test
|
2021-08-11 12:13:22 +02:00
|
|
|
void testDateValidation() {
|
2021-05-28 10:50:44 +02:00
|
|
|
|
2022-04-29 12:25:24 +02:00
|
|
|
assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
|
|
|
|
assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
|
|
|
|
assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));
|
|
|
|
|
|
|
|
assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));
|
|
|
|
|
|
|
|
assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
|
|
|
|
assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
|
|
|
|
assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
|
|
|
|
assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
|
2021-06-11 16:53:01 +02:00
|
|
|
assertEquals(
|
|
|
|
"2015-07-03",
|
2022-04-29 12:25:24 +02:00
|
|
|
GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
|
|
|
|
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
|
|
|
|
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
|
|
|
|
assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
|
|
|
|
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
|
|
|
|
assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
|
|
|
|
assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
|
|
|
|
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
|
|
|
|
assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
|
|
|
|
assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
|
|
|
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
|
|
|
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
|
|
|
|
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
|
|
|
|
assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
|
|
|
|
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
|
|
|
|
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
|
|
|
|
assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
|
|
|
|
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
|
|
|
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
|
|
|
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
|
|
|
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
|
|
|
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
|
|
|
|
assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
|
|
|
|
assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
|
|
|
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
|
|
|
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
|
|
|
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
|
|
|
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
|
|
|
|
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
|
|
|
|
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
|
|
|
|
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
|
|
|
|
assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
|
|
|
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
|
|
|
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
|
|
|
|
assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
|
|
|
|
assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
|
|
|
|
assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
|
|
|
|
assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
|
|
|
|
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
|
|
|
|
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
|
|
|
|
assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
|
|
|
|
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
|
|
|
|
assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
|
|
|
|
assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
|
|
|
|
assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
|
|
|
|
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
|
|
|
|
assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
|
2021-06-11 16:53:01 +02:00
|
|
|
assertEquals(
|
2022-04-29 12:25:24 +02:00
|
|
|
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
|
2021-06-11 16:53:01 +02:00
|
|
|
assertEquals(
|
2022-04-29 12:25:24 +02:00
|
|
|
"2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
|
|
|
|
assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
|
|
|
|
assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
|
|
|
|
assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
|
|
|
|
assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
|
|
|
|
assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
|
|
|
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
|
|
|
|
assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
|
|
|
|
assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
|
|
|
|
assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
|
|
|
|
assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
|
|
|
|
assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
|
|
|
|
assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
|
|
|
|
assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
|
|
|
|
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
|
|
|
|
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
|
|
|
|
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
|
2021-05-28 10:50:44 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-06-14 09:40:50 +02:00
|
|
|
@Test
|
2021-08-11 12:13:22 +02:00
|
|
|
void testDate() {
|
|
|
|
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
|
|
|
|
assertNotNull(date);
|
2023-12-14 11:43:58 +01:00
|
|
|
assertEquals("1998-02-23", date);
|
2021-06-14 09:40:50 +02:00
|
|
|
}
|
|
|
|
|
2021-05-11 11:08:54 +02:00
|
|
|
@Test
|
2021-08-11 12:13:22 +02:00
|
|
|
void testMergePubs() throws IOException {
|
2021-05-11 11:08:54 +02:00
|
|
|
Publication p1 = read("publication_1.json", Publication.class);
|
|
|
|
Publication p2 = read("publication_2.json", Publication.class);
|
|
|
|
Dataset d1 = read("dataset_1.json", Dataset.class);
|
|
|
|
Dataset d2 = read("dataset_2.json", Dataset.class);
|
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
assertEquals(1, p1.getCollectedfrom().size());
|
|
|
|
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
|
|
|
|
assertEquals(1, d2.getCollectedfrom().size());
|
2021-05-11 11:08:54 +02:00
|
|
|
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
assertEquals(
|
|
|
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
2023-12-14 11:43:58 +01:00
|
|
|
MergeUtils
|
|
|
|
.mergeResult(p1, d2)
|
2021-05-11 11:08:54 +02:00
|
|
|
.getResulttype()
|
2021-08-11 12:13:22 +02:00
|
|
|
.getClassid());
|
2021-05-11 11:08:54 +02:00
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
assertEquals(1, p2.getCollectedfrom().size());
|
2021-05-11 11:08:54 +02:00
|
|
|
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
2021-08-11 12:13:22 +02:00
|
|
|
assertEquals(1, d1.getCollectedfrom().size());
|
2021-05-11 11:08:54 +02:00
|
|
|
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
assertEquals(
|
|
|
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
2024-03-25 15:39:14 +01:00
|
|
|
((Result) MergeUtils
|
2024-03-22 16:34:03 +01:00
|
|
|
.merge(p2, d1))
|
2024-03-25 15:39:14 +01:00
|
|
|
.getResulttype()
|
|
|
|
.getClassid());
|
2021-05-11 11:08:54 +02:00
|
|
|
}
|
|
|
|
|
2022-02-11 18:05:18 +01:00
|
|
|
@Test
|
|
|
|
void testDelegatedAuthority() throws IOException {
|
|
|
|
Dataset d1 = read("dataset_2.json", Dataset.class);
|
|
|
|
Dataset d2 = read("dataset_delegated.json", Dataset.class);
|
|
|
|
|
|
|
|
assertEquals(1, d2.getCollectedfrom().size());
|
|
|
|
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
|
|
|
|
|
2023-12-14 11:43:58 +01:00
|
|
|
Result res = MergeUtils.mergeResult(d1, d2);
|
2022-02-11 18:05:18 +01:00
|
|
|
|
|
|
|
assertEquals(d2, res);
|
|
|
|
|
|
|
|
System.out.println(OBJECT_MAPPER.writeValueAsString(res));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-05-11 11:08:54 +02:00
|
|
|
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
2021-08-11 12:13:22 +02:00
|
|
|
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
2021-05-11 11:08:54 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
|
|
|
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
|
|
|
return OBJECT_MAPPER.readValue(json, clazz);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|