Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop
This commit is contained in:
commit
599828ce35
|
@ -6,14 +6,16 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class FundRefCleaningRule {
|
public class FundRefCleaningRule {
|
||||||
|
|
||||||
public static String clean(final String fundrefId) {
|
public static final Pattern PATTERN = Pattern.compile("\\d+");
|
||||||
|
|
||||||
String s = fundrefId
|
public static String clean(final String fundRefId) {
|
||||||
|
|
||||||
|
String s = fundRefId
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
.replaceAll("\\s", "");
|
.replaceAll("\\s", "");
|
||||||
|
|
||||||
Matcher m = Pattern.compile("\\d+").matcher(s);
|
Matcher m = PATTERN.matcher(s);
|
||||||
if (m.matches()) {
|
if (m.find()) {
|
||||||
return m.group();
|
return m.group();
|
||||||
} else {
|
} else {
|
||||||
return "";
|
return "";
|
||||||
|
|
|
@ -13,11 +13,7 @@ import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import com.github.sisyphsu.dateparser.DateParserUtils;
|
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
@ -39,6 +35,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
|
public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
|
||||||
|
|
||||||
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
||||||
|
private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";
|
||||||
|
|
||||||
public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
|
public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
|
||||||
if (ModelSupport.isSubClass(value, Result.class)) {
|
if (ModelSupport.isSubClass(value, Result.class)) {
|
||||||
|
@ -228,7 +225,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> boolean filter(T value) {
|
public static <T extends Oaf> boolean filter(T value) {
|
||||||
if (Boolean.TRUE
|
if (!(value instanceof Relation) && (Boolean.TRUE
|
||||||
.equals(
|
.equals(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(value)
|
.ofNullable(value)
|
||||||
|
@ -239,15 +236,16 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
d -> Optional
|
d -> Optional
|
||||||
.ofNullable(d.getInvisible())
|
.ofNullable(d.getInvisible())
|
||||||
.orElse(true))
|
.orElse(true))
|
||||||
.orElse(true))
|
.orElse(false))
|
||||||
.orElse(true))) {
|
.orElse(true)))) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to evaluate here
|
// nothing to evaluate here
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
// nothing to evaluate here
|
final Project p = (Project) value;
|
||||||
|
return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
|
||||||
} else if (value instanceof Organization) {
|
} else if (value instanceof Organization) {
|
||||||
// nothing to evaluate here
|
// nothing to evaluate here
|
||||||
} else if (value instanceof Relation) {
|
} else if (value instanceof Relation) {
|
||||||
|
@ -294,6 +292,13 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
} else if (value instanceof Result) {
|
} else if (value instanceof Result) {
|
||||||
Result r = (Result) value;
|
Result r = (Result) value;
|
||||||
|
|
||||||
|
if (Objects.nonNull(r.getFulltext())
|
||||||
|
&& (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
|
||||||
|
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
|
||||||
|
r.setFulltext(null);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (Objects.nonNull(r.getDateofacceptance())) {
|
if (Objects.nonNull(r.getDateofacceptance())) {
|
||||||
Optional<String> date = cleanDateField(r.getDateofacceptance());
|
Optional<String> date = cleanDateField(r.getDateofacceptance());
|
||||||
if (date.isPresent()) {
|
if (date.isPresent()) {
|
||||||
|
@ -318,8 +323,18 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
|
if (Objects.nonNull(r.getPublisher())) {
|
||||||
|
if (StringUtils.isBlank(r.getPublisher().getValue())) {
|
||||||
r.setPublisher(null);
|
r.setPublisher(null);
|
||||||
|
} else {
|
||||||
|
r
|
||||||
|
.getPublisher()
|
||||||
|
.setValue(
|
||||||
|
r
|
||||||
|
.getPublisher()
|
||||||
|
.getValue()
|
||||||
|
.replaceAll(NAME_CLEANING_REGEX, " "));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
|
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
|
||||||
r
|
r
|
||||||
|
@ -486,6 +501,11 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
i.setDateofacceptance(null);
|
i.setDateofacceptance(null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (StringUtils.isNotBlank(i.getFulltext()) &&
|
||||||
|
(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
|
||||||
|
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
|
||||||
|
i.setFulltext(null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getBestaccessright())
|
if (Objects.isNull(r.getBestaccessright())
|
||||||
|
@ -510,6 +530,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
||||||
|
.map(GraphCleaningFunctions::cleanupAuthor)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
boolean nullRank = r
|
boolean nullRank = r
|
||||||
|
@ -604,6 +625,35 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Author cleanupAuthor(Author author) {
|
||||||
|
if (StringUtils.isNotBlank(author.getFullname())) {
|
||||||
|
author
|
||||||
|
.setFullname(
|
||||||
|
author
|
||||||
|
.getFullname()
|
||||||
|
.replaceAll(NAME_CLEANING_REGEX, " ")
|
||||||
|
.replace("\"", "\\\""));
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(author.getName())) {
|
||||||
|
author
|
||||||
|
.setName(
|
||||||
|
author
|
||||||
|
.getName()
|
||||||
|
.replaceAll(NAME_CLEANING_REGEX, " ")
|
||||||
|
.replace("\"", "\\\""));
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotBlank(author.getSurname())) {
|
||||||
|
author
|
||||||
|
.setSurname(
|
||||||
|
author
|
||||||
|
.getSurname()
|
||||||
|
.replaceAll(NAME_CLEANING_REGEX, " ")
|
||||||
|
.replace("\"", "\\\""));
|
||||||
|
}
|
||||||
|
|
||||||
|
return author;
|
||||||
|
}
|
||||||
|
|
||||||
private static Optional<String> cleanDateField(Field<String> dateofacceptance) {
|
private static Optional<String> cleanDateField(Field<String> dateofacceptance) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(dateofacceptance)
|
.ofNullable(dateofacceptance)
|
||||||
|
|
|
@ -6,13 +6,19 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class GridCleaningRule {
|
public class GridCleaningRule {
|
||||||
|
|
||||||
|
public static final Pattern PATTERN = Pattern.compile("(?<grid>\\d{4,6}\\.[0-9a-z]{1,2})");
|
||||||
|
|
||||||
public static String clean(String grid) {
|
public static String clean(String grid) {
|
||||||
String s = grid
|
String s = grid
|
||||||
.replaceAll("\\s", "")
|
.replaceAll("\\s", "")
|
||||||
.toLowerCase();
|
.toLowerCase();
|
||||||
|
|
||||||
Matcher m = Pattern.compile("\\d{4,6}\\.[0-9a-z]{1,2}").matcher(s);
|
Matcher m = PATTERN.matcher(s);
|
||||||
return m.matches() ? "grid." + m.group() : "";
|
if (m.find()) {
|
||||||
|
return "grid." + m.group("grid");
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,12 @@ import java.util.regex.Pattern;
|
||||||
// https://www.wikidata.org/wiki/Property:P213
|
// https://www.wikidata.org/wiki/Property:P213
|
||||||
public class ISNICleaningRule {
|
public class ISNICleaningRule {
|
||||||
|
|
||||||
|
public static final Pattern PATTERN = Pattern.compile("([0]{4}) ?([0-9]{4}) ?([0-9]{4}) ?([0-9]{3}[0-9X])");
|
||||||
|
|
||||||
public static String clean(final String isni) {
|
public static String clean(final String isni) {
|
||||||
|
|
||||||
Matcher m = Pattern.compile("([0]{4}) ?([0-9]{4}) ?([0-9]{4}) ?([0-9]{3}[0-9X])").matcher(isni);
|
Matcher m = PATTERN.matcher(isni);
|
||||||
if (m.matches()) {
|
if (m.find()) {
|
||||||
return String.join("", m.group(1), m.group(2), m.group(3), m.group(4));
|
return String.join("", m.group(1), m.group(2), m.group(3), m.group(4));
|
||||||
} else {
|
} else {
|
||||||
return "";
|
return "";
|
||||||
|
|
|
@ -6,10 +6,12 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class PICCleaningRule {
|
public class PICCleaningRule {
|
||||||
|
|
||||||
|
public static final Pattern PATTERN = Pattern.compile("\\d{9}");
|
||||||
|
|
||||||
public static String clean(final String pic) {
|
public static String clean(final String pic) {
|
||||||
|
|
||||||
Matcher m = Pattern.compile("\\d{9}").matcher(pic);
|
Matcher m = PATTERN.matcher(pic);
|
||||||
if (m.matches()) {
|
if (m.find()) {
|
||||||
return m.group();
|
return m.group();
|
||||||
} else {
|
} else {
|
||||||
return "";
|
return "";
|
||||||
|
|
|
@ -1,13 +1,24 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
public class PmcCleaningRule {
|
public class PmcCleaningRule {
|
||||||
|
|
||||||
|
public static final Pattern PATTERN = Pattern.compile("PMC\\d{1,8}");
|
||||||
|
|
||||||
public static String clean(String pmc) {
|
public static String clean(String pmc) {
|
||||||
String s = pmc
|
String s = pmc
|
||||||
.replaceAll("\\s", "")
|
.replaceAll("\\s", "")
|
||||||
.toUpperCase();
|
.toUpperCase();
|
||||||
return s.matches("^PMC\\d{1,8}$") ? s : "";
|
|
||||||
|
final Matcher m = PATTERN.matcher(s);
|
||||||
|
|
||||||
|
if (m.find()) {
|
||||||
|
return m.group();
|
||||||
|
}
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,25 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
// https://researchguides.stevens.edu/c.php?g=442331&p=6577176
|
// https://researchguides.stevens.edu/c.php?g=442331&p=6577176
|
||||||
public class PmidCleaningRule {
|
public class PmidCleaningRule {
|
||||||
|
|
||||||
|
public static final Pattern PATTERN = Pattern.compile("[1-9]{1,8}");
|
||||||
|
|
||||||
public static String clean(String pmid) {
|
public static String clean(String pmid) {
|
||||||
String s = pmid
|
String s = pmid
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
.replaceAll("\\s", "")
|
.replaceAll("\\s", "");
|
||||||
.trim()
|
|
||||||
.replaceAll("^0+", "");
|
final Matcher m = PATTERN.matcher(s);
|
||||||
return s.matches("^\\d{1,8}$") ? s : "";
|
|
||||||
|
if (m.find()) {
|
||||||
|
return m.group();
|
||||||
|
}
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,12 +7,21 @@ import java.util.regex.Pattern;
|
||||||
// https://ror.readme.io/docs/ror-identifier-pattern
|
// https://ror.readme.io/docs/ror-identifier-pattern
|
||||||
public class RorCleaningRule {
|
public class RorCleaningRule {
|
||||||
|
|
||||||
|
public static final String ROR_PREFIX = "https://ror.org/";
|
||||||
|
|
||||||
|
private static final Pattern PATTERN = Pattern.compile("(?<ror>0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2})");
|
||||||
|
|
||||||
public static String clean(String ror) {
|
public static String clean(String ror) {
|
||||||
String s = ror
|
String s = ror
|
||||||
.replaceAll("\\s", "")
|
.replaceAll("\\s", "")
|
||||||
.toLowerCase();
|
.toLowerCase();
|
||||||
Matcher m = Pattern.compile("0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}").matcher(s);
|
|
||||||
return m.matches() ? "https://ror.org/" + m.group() : "";
|
Matcher m = PATTERN.matcher(s);
|
||||||
|
|
||||||
|
if (m.find()) {
|
||||||
|
return ROR_PREFIX + m.group("ror");
|
||||||
|
}
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class GridCleaningRuleTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning() {
|
||||||
|
assertEquals("grid.493784.5", GridCleaningRule.clean("grid.493784.5"));
|
||||||
|
assertEquals("grid.493784.5x", GridCleaningRule.clean("grid.493784.5x"));
|
||||||
|
assertEquals("grid.493784.5x", GridCleaningRule.clean("493784.5x"));
|
||||||
|
assertEquals("", GridCleaningRule.clean("493x784.5x"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class ISNICleaningRuleTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning() {
|
||||||
|
assertEquals("0000000463436020", ISNICleaningRule.clean("0000 0004 6343 6020"));
|
||||||
|
assertEquals("0000000463436020", ISNICleaningRule.clean("0000000463436020"));
|
||||||
|
assertEquals("", ISNICleaningRule.clean("Q30256598"));
|
||||||
|
assertEquals("0000000493403529", ISNICleaningRule.clean("ISNI:0000000493403529"));
|
||||||
|
assertEquals("000000008614884X", ISNICleaningRule.clean("0000 0000 8614 884X"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class PICCleaningRuleTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning() {
|
||||||
|
assertEquals("887624982", PICCleaningRule.clean("887624982"));
|
||||||
|
assertEquals("", PICCleaningRule.clean("887 624982"));
|
||||||
|
assertEquals("887624982", PICCleaningRule.clean(" 887624982 "));
|
||||||
|
assertEquals("887624982", PICCleaningRule.clean(" 887624982x "));
|
||||||
|
assertEquals("887624982", PICCleaningRule.clean(" 88762498200 "));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class PmcCleaningRuleTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning() {
|
||||||
|
assertEquals("PMC1234", PmcCleaningRule.clean("PMC1234"));
|
||||||
|
assertEquals("PMC1234", PmcCleaningRule.clean(" PMC1234"));
|
||||||
|
assertEquals("PMC12345678", PmcCleaningRule.clean("PMC12345678"));
|
||||||
|
assertEquals("PMC12345678", PmcCleaningRule.clean("PMC123456789"));
|
||||||
|
assertEquals("PMC12345678", PmcCleaningRule.clean("PMC 12345678"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class PmidCleaningRuleTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning() {
|
||||||
|
assertEquals("1234", PmidCleaningRule.clean("01234"));
|
||||||
|
assertEquals("1234567", PmidCleaningRule.clean("0123 4567"));
|
||||||
|
assertEquals("123", PmidCleaningRule.clean("0123x4567"));
|
||||||
|
assertEquals("", PmidCleaningRule.clean("abc"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,17 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class RorCleaningRuleTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning() {
|
||||||
|
assertEquals("https://ror.org/05rpz9w55", RorCleaningRule.clean("https://ror.org/05rpz9w55"));
|
||||||
|
assertEquals("https://ror.org/05rpz9w55", RorCleaningRule.clean("05rpz9w55"));
|
||||||
|
assertEquals("", RorCleaningRule.clean("05rpz9w_55"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -251,6 +251,12 @@ public class GraphCleaningFunctionsTest {
|
||||||
pid.getQualifier().getClassname()));
|
pid.getQualifier().getClassname()));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
p_cleaned
|
||||||
|
.getAuthor()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(a -> "Brien, Tom".equals(a.getFullname())));
|
||||||
|
|
||||||
assertNotNull(p_cleaned.getSubject());
|
assertNotNull(p_cleaned.getSubject());
|
||||||
|
|
||||||
List<Subject> fos_subjects = p_cleaned
|
List<Subject> fos_subjects = p_cleaned
|
||||||
|
@ -285,6 +291,31 @@ public class GraphCleaningFunctionsTest {
|
||||||
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCleaning_dataset() throws Exception {
|
||||||
|
|
||||||
|
assertNotNull(vocabularies);
|
||||||
|
assertNotNull(mapping);
|
||||||
|
|
||||||
|
String json = IOUtils
|
||||||
|
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result_dataset.json"));
|
||||||
|
Dataset p_in = MAPPER.readValue(json, Dataset.class);
|
||||||
|
|
||||||
|
assertTrue(p_in instanceof Result);
|
||||||
|
assertTrue(p_in instanceof Dataset);
|
||||||
|
|
||||||
|
Dataset p_out = OafCleaner.apply(GraphCleaningFunctions.fixVocabularyNames(p_in), mapping);
|
||||||
|
|
||||||
|
assertNotNull(p_out);
|
||||||
|
|
||||||
|
assertNotNull(p_out.getPublisher());
|
||||||
|
assertNotNull(p_out.getPublisher().getValue());
|
||||||
|
|
||||||
|
Dataset p_cleaned = GraphCleaningFunctions.cleanup(p_out, vocabularies);
|
||||||
|
|
||||||
|
assertEquals("Best publisher in the world", p_cleaned.getPublisher().getValue());
|
||||||
|
}
|
||||||
|
|
||||||
private static void verify_keyword(Publication p_cleaned, String subject) {
|
private static void verify_keyword(Publication p_cleaned, String subject) {
|
||||||
Optional<Subject> s1 = p_cleaned
|
Optional<Subject> s1 = p_cleaned
|
||||||
.getSubject()
|
.getSubject()
|
||||||
|
@ -337,6 +368,15 @@ public class GraphCleaningFunctionsTest {
|
||||||
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
|
Assertions.assertEquals(true, GraphCleaningFunctions.filter(cleaned));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFilterProject() throws IOException {
|
||||||
|
String json = IOUtils
|
||||||
|
.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/project.json"));
|
||||||
|
Project p_in = MAPPER.readValue(json, Project.class);
|
||||||
|
|
||||||
|
Assertions.assertEquals(false, GraphCleaningFunctions.filter(p_in));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCleanDoiBoost2() throws IOException {
|
public void testCleanDoiBoost2() throws IOException {
|
||||||
String json = IOUtils
|
String json = IOUtils
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"measures": [{"id": "downloads", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:usage_counts", "classname": "measure:usage_counts", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "count", "value": "1"}]}, {"id": "views", "unit": [{"dataInfo": {"provenanceaction": {"classid": "measure:usage_counts", "classname": "measure:usage_counts", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "deletedbyinference": false, "inferred": true, "inferenceprovenance": "update", "invisible": false, "trust": ""}, "key": "count", "value": "0"}]}], "id": "40|aka_________::591da07706352f1195afaeed4065f52e"}
|
|
@ -0,0 +1,28 @@
|
||||||
|
{
|
||||||
|
"resulttype": {
|
||||||
|
"classid": "dataset",
|
||||||
|
"classname": "dataset",
|
||||||
|
"schemeid": "dnet:result_typologies",
|
||||||
|
"schemename": "dnet:result_typologies"
|
||||||
|
},
|
||||||
|
"fulltext": [
|
||||||
|
{
|
||||||
|
"value" : "https://www.researchgate.net"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"publisher" : {
|
||||||
|
"value" : "Best publisher in the world"
|
||||||
|
},
|
||||||
|
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"instancetype": {
|
||||||
|
"classid": "Comment/debate",
|
||||||
|
"classname": "Comment/debate",
|
||||||
|
"schemeid": "dnet:publication_resource",
|
||||||
|
"schemename": "dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"fulltext": "https://www.researchgate.net"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Loading…
Reference in New Issue