dnet-hadoop/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/CleaningFunctionTest.java

107 lines
3.3 KiB
Java
Raw Normal View History

2020-06-09 17:20:40 +02:00
package eu.dnetlib.dhp.oa.graph.clean;
2020-06-09 19:52:53 +02:00
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.lenient;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;

import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
2020-06-13 13:06:04 +02:00
public class CleaningFunctionTest {
2020-06-09 17:20:40 +02:00
public static final ObjectMapper MAPPER = new ObjectMapper();
2020-06-09 19:52:53 +02:00
2020-06-09 17:20:40 +02:00
@Mock
private ISLookUpService isLookUpService;
private VocabularyGroup vocabularies;
2020-06-13 13:06:04 +02:00
private CleaningRuleMap mapping;
2020-06-09 17:20:40 +02:00
@BeforeEach
public void setUp() throws ISLookUpException, IOException {
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
lenient()
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
.thenReturn(synonyms());
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
2020-06-13 13:06:04 +02:00
mapping = CleaningRuleMap.create(vocabularies);
2020-06-09 17:20:40 +02:00
}
@Test
public void testCleaning() throws Exception {
2020-06-13 13:06:04 +02:00
assertNotNull(vocabularies);
assertNotNull(mapping);
2020-06-09 19:52:53 +02:00
2020-06-09 17:20:40 +02:00
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json"));
Publication p_in = MAPPER.readValue(json, Publication.class);
2020-06-13 13:06:04 +02:00
Publication p_out = OafCleaner.apply(p_in, mapping);
2020-06-09 17:20:40 +02:00
2020-06-09 19:52:53 +02:00
assertNotNull(p_out);
assertEquals("eng", p_out.getLanguage().getClassid());
assertEquals("English", p_out.getLanguage().getClassname());
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid());
assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname());
Set<String> pidTerms = vocabularies.getTerms("dnet:pid_types");
assertTrue(
p_out
.getPid()
.stream()
.map(p -> p.getQualifier())
.allMatch(q -> pidTerms.contains(q.getClassid())));
2020-06-09 17:20:40 +02:00
// TODO add more assertions to verity the cleaned values
System.out.println(MAPPER.writeValueAsString(p_out));
2020-06-12 12:03:25 +02:00
/*
* assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));
*/
2020-06-09 17:20:40 +02:00
}
2020-06-09 19:52:53 +02:00
private Stream<Qualifier> getAuthorPidTypes(Publication pub) {
return pub
.getAuthor()
.stream()
.map(a -> a.getPid())
.flatMap(p -> p.stream())
.map(s -> s.getQualifier());
}
2020-06-09 17:20:40 +02:00
private List<String> vocs() throws IOException {
return IOUtils
2020-06-13 13:06:04 +02:00
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
2020-06-09 17:20:40 +02:00
}
private List<String> synonyms() throws IOException {
return IOUtils
2020-06-13 13:06:04 +02:00
.readLines(CleaningFunctionTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
2020-06-09 17:20:40 +02:00
}
}