forked from D-Net/dnet-hadoop
new tests
This commit is contained in:
parent
07837e51a9
commit
0e407b5f23
|
@ -2,12 +2,10 @@
|
|||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
@ -15,19 +13,20 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.text.similarity.CosineDistance;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
|
||||
import org.apache.hadoop.security.WhitelistBasedResolver;
|
||||
import org.apache.neethi.Assertion;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
import com.intuit.fuzzymatcher.component.MatchService;
|
||||
|
@ -38,9 +37,9 @@ import com.intuit.fuzzymatcher.domain.Match;
|
|||
import com.wcohen.ss.Levenstein;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.clean.authorpids.*;
|
||||
import jdk.nashorn.internal.ir.annotations.Ignore;
|
||||
import eu.dnetlib.dhp.oa.graph.clean.authorpids.constraints.ConstraintResolver;
|
||||
import eu.dnetlib.dhp.oa.graph.clean.authorpids.constraints.ConstraintResolverFactory;
|
||||
import me.xdrop.fuzzywuzzy.FuzzySearch;
|
||||
import net.sf.saxon.trans.Maker;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class CleanOrcidTest {
|
||||
|
@ -52,6 +51,7 @@ public class CleanOrcidTest {
|
|||
private static Path workingDir;
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CleanOrcidTest.class);
|
||||
final String whitelist = "{\"whitelist\":[{\"criteria\":[{\"verb\":\"shorterorequal\",\"field\":\"oname\",\"value\":\"2\"},{\"verb\":\"shorterorequal\",\"field\":\"osurname\",\"value\":\"2\"}]},{\"criteria\":[{\"verb\":\"shorterorequal\", \"field\":\"name\", \"value\":\"2\"},{\"verb\":\"shorterorequal\", \"field\":\"surname\", \"value\":\"2\"}]}, {\"criteria\":[{\"verb\":\"equals\", \"field\":\"oname\", \"value\":\"Given Names Deactivated\"},{\"verb\":\"equals\", \"field\":\"osurname\", \"value\":\"Family Name Deactivated\"}]}]}";
|
||||
|
||||
// needed for fuzzywuzzy to get a lower bound ratio under which the authors are most probably different
|
||||
String[][] wrong = {
|
||||
|
@ -149,6 +149,12 @@ public class CleanOrcidTest {
|
|||
},
|
||||
{
|
||||
"20", MakeReportSparkJob.handleNameSurname("Adelle Craig")
|
||||
},
|
||||
{
|
||||
"21", MakeReportSparkJob.handleNameSurname("Ramziddin M")
|
||||
},
|
||||
{
|
||||
"20", MakeReportSparkJob.handleNameSurname("R. Mansurov")
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -181,6 +187,118 @@ public class CleanOrcidTest {
|
|||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void loadBlackList() {
|
||||
|
||||
WhiteList loadedWhiteList = new Gson().fromJson(whitelist, WhiteList.class);
|
||||
ConstraintResolver resolver = ConstraintResolverFactory.newInstance();
|
||||
|
||||
loadedWhiteList.getWhitelist().forEach(c -> {
|
||||
try {
|
||||
c.setSelection(resolver);
|
||||
} catch (InvocationTargetException e) {
|
||||
e.printStackTrace();
|
||||
} catch (NoSuchMethodException e) {
|
||||
e.printStackTrace();
|
||||
} catch (InstantiationException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IllegalAccessException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
|
||||
Map<String, String> param = new HashMap<>();
|
||||
param.put("oname", "Miriam");
|
||||
param.put("name", "Miriam");
|
||||
param.put("osurname", "Miriam");
|
||||
param.put("surname", "Miriam");
|
||||
loadedWhiteList.getWhitelist().forEach(c -> Assertions.assertFalse(c.verifyCriteria(param)));
|
||||
|
||||
param.put("oname", "P");
|
||||
param.put("osurname", "tj");
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, loadedWhiteList
|
||||
.getWhitelist()
|
||||
.stream()
|
||||
.map(c -> c.verifyCriteria(param))
|
||||
.filter(Boolean::valueOf)
|
||||
.collect(Collectors.toList())
|
||||
.size());
|
||||
|
||||
param.put("oname", "Given Names Deactivated");
|
||||
param.put("osurname", "Family Name Deactivated");
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, loadedWhiteList
|
||||
.getWhitelist()
|
||||
.stream()
|
||||
.map(c -> c.verifyCriteria(param))
|
||||
.filter(Boolean::valueOf)
|
||||
.collect(Collectors.toList())
|
||||
.size());
|
||||
|
||||
param.put("name", "P");
|
||||
param.put("surname", "tj");
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, loadedWhiteList
|
||||
.getWhitelist()
|
||||
.stream()
|
||||
.map(c -> c.verifyCriteria(param))
|
||||
.filter(Boolean::valueOf)
|
||||
.collect(Collectors.toList())
|
||||
.size());
|
||||
|
||||
param.put("oname", "Given Names Deactivated");
|
||||
param.put("osurname", "Family Name Deactivated");
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, loadedWhiteList
|
||||
.getWhitelist()
|
||||
.stream()
|
||||
.map(c -> c.verifyCriteria(param))
|
||||
.filter(Boolean::valueOf)
|
||||
.collect(Collectors.toList())
|
||||
.size());
|
||||
|
||||
//
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 0, loadedWhiteList
|
||||
// .getWhitelist()
|
||||
// .stream()
|
||||
// .map(c -> c.verifyCriteria("Family Names Deactivated"))
|
||||
// .filter(v -> v > 1)
|
||||
// .collect(Collectors.toList())
|
||||
// .size());
|
||||
//
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1, loadedWhiteList
|
||||
// .getWhitelist()
|
||||
// .stream()
|
||||
// .map(c -> c.verifyCriteria("Family Name Deactivated"))
|
||||
// .filter(v -> v > 1)
|
||||
// .collect(Collectors.toList())
|
||||
// .size());
|
||||
//
|
||||
// Assertions
|
||||
// .assertEquals(
|
||||
// 1, loadedWhiteList
|
||||
// .getWhitelist()
|
||||
// .stream()
|
||||
// .map(c -> c.verifyCriteria("Given Names Deactivated"))
|
||||
// .filter(v -> v > 1)
|
||||
// .collect(Collectors.toList())
|
||||
// .size());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void loadOrcid() {
|
||||
|
||||
|
@ -201,11 +319,30 @@ public class CleanOrcidTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void serializeConstraint() throws JsonProcessingException {
|
||||
WhiteList whiteList = new WhiteList();
|
||||
|
||||
SelectionConstraints sc = new SelectionConstraints();
|
||||
|
||||
Constraints c = new Constraints();
|
||||
c.setVerb("verb");
|
||||
c.setValue("value");
|
||||
c.setField("field");
|
||||
|
||||
sc.setCriteria(Arrays.asList(c, c));
|
||||
|
||||
whiteList.setWhitelist(Arrays.asList(sc));
|
||||
|
||||
System.out.println(OBJECT_MAPPER.writeValueAsString(whiteList));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void makeReportTest() throws Exception {
|
||||
final String inputPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz")
|
||||
.getPath();
|
||||
final String inputPath = "";
|
||||
// getClass()
|
||||
// .getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000.gz")
|
||||
// .getPath();
|
||||
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/clean/part-00000-prepared.json.gz")
|
||||
|
@ -221,7 +358,8 @@ public class CleanOrcidTest {
|
|||
"-inputPath", inputPath,
|
||||
"-preparedInfoPath", preparedInfoPath,
|
||||
"-orcidInputPath", orcidInputPath,
|
||||
"-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication"
|
||||
"-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
||||
"-whitelist", whitelist
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -311,7 +449,7 @@ public class CleanOrcidTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testMakeReport() {
|
||||
public void testMakeReport() throws IOException {
|
||||
ResultInfo ri = new ResultInfo();
|
||||
ri.setName("Prasanth");
|
||||
ri.setSurname("Manohar");
|
||||
|
@ -321,9 +459,39 @@ public class CleanOrcidTest {
|
|||
oa.setSurname("Ramesh");
|
||||
|
||||
Tuple2<ResultInfo, OrcidAuthotitative> t2 = new Tuple2<ResultInfo, OrcidAuthotitative>(ri, oa);
|
||||
Tuple2<String, ReportInfo> tmp = MakeReportSparkJob.getStringReportInfoFuzzyTuple2(t2);
|
||||
Tuple2<String, ReportInfo> tmp = MakeReportSparkJob.getStringReportInfoTuple2(t2);
|
||||
|
||||
System.out.println(new Gson().toJson(tmp._2(), ReportInfo.class));
|
||||
|
||||
ri.setName("Sophia");
|
||||
ri.setSurname("Hooper");
|
||||
|
||||
oa.setName("Man");
|
||||
oa.setSurname("Yang");
|
||||
oa.setCreditName("Man Yang (previous known as Sophia Yang Hooper)");
|
||||
|
||||
WhiteList wl = new Gson().fromJson(whitelist, WhiteList.class);
|
||||
ConstraintResolver resolver = ConstraintResolverFactory.newInstance();
|
||||
|
||||
wl.getWhitelist().forEach(c -> {
|
||||
try {
|
||||
c.setSelection(resolver);
|
||||
} catch (InvocationTargetException e) {
|
||||
e.printStackTrace();
|
||||
} catch (NoSuchMethodException e) {
|
||||
e.printStackTrace();
|
||||
} catch (InstantiationException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IllegalAccessException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
|
||||
System.out
|
||||
.println(
|
||||
OBJECT_MAPPER
|
||||
.writeValueAsString(
|
||||
MakeReportSparkJob.getStringReportInfoFuzzyTuple2(new Tuple2<>(ri, oa), wl)._2()));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue