dnet-dedup/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java

127 lines
3.9 KiB
Java

package eu.dnetlib.pace;
import eu.dnetlib.Deduper;
import eu.dnetlib.jobs.SparkCreateDedupEntity;
import eu.dnetlib.jobs.SparkCreateMergeRels;
import eu.dnetlib.jobs.SparkCreateSimRels;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Manual harness that runs the deduplication Spark jobs against a local Spark master.
 *
 * <p>The class is annotated with {@code @Ignore}, so it is skipped by normal test runs
 * (presumably because it relies on an input file that only exists on a developer machine).
 * None of the tests contain assertions: a test passes when the invoked job completes
 * without throwing.
 *
 * <p>The input file location can be overridden without editing the source by setting the
 * {@code dedup.test.entitiesPath} system property; when the property is not set the
 * original hard-coded location is used.
 */
@Ignore
public class DedupLocalTest extends DedupTestUtils {

    /** Name of the system property that overrides {@link #entitiesPath}. */
    private static final String ENTITIES_PATH_PROPERTY = "dedup.test.entitiesPath";

    /** Original, developer-specific location of the input entities file (kept as default). */
    private static final String DEFAULT_ENTITIES_PATH =
            "/Users/miconis/IdeaProjects/DnetDedup/dnet-dedup/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json";

    SparkSession spark;
    DedupConfig config;
    JavaSparkContext context;

    /** Input entities file, passed to the jobs as {@code -e} and to the {@code Deduper} calls. */
    final String entitiesPath = System.getProperty(ENTITIES_PATH_PROPERTY, DEFAULT_ENTITIES_PATH);

    /** Value passed to the jobs as {@code -w}. */
    final String workingPath = "/tmp/working_dir";

    /** Value passed to the jobs as {@code -np}; kept as a string because it is a CLI argument. */
    final String numPartitions = "10";

    /** Classpath location of the dedup configuration; loaded in {@link #setup()} and passed as {@code -dc}. */
    final String dedupConfPath = "/eu/dnetlib/pace/config/organization.strict.conf.json";

    /**
     * Loads the dedup configuration from the classpath and obtains a local Spark session
     * (all available cores) together with its Java context wrapper.
     */
    @Before
    public void setup() {
        // Use the shared field rather than repeating the literal, so the configuration loaded
        // here and the one handed to the jobs via "-dc" cannot drift apart.
        config = DedupConfig.load(Utility.readFromClasspath(dedupConfPath, DedupLocalTest.class));

        spark = SparkSession
                .builder()
                .appName("Deduplication")
                .master("local[*]")
                .getOrCreate();

        context = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    /**
     * Runs {@link SparkCreateSimRels} with the shared command-line arguments.
     *
     * @throws Exception if reading the parameter definition, parsing the arguments or the job fails
     */
    @Test
    public void createSimRelTest() throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
                Utility.readFromClasspath(
                        "/eu/dnetlib/pace/parameters/createSimRels_parameters.json",
                        SparkCreateSimRels.class));
        parser.parseArgument(jobArguments());

        new SparkCreateSimRels(parser, spark).run();
    }

    /**
     * Runs {@link SparkCreateMergeRels} with the shared command-line arguments.
     *
     * @throws Exception if reading the parameter definition, parsing the arguments or the job fails
     */
    @Test
    public void createMergeRelTest() throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
                Utility.readFromClasspath(
                        "/eu/dnetlib/pace/parameters/createMergeRels_parameters.json",
                        SparkCreateMergeRels.class));
        parser.parseArgument(jobArguments());

        new SparkCreateMergeRels(parser, spark).run();
    }

    /**
     * Runs {@link SparkCreateDedupEntity} with the shared command-line arguments.
     *
     * @throws Exception if reading the parameter definition, parsing the arguments or the job fails
     */
    @Test
    public void createDedupEntityTest() throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
                Utility.readFromClasspath(
                        "/eu/dnetlib/pace/parameters/createDedupEntity_parameters.json",
                        SparkCreateDedupEntity.class));
        parser.parseArgument(jobArguments());

        new SparkCreateDedupEntity(parser, spark).run();
    }

    /**
     * Invokes the three {@link Deduper} steps in sequence (similarity relations, merge
     * relations, dedup entities), feeding each step the output location of the previous one.
     */
    @Test
    public void deduplicationTest() {
        // Named once so that the producer and the consumer of each intermediate
        // location are guaranteed to agree.
        final String simRelPath = "/tmp/deduptest/publication_simrel";
        final String mergeRelPath = "/tmp/deduptest/publication_mergerel";
        final String dedupEntityPath = "/tmp/deduptest/dedupentity";

        Deduper.createSimRels(
                config,
                spark,
                entitiesPath,
                simRelPath);

        Deduper.createMergeRels(
                config,
                entitiesPath,
                mergeRelPath,
                simRelPath,
                spark);

        Deduper.createDedupEntity(
                config,
                mergeRelPath,
                entitiesPath,
                spark,
                dedupEntityPath);
    }

    /**
     * Builds the command-line argument vector shared by the three job tests.
     *
     * @return a fresh array of option/value pairs: {@code -e}, {@code -w}, {@code -np}, {@code -dc}
     */
    private String[] jobArguments() {
        return new String[] {
                "-e", entitiesPath,
                "-w", workingPath,
                "-np", numPartitions,
                "-dc", dedupConfPath
        };
    }
}