127 lines
3.9 KiB
Java
127 lines
3.9 KiB
Java
package eu.dnetlib.pace;
|
|
|
|
import eu.dnetlib.Deduper;
import eu.dnetlib.jobs.SparkCreateDedupEntity;
import eu.dnetlib.jobs.SparkCreateMergeRels;
import eu.dnetlib.jobs.SparkCreateSimRels;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
|
|
|
|
@Ignore
|
|
public class DedupLocalTest extends DedupTestUtils {
|
|
|
|
SparkSession spark;
|
|
DedupConfig config;
|
|
JavaSparkContext context;
|
|
|
|
final String entitiesPath = "/Users/miconis/IdeaProjects/DnetDedup/dnet-dedup/dnet-dedup-test/src/test/resources/eu/dnetlib/pace/examples/openorgs.to.fix.json";
|
|
final String workingPath = "/tmp/working_dir";
|
|
final String numPartitions = "10";
|
|
final String dedupConfPath = "/eu/dnetlib/pace/config/organization.strict.conf.json";
|
|
|
|
@Before
|
|
public void setup() {
|
|
|
|
config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/config/organization.strict.conf.json", DedupLocalTest.class));
|
|
|
|
spark = SparkSession
|
|
.builder()
|
|
.appName("Deduplication")
|
|
.master("local[*]")
|
|
.getOrCreate();
|
|
context = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
|
|
}
|
|
|
|
@Test
|
|
public void createSimRelTest() throws Exception {
|
|
|
|
ArgumentApplicationParser parser = new ArgumentApplicationParser(Utility.readFromClasspath("/eu/dnetlib/pace/parameters/createSimRels_parameters.json", SparkCreateSimRels.class));
|
|
|
|
parser.parseArgument(
|
|
new String[] {
|
|
"-e", entitiesPath,
|
|
"-w", workingPath,
|
|
"-np", numPartitions,
|
|
"-dc", dedupConfPath
|
|
});
|
|
|
|
new SparkCreateSimRels(
|
|
parser,
|
|
spark
|
|
).run();
|
|
}
|
|
|
|
@Test
|
|
public void createMergeRelTest() throws Exception {
|
|
|
|
ArgumentApplicationParser parser = new ArgumentApplicationParser(Utility.readFromClasspath("/eu/dnetlib/pace/parameters/createMergeRels_parameters.json", SparkCreateMergeRels.class));
|
|
|
|
parser.parseArgument(
|
|
new String[] {
|
|
"-e", entitiesPath,
|
|
"-w", workingPath,
|
|
"-np", numPartitions,
|
|
"-dc", dedupConfPath
|
|
});
|
|
|
|
new SparkCreateMergeRels(
|
|
parser,
|
|
spark
|
|
).run();
|
|
}
|
|
|
|
@Test
|
|
public void createDedupEntityTest() throws Exception {
|
|
|
|
ArgumentApplicationParser parser = new ArgumentApplicationParser(Utility.readFromClasspath("/eu/dnetlib/pace/parameters/createDedupEntity_parameters.json", SparkCreateDedupEntity.class));
|
|
|
|
parser.parseArgument(
|
|
new String[] {
|
|
"-e", entitiesPath,
|
|
"-w", workingPath,
|
|
"-np", numPartitions,
|
|
"-dc", dedupConfPath
|
|
});
|
|
|
|
new SparkCreateDedupEntity(
|
|
parser,
|
|
spark
|
|
).run();
|
|
}
|
|
|
|
@Test
|
|
public void deduplicationTest() {
|
|
|
|
Deduper.createSimRels(
|
|
config,
|
|
spark,
|
|
entitiesPath,
|
|
"/tmp/deduptest/publication_simrel"
|
|
);
|
|
|
|
Deduper.createMergeRels(
|
|
config,
|
|
entitiesPath,
|
|
"/tmp/deduptest/publication_mergerel",
|
|
"/tmp/deduptest/publication_simrel",
|
|
spark
|
|
);
|
|
|
|
Deduper.createDedupEntity(
|
|
config,
|
|
"/tmp/deduptest/publication_mergerel",
|
|
entitiesPath,
|
|
spark,
|
|
"/tmp/deduptest/dedupentity"
|
|
);
|
|
|
|
}
|
|
|
|
} |