dnet-dedup/dnet-dedup-test/src/test/java/eu/dnetlib/pace/DedupLocalTest.java

75 lines
2.1 KiB
Java

package eu.dnetlib.pace;
import eu.dnetlib.Deduper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.tree.support.TreeProcessor;
import eu.dnetlib.pace.tree.support.TreeStats;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ConnectedComponent;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import scala.Tuple2;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
/**
 * Local, end-to-end run of the deduplication workflow on the organization examples.
 *
 * <p>The test starts a {@code local[*]} Spark session, loads the organization dedup
 * configuration from the classpath and then invokes, in order,
 * {@link Deduper#createSimRels}, {@link Deduper#createMergeRels} and
 * {@link Deduper#createDedupEntity}. Every step writes under {@link #WORKING_DIR}; later
 * steps are handed the paths written by the earlier ones.
 *
 * <p>No assertions are made on the produced data: the test passes as long as none of the
 * three steps throws.
 */
public class DedupLocalTest extends DedupTestUtils {

    /** Classpath location of the dedup configuration used by the test. */
    private static final String CONFIG_RESOURCE = "/eu/dnetlib/pace/config/organization.current.conf.json";

    /** Classpath location of the input entities (lives under this module's test resources). */
    private static final String ENTITIES_RESOURCE = "/eu/dnetlib/pace/examples/organization";

    /** Root directory under which every step writes its output. */
    private static final String WORKING_DIR = "/tmp/deduptest";

    private static final String SIMREL_PATH = WORKING_DIR + "/organization_simrel";
    private static final String MERGEREL_PATH = WORKING_DIR + "/organization_mergerel";
    private static final String DEDUP_ENTITY_PATH = WORKING_DIR + "/dedupentity";

    SparkSession spark;
    DedupConfig config;
    JavaSparkContext context;

    // Resolved from the classpath instead of a developer-specific absolute path,
    // so the test can run on any checkout of the project.
    final String entitiesPath = resolveEntitiesPath();

    /**
     * Locates the example entities on the test classpath and returns their file-system path.
     *
     * @return absolute path of the {@link #ENTITIES_RESOURCE} resource
     * @throws IllegalStateException if the resource is missing or its URL is not a valid URI
     */
    private static String resolveEntitiesPath() {
        final URL resource = DedupLocalTest.class.getResource(ENTITIES_RESOURCE);
        if (resource == null) {
            throw new IllegalStateException("Test resource not found on classpath: " + ENTITIES_RESOURCE);
        }
        try {
            // Going through URI -> Path decodes escaped characters (e.g. %20 for spaces),
            // which URL#getPath() would leave encoded.
            // NOTE(review): assumes the resource is on the plain file system (not inside a jar).
            return Paths.get(resource.toURI()).toString();
        } catch (URISyntaxException e) {
            throw new IllegalStateException("Invalid URI for test resource: " + resource, e);
        }
    }

    /** Loads the dedup configuration and obtains the local Spark session and context. */
    @Before
    public void setup() {
        config = DedupConfig.load(Utility.readFromClasspath(CONFIG_RESOURCE, DedupLocalTest.class));

        spark = SparkSession
                .builder()
                .appName("Deduplication")
                .master("local[*]")
                .getOrCreate();

        context = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    /**
     * Runs the three deduplication steps in sequence, feeding each step the output
     * path of the previous one.
     */
    @Test
    public void deduplicationTest() {
        Deduper.createSimRels(
                config,
                spark,
                entitiesPath,
                SIMREL_PATH
        );

        Deduper.createMergeRels(
                config,
                entitiesPath,
                MERGEREL_PATH,
                SIMREL_PATH,
                spark
        );

        Deduper.createDedupEntity(
                config,
                MERGEREL_PATH,
                entitiesPath,
                spark,
                DEDUP_ENTITY_PATH
        );
    }
}