dnet-hadoop/dhp-workflows/dhp-dedup/src/test/java/eu/dnetlib/dedup/SparkCreateDedupTest.java

87 lines
2.5 KiB
Java

package eu.dnetlib.dedup;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.util.List;
public class SparkCreateDedupTest {
String configuration;
String entity = "organization";
@Before
public void setUp() throws IOException {
configuration = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dedup/conf/org.curr.conf.json"));
}
@Test
@Ignore
public void createSimRelsTest() throws Exception {
SparkCreateSimRels.main(new String[] {
"-mt", "local[*]",
"-s", "/Users/miconis/dumps",
"-e", entity,
"-c", ArgumentApplicationParser.compressArgument(configuration),
"-t", "/tmp/dedup",
});
}
@Test
@Ignore
public void createCCTest() throws Exception {
SparkCreateConnectedComponent.main(new String[] {
"-mt", "local[*]",
"-s", "/Users/miconis/dumps",
"-e", entity,
"-c", ArgumentApplicationParser.compressArgument(configuration),
"-t", "/tmp/dedup",
});
}
@Test
@Ignore
public void dedupRecordTest() throws Exception {
SparkCreateDedupRecord.main(new String[] {
"-mt", "local[*]",
"-s", "/Users/miconis/dumps",
"-e", entity,
"-c", ArgumentApplicationParser.compressArgument(configuration),
"-d", "/tmp/dedup",
});
}
@Test
public void printConfiguration() throws Exception {
System.out.println(ArgumentApplicationParser.compressArgument(configuration));
}
@Test
public void testHashCode() {
final String s1 = "20|grid________::6031f94bef015a37783268ec1e75f17f";
final String s2 = "20|nsf_________::b12be9edf414df8ee66b4c52a2d8da46";
final HashFunction hashFunction = Hashing.murmur3_128();
System.out.println( s1.hashCode());
System.out.println(hashFunction.hashUnencodedChars(s1).asLong());
System.out.println( s2.hashCode());
System.out.println(hashFunction.hashUnencodedChars(s2).asLong());
}
}