// AriadnePlus/dnet-ariadneplus-graphdb-pu.../test/java/eu/dnetlib/ariadneplus/FindMissingRecordsInIndexTe...
//
// 80 lines
// 3.2 KiB
// Java

package eu.dnetlib.ariadneplus;
import eu.dnetlib.ariadneplus.elasticsearch.BulkUpload;
import eu.dnetlib.ariadneplus.reader.ResourceManager;
import eu.dnetlib.ariadneplus.reader.RunSPARQLQueryService;
import eu.dnetlib.ariadneplus.reader.json.ParseRDFJSON;
import eu.dnetlib.ariadneplus.reader.utils.ESUtils;
import org.apache.http.HttpHost;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import java.util.Properties;
/**
 * Manual diagnostic test that lists the record URIs known to the SPARQL endpoint
 * for which no document exists in the configured Elasticsearch index.
 *
 * <p>The class is annotated with {@link Ignore}; it talks to the GraphDB and
 * Elasticsearch hosts named in the properties file, so it is presumably meant to
 * be enabled by hand only (NOTE(review): confirm with the module owners).
 */
@Ignore
public class FindMissingRecordsInIndexTest {

    private static final String STAGING_PROPERTIES = "application.properties";
    private static final String PROD_PROPERTIES = "application-prod-DO-NOT-COMMIT.properties";

    /** Port and scheme used to reach the Elasticsearch host from the properties file. */
    private static final int ES_PORT = 9200;
    private static final String ES_SCHEME = "http";

    private RunSPARQLQueryService runSPQRLQuery;
    private RestHighLevelClient client;
    private Properties appProps;

    /**
     * Loads the staging properties from the classpath, opens a read-only
     * connection to the configured GraphDB repository and creates the
     * Elasticsearch client.
     *
     * @throws IOException if the properties resource cannot be opened or read
     */
    @Before
    public void setUp() throws IOException {
        final ClassPathResource resource = new ClassPathResource(STAGING_PROPERTIES);
        appProps = new Properties();
        // Properties.load leaves the stream open, so close it explicitly.
        try (InputStream in = resource.getInputStream()) {
            appProps.load(in);
        }

        runSPQRLQuery = new RunSPARQLQueryService();
        runSPQRLQuery.setupReadOnlyConnection(
                appProps.getProperty("graphdb.serverUrl"),
                appProps.getProperty("graphdb.repository"));

        client = new RestHighLevelClient(
                RestClient.builder(
                        new HttpHost(appProps.getProperty("elasticsearch.hostname"), ES_PORT, ES_SCHEME)));
    }

    /**
     * Closes the Elasticsearch client, if one was created.
     *
     * @throws IOException if closing the client fails
     */
    @After
    public void tearDown() throws IOException {
        // setUp may have failed before the client was built; avoid masking that
        // failure with a NullPointerException here.
        if (client != null) {
            client.close();
        }
    }

    /**
     * Fetches the record URIs for a fixed datasource/collection and prints every
     * URI whose derived document id is not present in the index.
     *
     * <p>The document id is the SHA-256 digest of the UTF-8 bytes of the URI,
     * converted to a string by {@code ESUtils.bytesToHex}.
     *
     * @throws NoSuchAlgorithmException if SHA-256 is not available on this JVM
     * @throws IOException if an existence check against Elasticsearch fails
     */
    @Test
    public void findMissingRecordsTest() throws NoSuchAlgorithmException, IOException {
        String datasource = "ads";
        String collectionId = "archives";
        List<String> uris = runSPQRLQuery.selectRecordIds(datasource, collectionId);
        System.out.println("Got list of ids, they are "+uris.size());

        // Loop-invariant values: resolve them once instead of once per URI.
        final String indexName = appProps.getProperty("elasticsearch.indexname");
        // digest(byte[]) resets the instance after each call, so it can be reused.
        final MessageDigest digest = MessageDigest.getInstance("SHA-256");

        for (String uri : uris) {
            // Compute the SHA-256 of the URI and ask the index whether a document
            // with that id exists. If not, print the URI.
            byte[] encodedhash = digest.digest(uri.getBytes(StandardCharsets.UTF_8));
            String idES = ESUtils.bytesToHex(encodedhash);

            // High level API: existence check only, so skip _source and stored fields.
            GetRequest getRequest = new GetRequest(indexName, idES);
            getRequest.fetchSourceContext(new FetchSourceContext(false));
            getRequest.storedFields("_none_");

            if (!client.exists(getRequest, RequestOptions.DEFAULT)) {
                System.out.println(uri);
            }
        }
        System.out.println("Done");
    }
}