80 lines
3.2 KiB
Java
80 lines
3.2 KiB
Java
package eu.dnetlib.ariadneplus;
|
|
|
|
import eu.dnetlib.ariadneplus.elasticsearch.BulkUpload;
|
|
import eu.dnetlib.ariadneplus.reader.ResourceManager;
|
|
import eu.dnetlib.ariadneplus.reader.RunSPARQLQueryService;
|
|
import eu.dnetlib.ariadneplus.reader.json.ParseRDFJSON;
|
|
import eu.dnetlib.ariadneplus.reader.utils.ESUtils;
|
|
import org.apache.http.HttpHost;
|
|
import org.elasticsearch.action.get.GetRequest;
|
|
import org.elasticsearch.client.RequestOptions;
|
|
import org.elasticsearch.client.RestClient;
|
|
import org.elasticsearch.client.RestHighLevelClient;
|
|
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
|
|
import org.junit.After;
|
|
import org.junit.Before;
|
|
import org.junit.Ignore;
|
|
import org.junit.Test;
|
|
import org.springframework.core.io.ClassPathResource;
|
|
|
|
import java.io.IOException;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.security.MessageDigest;
|
|
import java.security.NoSuchAlgorithmException;
|
|
import java.util.List;
|
|
import java.util.Properties;
|
|
|
|
@Ignore
|
|
public class FindMissingRecordsInIndexTest {
|
|
|
|
private RunSPARQLQueryService runSPQRLQuery;
|
|
|
|
private final static String STAGING_PROPERTIES = "application.properties";
|
|
private final static String PROD_PROPERTIES = "application-prod-DO-NOT-COMMIT.properties";
|
|
|
|
private RestHighLevelClient client;
|
|
private Properties appProps;
|
|
@Before
|
|
public void setUp() throws IOException {
|
|
final ClassPathResource resource = new ClassPathResource(STAGING_PROPERTIES);
|
|
appProps = new Properties();
|
|
appProps.load(resource.getInputStream());
|
|
runSPQRLQuery = new RunSPARQLQueryService();
|
|
runSPQRLQuery.setupReadOnlyConnection(
|
|
appProps.getProperty("graphdb.serverUrl"),
|
|
appProps.getProperty("graphdb.repository"));
|
|
client = new RestHighLevelClient(
|
|
RestClient.builder(
|
|
new HttpHost(appProps.getProperty("elasticsearch.hostname"), 9200, "http")));
|
|
}
|
|
|
|
@After
|
|
public void tearDown() throws IOException {
|
|
client.close();
|
|
}
|
|
@Test
|
|
public void findMissingRecordsTest() throws NoSuchAlgorithmException, IOException {
|
|
String datasource = "ads";
|
|
String collectionId = "archives";
|
|
List<String> uris = runSPQRLQuery.selectRecordIds(datasource, collectionId);
|
|
System.out.println("Got list of ids, they are "+uris.size());
|
|
for(String uri : uris){
|
|
//compute the md5 and query the index to check if there is a record with that uri. If not, print it
|
|
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
|
byte[] encodedhash = digest.digest(
|
|
uri.getBytes(StandardCharsets.UTF_8));
|
|
String idES = ESUtils.bytesToHex(encodedhash);
|
|
// High level API
|
|
GetRequest getRequest = new GetRequest(
|
|
appProps.getProperty("elasticsearch.indexname"),
|
|
idES);
|
|
getRequest.fetchSourceContext(new FetchSourceContext(false));
|
|
getRequest.storedFields("_none_");
|
|
|
|
boolean exists = client.exists(getRequest, RequestOptions.DEFAULT);
|
|
if(!exists) System.out.println(uri);
|
|
}
|
|
System.out.println("Done");
|
|
}
|
|
}
|