[beta] OpenAIRE Affiliation Inference #452

Merged
claudio.atzori merged 4 commits from affRoFromRawString into beta 2024-07-17 10:24:39 +02:00
3 changed files with 44 additions and 35 deletions
Showing only changes of commit a2b708bb71 - Show all commits

View File

@ -5,7 +5,6 @@ import java.io.StringReader;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.solr.ExternalReference;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
import eu.dnetlib.dhp.schema.solr.Country; import eu.dnetlib.dhp.schema.solr.Country;
import eu.dnetlib.dhp.schema.solr.Datasource; import eu.dnetlib.dhp.schema.solr.Datasource;
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines; import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
import eu.dnetlib.dhp.schema.solr.ExternalReference;
import eu.dnetlib.dhp.schema.solr.Instance; import eu.dnetlib.dhp.schema.solr.Instance;
import eu.dnetlib.dhp.schema.solr.Journal; import eu.dnetlib.dhp.schema.solr.Journal;
import eu.dnetlib.dhp.schema.solr.Measure; import eu.dnetlib.dhp.schema.solr.Measure;
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
.orElse(null); .orElse(null);
} }
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) { private static List<ExternalReference> mapExternalReference(
return Optional.ofNullable(externalReference) List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
.map(ext -> ext.stream() return Optional
.map(e -> ExternalReference.newInstance( .ofNullable(externalReference)
.map(
ext -> ext
.stream()
.map(
e -> ExternalReference
.newInstance(
e.getSitename(), e.getSitename(),
e.getLabel(), e.getLabel(),
e.getAlternateLabel(), e.getAlternateLabel(),

View File

@ -1,12 +1,13 @@
package eu.dnetlib.dhp.oa.provision; package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.nio.file.Path; import java.nio.file.Path;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.junit.jupiter.api.Assertions.assertEquals; import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class SolrConfigExploreTest { public class SolrConfigExploreTest {
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
.run(isLookupClient); .run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); Assertions
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
String[] queryStrings = { String[] queryStrings = {
"cancer", "cancer",
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
// System.out.println(rsp.getExplainMap()); // System.out.println(rsp.getExplainMap());
for (SolrDocument doc : rsp.getResults()) { for (SolrDocument doc : rsp.getResults()) {
log.info( log
.info(
doc.get("score") + "\t" + doc.get("score") + "\t" +
doc.get("__indexrecordidentifier") + "\t" + doc.get("__indexrecordidentifier") + "\t" +
doc.get("resultidentifier") + "\t" + doc.get("resultidentifier") + "\t" +

View File

@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
.run(isLookupClient); .run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); Assertions
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
String[] queryStrings = { String[] queryStrings = {
"cancer", "cancer",