[BulkTag] added tagging for the organizations relevant to the community. #461
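
For orientation, the change generalises the former project-only tagging step into a generic entity-tagging step and wires it up for organizations and projects; the two driver calls below are adapted verbatim from the SparkBulkTagJob hunk in this diff (spark, inputPath, outputPath and baseURL are the job's existing parameters):

    // Adapted from the SparkBulkTagJob hunk below: each call provides the community map,
    // the entity class and the provenance constants used when tagging that entity type.
    execEntityTag(
        spark, inputPath + "organization", outputPath + "organization",
        Utils.getCommunityOrganization(baseURL), Organization.class,
        TaggingConstants.CLASS_ID_ORGANIZATION, TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
    execEntityTag(
        spark, inputPath + "project", outputPath + "project",
        Utils.getCommunityProjects(baseURL), Project.class,
        TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);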
CountryMatch.java

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.pace.tree;
 
 import java.util.Map;
@@ -11,37 +12,37 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
 @ComparatorClass("countryMatch")
 public class CountryMatch extends AbstractStringComparator {
 
     public CountryMatch(Map<String, String> params) {
         super(params, new com.wcohen.ss.JaroWinkler());
     }
 
     public CountryMatch(final double weight) {
         super(weight, new com.wcohen.ss.JaroWinkler());
     }
 
     protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
         super(weight, ssalgo);
     }
 
     @Override
     public double distance(final String a, final String b, final Config conf) {
         if (a.isEmpty() || b.isEmpty()) {
             return -1.0; // return -1 if a field is missing
         }
         if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
             return -1.0; // return -1 if a country is UNKNOWN
         }
 
         return a.equals(b) ? 1.0 : 0;
     }
 
     @Override
     public double getWeight() {
         return super.weight;
     }
 
     @Override
     protected double normalize(final double d) {
         return d;
     }
 }
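A minimal usage sketch of the comparator above (hypothetical, not part of the diff; the Config argument is unused in the distance method shown, so null is passed here):

    import eu.dnetlib.pace.tree.CountryMatch;

    public class CountryMatchSketch {
        public static void main(String[] args) {
            CountryMatch match = new CountryMatch(1.0);
            // an exact country match scores 1.0, a mismatch 0.0
            System.out.println(match.distance("IT", "IT", null)); // 1.0
            System.out.println(match.distance("IT", "FR", null)); // 0.0
            // empty or UNKNOWN countries score -1.0, so missing data never counts as a match
            System.out.println(match.distance("", "FR", null)); // -1.0
            System.out.println(match.distance("UNKNOWN", "IT", null)); // -1.0
        }
    }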
SparkBulkTagJob.java

@@ -33,10 +33,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.bulktag.community.*;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.Context;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.schema.oaf.Project;
-import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
 
@@ -114,27 +111,35 @@ public class SparkBulkTagJob {
                 extendCommunityConfigurationForEOSC(spark, inputPath, cc);
                 execBulkTag(
                     spark, inputPath, outputPath, protoMap, cc);
+                execEntityTag(
+                    spark, inputPath + "organization", outputPath + "organization",
+                    Utils.getCommunityOrganization(baseURL), Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION,
+                    TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
+                execEntityTag(
+                    spark, inputPath + "project", outputPath + "project", Utils.getCommunityProjects(baseURL),
+                    Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
                 execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
-                execProjectTag(spark, inputPath, outputPath, Utils.getCommunityProjects(baseURL));
             });
     }
 
-    private static void execProjectTag(SparkSession spark, String inputPath, String outputPath,
-        CommunityEntityMap communityProjects) {
-        Dataset<Project> projects = readPath(spark, inputPath + "project", Project.class);
+    private static <E extends OafEntity> void execEntityTag(SparkSession spark, String inputPath, String outputPath,
+        CommunityEntityMap communityEntity, Class<E> entityClass,
+        String classID, String calssName) {
+        Dataset<E> entity = readPath(spark, inputPath, entityClass);
         Dataset<EntityCommunities> pc = spark
             .createDataset(
-                communityProjects
+                communityEntity
                     .keySet()
                     .stream()
-                    .map(k -> EntityCommunities.newInstance(k, communityProjects.get(k)))
+                    .map(k -> EntityCommunities.newInstance(k, communityEntity.get(k)))
                     .collect(Collectors.toList()),
                 Encoders.bean(EntityCommunities.class));
 
-        projects
-            .joinWith(pc, projects.col("id").equalTo(pc.col("entityId")), "left")
-            .map((MapFunction<Tuple2<Project, EntityCommunities>, Project>) t2 -> {
-                Project ds = t2._1();
+        entity
+            .joinWith(pc, entity.col("id").equalTo(pc.col("entityId")), "left")
+            .map((MapFunction<Tuple2<E, EntityCommunities>, E>) t2 -> {
+                E ds = t2._1();
                 if (t2._2() != null) {
                     List<String> context = Optional
                         .ofNullable(ds.getContext())
@@ -156,8 +161,8 @@ public class SparkBulkTagJob {
                                 false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false,
                                 OafMapperUtils
                                     .qualifier(
-                                        TaggingConstants.CLASS_ID_DATASOURCE,
-                                        TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE,
+                                        classID,
+                                        calssName,
                                         ModelConstants.DNET_PROVENANCE_ACTIONS,
                                         ModelConstants.DNET_PROVENANCE_ACTIONS),
                                     "1")));
@@ -166,17 +171,17 @@ public class SparkBulkTagJob {
                     });
                 }
                 return ds;
-            }, Encoders.bean(Project.class))
+            }, Encoders.bean(entityClass))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
-           .json(outputPath + "project");
+           .json(outputPath);
 
-        readPath(spark, outputPath + "project", Project.class)
+        readPath(spark, outputPath, entityClass)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
-           .json(inputPath + "project");
+           .json(inputPath);
     }
 
     private static void execDatasourceTag(SparkSession spark, String inputPath, String outputPath,
TaggingConstants.java

@@ -13,6 +13,9 @@ public class TaggingConstants {
     public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
     public static final String CLASS_ID_ADVANCED_CONSTRAINT = "community:advconstraint";
 
+    public static final String CLASS_ID_PROJECT = "community:project";
+    public static final String CLASS_ID_ORGANIZATION = "community:organization";
+
     public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";
 
     public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
@@ -20,5 +23,8 @@ public class TaggingConstants {
     public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
     public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";
 
+    public static final String CLASS_NAME_BULKTAG_PROJECT = "Bulktagging for Community - Project";
+    public static final String CLASS_NAME_BULKTAG_ORGANIZATION = "Bulktagging for Community - Organization";
+
     public static final String TAGGING_TRUST = "0.8";
 }
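For orientation, these id/name pairs are what execEntityTag (in the SparkBulkTagJob hunk above) writes into the provenance action of each added community context; a minimal sketch with the organization constants, mirroring the OafMapperUtils.qualifier call visible in that hunk:

    // Sketch only: mirrors the qualifier built in execEntityTag for an organization tag;
    // the same pattern applies to projects with the CLASS_*_PROJECT constants.
    Qualifier provenanceAction = OafMapperUtils
        .qualifier(
            TaggingConstants.CLASS_ID_ORGANIZATION, // "community:organization"
            TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION, // "Bulktagging for Community - Organization"
            ModelConstants.DNET_PROVENANCE_ACTIONS,
            ModelConstants.DNET_PROVENANCE_ACTIONS);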
BulkTagJobTest.java

@@ -465,6 +465,138 @@ public class BulkTagJobTest {
 
     }
 
+    @Test
+    void organizationTag() throws Exception {
+        final String sourcePath = getClass()
+            .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
+            .getPath();
+        LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+        fs
+            .copyFromLocalFile(
+                false, new org.apache.hadoop.fs.Path(getClass()
+                    .getResource("/eu/dnetlib/dhp/bulktag/pathMap/")
+                    .getPath()),
+                new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap"));
+        SparkBulkTagJob
+            .main(
+                new String[] {
+
+                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
+                    "-sourcePath", sourcePath,
+                    "-taggingConf", taggingConf,
+
+                    "-outputPath", workingDir.toString() + "/",
+                    "-baseURL", "https://services.openaire.eu/openaire/community/",
+
+                    "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
+                    "-nameNode", "local"
+                });
+
+        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+        JavaRDD<Organization> tmp = sc
+            .textFile(workingDir.toString() + "/organization")
+            .map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
+
+        Assertions.assertEquals(4, tmp.count());
+        org.apache.spark.sql.Dataset<Organization> verificationDataset = spark
+            .createDataset(tmp.rdd(), Encoders.bean(Organization.class));
+
+        verificationDataset.createOrReplaceTempView("organization");
+
+        String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+            + "from organization "
+            + "lateral view explode(context) c as MyT "
+            + "lateral view explode(MyT.datainfo) d as MyD "
+            + "where MyD.inferenceprovenance = 'bulktagging'";
+
+        org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
+
+        idExplodeCommunity.show(false);
+
+        Assertions.assertEquals(3, idExplodeCommunity.count());
+        Assertions
+            .assertEquals(
+                3, idExplodeCommunity.filter("provenance = 'community:organization'").count());
+        Assertions
+            .assertEquals(
+                3,
+                idExplodeCommunity
+                    .filter("name = 'Bulktagging for Community - Organization'")
+                    .count());
+
+        Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'netherlands'").count());
+        Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'beopen'").count());
+        Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'mes'").count());
+
+    }
+
+    @Test
+    void projectTag() throws Exception {
+        final String sourcePath = getClass()
+            .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
+            .getPath();
+        LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+        fs
+            .copyFromLocalFile(
+                false, new org.apache.hadoop.fs.Path(getClass()
+                    .getResource("/eu/dnetlib/dhp/bulktag/pathMap/")
+                    .getPath()),
+                new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap"));
+        SparkBulkTagJob
+            .main(
+                new String[] {
+
+                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
+                    "-sourcePath", sourcePath,
+                    "-taggingConf", taggingConf,
+
+                    "-outputPath", workingDir.toString() + "/",
+                    "-baseURL", "https://services.openaire.eu/openaire/community/",
+
+                    "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
+                    "-nameNode", "local"
+                });
+
+        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+        JavaRDD<Project> tmp = sc
+            .textFile(workingDir.toString() + "/project")
+            .map(item -> OBJECT_MAPPER.readValue(item, Project.class));
+
+        Assertions.assertEquals(4, tmp.count());
+        org.apache.spark.sql.Dataset<Project> verificationDataset = spark
+            .createDataset(tmp.rdd(), Encoders.bean(Project.class));
+
+        verificationDataset.createOrReplaceTempView("project");
+
+        String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+            + "from project "
+            + "lateral view explode(context) c as MyT "
+            + "lateral view explode(MyT.datainfo) d as MyD "
+            + "where MyD.inferenceprovenance = 'bulktagging'";
+
+        org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
+
+        idExplodeCommunity.show(false);
+
+        Assertions.assertEquals(4, idExplodeCommunity.count());
+        Assertions
+            .assertEquals(
+                4, idExplodeCommunity.filter("provenance = 'community:project'").count());
+        Assertions
+            .assertEquals(
+                4,
+                idExplodeCommunity
+                    .filter("name = 'Bulktagging for Community - Project'")
+                    .count());
+
+        Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'enermaps'").count());
+        Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'clarin'").count());
+        Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'dh-ch'").count());
+
+    }
+
     @Test
     void bulktagByZenodoCommunityTest() throws Exception {
         final String sourcePath = getClass()
File diff suppressed because one or more lines are too long
ProvisionModelSupport.java

@@ -5,7 +5,6 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.stream.Collectors;
 
-import eu.dnetlib.dhp.schema.solr.ExternalReference;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
@@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
 import eu.dnetlib.dhp.schema.solr.Country;
 import eu.dnetlib.dhp.schema.solr.Datasource;
 import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
+import eu.dnetlib.dhp.schema.solr.ExternalReference;
 import eu.dnetlib.dhp.schema.solr.Instance;
 import eu.dnetlib.dhp.schema.solr.Journal;
 import eu.dnetlib.dhp.schema.solr.Measure;
@@ -562,10 +562,16 @@ public class ProvisionModelSupport {
             .orElse(null);
     }
 
-    private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
-        return Optional.ofNullable(externalReference)
-            .map(ext -> ext.stream()
-                .map(e -> ExternalReference.newInstance(
+    private static List<ExternalReference> mapExternalReference(
+        List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
+        return Optional
+            .ofNullable(externalReference)
+            .map(
+                ext -> ext
+                    .stream()
+                    .map(
+                        e -> ExternalReference
+                            .newInstance(
                                 e.getSitename(),
                                 e.getLabel(),
                                 e.getAlternateLabel(),
@@ -573,8 +579,8 @@ public class ProvisionModelSupport {
                                 mapCodeLabel(e.getQualifier()),
                                 e.getRefidentifier(),
                                 e.getQuery()))
                    .collect(Collectors.toList()))
            .orElse(Lists.newArrayList());
     }
 
     private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
SolrConfigExploreTest.java

@@ -1,12 +1,13 @@
 
 package eu.dnetlib.dhp.oa.provision;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.nio.file.Path;
 
-import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.SolrQuery;
@@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
 import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.mockito.junit.jupiter.MockitoExtension;
 
-import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
+import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 
 @ExtendWith(MockitoExtension.class)
 public class SolrConfigExploreTest {
@@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
         SparkConf conf = new SparkConf();
         conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
         conf.registerKryoClasses(new Class[] {
             SerializableSolrInputDocument.class
         });
 
         conf.setMaster("local[1]");
@@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
         conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
 
         spark = SparkSession
             .builder()
             .appName(SolrConfigExploreTest.class.getSimpleName())
             .config(conf)
             .getOrCreate();
 
         // random unassigned HTTP port
         final int jettyPort = 0;
@@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
 
         log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
         log
             .info(
                 CollectionAdminRequest.ClusterStatus
                     .getClusterStatus()
                     .process(miniCluster.getSolrClient())
                     .toString());
 
         NamedList<Object> res = createCollection(
             miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
         res.forEach(o -> log.info(o.toString()));
 
         // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
 
         res = createCollection(
             miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
         res.forEach(o -> log.info(o.toString()));
 
         admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
         CollectionAdminResponse rsp = (CollectionAdminResponse) admin
             .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
         assertEquals(0, rsp.getStatus());
         rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
         assertEquals(0, rsp.getStatus());
 
         log
             .info(
                 CollectionAdminRequest.ClusterStatus
                     .getClusterStatus()
                     .process(miniCluster.getSolrClient())
                     .toString());
 
     }
 
@@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
 
         new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
             .run(isLookupClient);
-        Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
+        Assertions
+            .assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
 
         String[] queryStrings = {
             "cancer",
@@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
             // System.out.println(rsp.getExplainMap());
 
             for (SolrDocument doc : rsp.getResults()) {
-                log.info(
+                log
+                    .info(
                         doc.get("score") + "\t" +
                             doc.get("__indexrecordidentifier") + "\t" +
                             doc.get("resultidentifier") + "\t" +
@@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
     }
 
     protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
         int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
         ModifiableSolrParams modParams = new ModifiableSolrParams();
         modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
         modParams.set("name", name);
SolrConfigTest.java

@@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
 
         new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
             .run(isLookupClient);
-        Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
+        Assertions
+            .assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
 
         String[] queryStrings = {
             "cancer",