[Annotation] extension of bulk tagging to accommodate graph annotation

This commit is contained in:
Miriam Baglioni 2024-08-05 17:08:52 +02:00
parent 740cfa77fb
commit b5130583e5
29 changed files with 340 additions and 181 deletions

View File

@ -1,3 +1,4 @@
package eu.dnetlib.pace.tree;
import java.util.Map;
@ -11,37 +12,37 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
@ComparatorClass("countryMatch")
public class CountryMatch extends AbstractStringComparator {
public CountryMatch(Map<String, String> params) {
super(params, new com.wcohen.ss.JaroWinkler());
}
public CountryMatch(Map<String, String> params) {
super(params, new com.wcohen.ss.JaroWinkler());
}
public CountryMatch(final double weight) {
super(weight, new com.wcohen.ss.JaroWinkler());
}
public CountryMatch(final double weight) {
super(weight, new com.wcohen.ss.JaroWinkler());
}
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
super(weight, ssalgo);
}
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
super(weight, ssalgo);
}
@Override
public double distance(final String a, final String b, final Config conf) {
if (a.isEmpty() || b.isEmpty()) {
return -1.0; // return -1 if a field is missing
}
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
return -1.0; // return -1 if a country is UNKNOWN
}
@Override
public double distance(final String a, final String b, final Config conf) {
if (a.isEmpty() || b.isEmpty()) {
return -1.0; // return -1 if a field is missing
}
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
return -1.0; // return -1 if a country is UNKNOWN
}
return a.equals(b) ? 1.0 : 0;
}
return a.equals(b) ? 1.0 : 0;
}
@Override
public double getWeight() {
return super.weight;
}
@Override
public double getWeight() {
return super.weight;
}
@Override
protected double normalize(final double d) {
return d;
}
@Override
protected double normalize(final double d) {
return d;
}
}

View File

@ -0,0 +1,2 @@
sdgPath=/tmp/sdg_20240627_oaid_csv
outputPath=/tmp/miriam/sdgnodoi

View File

@ -0,0 +1,100 @@
{"affiliationId":{"schema":"RINGGOLD","value":"8244"},"departmentName":"Biology","endDate":"2019-05-19","orcid":"0000-0001-6291-9619","roleTitle":"Undergraduate Research Assistant","startDate":"2017-01-26"}
{"affiliationId":{"schema":"GRID","value":"grid.445941.9"},"departmentName":"Department architectural-building constructions","endDate":"","orcid":"0000-0001-6291-9619","roleTitle":"Assistant professor","startDate":"2014-09"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02jx3x895"},"departmentName":"Learning and Leadership","endDate":"2012-09-01","orcid":"0000-0002-3210-3034","roleTitle":"Leturer"}
{"affiliationId":{"schema":"RINGGOLD","value":"445071"},"departmentName":"Fisheries and Aquaculture","endDate":"2006-01-012012-08-23","orcid":"0000-0002-9030-7609","roleTitle":"Technical Officer"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05a28rw58"},"departmentName":"Institute of Environmental Engineering","endDate":"2023-11-012024-01-31","orcid":"0000-0002-9030-7609","roleTitle":"Visiting Researcher"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Faculty of Engineering and Informatics","endDate":"2021-12-20","orcid":"0000-0003-0305-8980","roleTitle":"Lecturer"}
{"affiliationId":{"schema":"RINGGOLD","value":"26066"},"departmentName":"Obstetrics and Gynaecology","orcid":"0000-0003-0305-8980","roleTitle":"MD, PhD"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00ey9xa07"},"departmentName":"Harbin Sport University","endDate":"2024-06-01","orcid":"0000-0003-0305-8980","roleTitle":"Student"}
{"affiliationId":{"schema":"","value":""},"departmentName":"KIPP DC Schools","endDate":"2016-01-012017-01-01","orcid":"0009-0004-7554-419X","roleTitle":"Middle School Science Teacher"}
{"affiliationId":{"schema":"RINGGOLD","value":"19374"},"departmentName":"Music Therapy","endDate":"","orcid":"0000-0002-5115-9762","roleTitle":"Dementia Program Director","startDate":"2017-10-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"9144"},"departmentName":"Interdisciplinary Center for Scientific Computing - IWR","endDate":"2009-01-152012-10-31","orcid":"0000-0002-2004-4153","roleTitle":"PhD student"}
{"departmentName":"2nd Air Supply Maintenance Center Command","endDate":"2012-08-30","orcid":"0000-0002-4389-9744","roleTitle":"Production Planning Group Supervisor","startDate":"2008-09-01"}
{"affiliationId":{"schema":"","value":""},"departmentName":"MITAKY High-Tech Co., Ltd.","endDate":"2020-07-01","orcid":"0000-0001-7628-743X","roleTitle":"President & CEO"}
{"departmentName":"Ancash","endDate":"","orcid":"0000-0002-3861-2833","startDate":""}
{"departmentName":"CALDAS","endDate":"2010-06-17","orcid":"0000-0003-1077-4053","roleTitle":"COORDINADOR ESTUDIO DE RADIO Y TV","startDate":"1999-03-17"}
{"departmentName":"Institute of Sociogenesis and Social Dynamics","endDate":"","orcid":"0000-0001-6881-7760","startDate":""}
{"affiliationId":{"schema":"RINGGOLD","value":"27004"},"departmentName":"Biology","endDate":"1999-09-02","orcid":"0000-0002-8553-169X","roleTitle":"MCf Microbiology "}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/041nas322"},"departmentName":"Bonn Center for Dependency and Slavery Studies","endDate":"2019-01-012023-01-01","orcid":"0009-0002-9250-948X","roleTitle":"Predoctoral Research Associate / wiss. Mitarbeiter"}
{"departmentName":"Процессы и аппараты химических и пищевых производств","endDate":"","orcid":"0000-0002-1805-5670","roleTitle":"старший преподаватель","startDate":"2015-09-03"}
{"affiliationId":{"schema":"RINGGOLD","value":"95414"},"departmentName":"Department of Science, Technology and International relations","endDate":"2021-03-09","orcid":"0000-0002-7616-2482","roleTitle":"Researcher"}
{"affiliationId":{"schema":"RINGGOLD","value":"28730"},"departmentName":"Forensic Medicine & Toxicology","endDate":"2019-08-26","orcid":"0000-0001-7369-1744","roleTitle":"Senior Resident","startDate":"2016-08-27"}
{"departmentName":"Department of Functional & Comparative Genomics","endDate":"2015-10","orcid":"0000-0001-8059-8919","roleTitle":"Postdoctoral Research Associate, Fluorescence Chemical Sensors","startDate":"2014-08"}
{"affiliationId":{"schema":"RINGGOLD","value":"16771"},"departmentName":"Catedra UNESCO de Gestion y Politica Universitaria","endDate":"2016","orcid":"0000-0001-9437-6700","roleTitle":"Investigador","startDate":"2001"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05v3pg621"},"departmentName":"Department of Multimedia Animation and Application","endDate":"","orcid":"0000-0003-2513-7065","roleTitle":"Professor","startDate":"2012-02-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"246714"},"departmentName":"Delta Dental of Wisconsin","endDate":"","orcid":"0000-0003-2675-3206","roleTitle":"VP & Science Officer","startDate":"2006"}
{"affiliationId":{"schema":"RINGGOLD","value":"146895"},"departmentName":"Facultad de Ciencias Económicas, administrativas y Contables ","endDate":"2015-02-01","orcid":"0000-0001-9384-6395","roleTitle":"Coordinadora de Investigaciones-Facultad CEAC"}
{"affiliationId":{"schema":"RINGGOLD","value":"8367"},"departmentName":"Microbiology, Immunology and Tropical Medicine","endDate":"2019-10-01","orcid":"0000-0002-2349-263X","roleTitle":"Assistant Professor"}
{"affiliationId":{"schema":"RINGGOLD","value":"129705"},"departmentName":"Institut de Recherche en Informatique Fondamentale","orcid":"0000-0003-0287-6252","roleTitle":""}
{"affiliationId":{"schema":"RINGGOLD","value":"119726"},"departmentName":"Computer Engineering/MIS","endDate":"2018-09-15","orcid":"0000-0003-0014-5106","roleTitle":"Assoc.Prof.Dr."}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00wbwde85"},"departmentName":"school","endDate":"2021-03-01","orcid":"0009-0007-7585-0594","roleTitle":"mahasiswa"}
{"affiliationId":{"schema":"RINGGOLD","value":"9171"},"departmentName":"Computer Science Dept.","endDate":"","orcid":"0000-0002-3202-2904","startDate":"2002-04-01"}
{"affiliationId":{"schema":"GRID","value":"grid.474837.b"},"departmentName":"Gastroenterology","endDate":"","orcid":"0000-0003-0705-5760","startDate":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"School of Physical and Occupational Therapy","endDate":"2018-03-012021-12-31","orcid":"0000-0002-8406-5228","roleTitle":"Postdoctoral Fellow"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Xinqiao Hospital","orcid":"0000-0001-6200-2309","roleTitle":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"DIPARTIMENTO DI PATOLOGIA CHIRURGICA, MEDICA, MOLECOLARE E DELL'AREA CRITICA","endDate":"2018-10-012021-10-01","orcid":"0000-0002-5588-2608","roleTitle":"Ricercatori a tempo determinato"}
{"affiliationId":{"schema":"","value":""},"departmentName":"ECE","endDate":"2021-01-012022-02-01","orcid":"0000-0002-8729-0287","roleTitle":"Visiting Scholar"}
{"departmentName":"Mantenimiento","endDate":"1998-01-01","orcid":"0000-0002-8663-2716","roleTitle":"Project coordinator","startDate":"1994-01-01"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/012wtwr40"},"departmentName":"Centro Universitário Newton Paiva","endDate":"2012-07","orcid":"0000-0002-1725-1805","roleTitle":"Professor Assistente","startDate":"2005-09"}
{"departmentName":"DFC","endDate":"","orcid":"0000-0003-3764-9500","roleTitle":"Professora Substituta","startDate":"2018-02-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"10107"},"departmentName":"Automated Lab - Women's & Children's Hospital site","endDate":"1995-09-052018-09-09","orcid":"0000-0002-5594-9737","roleTitle":"Medical Scientist"}
{"affiliationId":{"schema":"RINGGOLD","value":"33784"},"departmentName":"Computer Science","endDate":"2023-06-04","orcid":"0009-0000-6585-6246","roleTitle":"Visiting Assistant Professor","startDate":"2023-01-09"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0280a3n32"},"departmentName":"Research","endDate":"2022-06","orcid":"0000-0002-0846-9503","roleTitle":"Research Assistant","startDate":"2019-06"}
{"affiliationId":{"schema":"GRID","value":"grid.8657.c"},"departmentName":"Finnish Meteorological Institute","endDate":"2019-06-01","orcid":"0000-0002-4826-2929","roleTitle":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05290cv24"},"departmentName":"Dipartimento di Informatica e Tecnologie dell'Informazione","endDate":"2022-11-012025-11-01","orcid":"0009-0000-6476-8092","roleTitle":"PhD Student"}
{"affiliationId":{"schema":"GRID","value":"grid.495082.2"},"departmentName":"Laboratory of water bodies sanitaric microbiology and human microbial ecology","endDate":"2017-01-01","orcid":"0000-0003-1194-7251","roleTitle":"Senior reseacher "}
{"affiliationId":{"schema":"RINGGOLD","value":"150713"},"departmentName":"Education","endDate":"2013-09-01","orcid":"0000-0002-2489-1202","roleTitle":"Doctor of Education"}
{"departmentName":"Office of Risk Management","endDate":"","orcid":"0000-0003-2772-313X","roleTitle":"Senior Policy Advisor","startDate":"2014-09-04"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04pe1sa24"},"departmentName":"Facultad de Estudios Globales y Hospitalidad","endDate":"","orcid":"0009-0003-4270-4196","roleTitle":"Docente en las Carreras de Licenciatura en Turismo y Relaciones Internacionales","startDate":"2023-06-12"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Civil & Mechanical Engineering","endDate":"2014-06-012020-06-30","orcid":"0000-0001-6598-2525","roleTitle":"Assistant Professor"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02ytfzr55"},"departmentName":"Department of Civil Engineering ","endDate":"2022-03-212024-02-27","orcid":"0000-0002-9572-1358","roleTitle":"Temporary Faculty"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04wdt0z89"},"departmentName":"library","endDate":"2024-03-01","orcid":"0009-0002-8124-1772","roleTitle":"library it"}
{"departmentName":"Kalil e Pires Advogados","endDate":"","orcid":"0009-0001-3403-0297","roleTitle":"Estagiário","startDate":"2023-03-13"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Coronel Institute of Occupational Health","endDate":"2019-01-01","orcid":"0000-0002-0461-4013","roleTitle":"Principal Investigator"}
{"affiliationId":{"schema":"GRID","value":"grid.5801.c"},"departmentName":"Health Sciences and Technology","orcid":"0000-0002-1651-0457","roleTitle":"Doctoral student"}
{"departmentName":"Prefeitura de Teresina","endDate":"","orcid":"0000-0002-8148-4179","startDate":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00k8rrx20"},"departmentName":"Prosseguir","endDate":"2023-05-012024-05-01","orcid":"0000-0001-5147-3455","roleTitle":"Coordenadora Pedagógica Regional do Prosseguir em Manaus"}
{"affiliationId":{"schema":"RINGGOLD","value":"381864"},"departmentName":"Pharmaron Beijing Co Ltd","endDate":"","orcid":"0000-0003-2165-740X","startDate":"2015-10-27"}
{"affiliationId":{"schema":"RINGGOLD","value":"183390"},"departmentName":"Instituto Tecnológico Superior de Irapuato","endDate":"","orcid":"0000-0003-2101-5917","startDate":"2018-11-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"384754"},"departmentName":"SynCat@Beijing","orcid":"0000-0002-1050-2165","roleTitle":"Vice Director"}
{"affiliationId":{"schema":"RINGGOLD","value":"282795"},"departmentName":"Setor de Coleções Científicas","endDate":"","orcid":"0000-0003-3755-0025","roleTitle":"Estagiária","startDate":"2019-11"}
{"affiliationId":{"schema":"RINGGOLD","value":"434589"},"departmentName":"Chemistry","endDate":"2015-05-25","orcid":"0000-0001-5861-4425","roleTitle":"Lecturer"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Cajamarca","endDate":"2023-11-15","orcid":"0000-0003-1524-3315","roleTitle":"Asistente Administrativo Provincial"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Seduc ma","endDate":"2012-01-24","orcid":"0000-0003-3142-356X","roleTitle":""}
{"departmentName":"Rede Particular de Ensino","endDate":"2021","orcid":"0000-0002-4771-2131","roleTitle":"Professora de Artes Cênicas","startDate":"2018"}
{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/100009042"},"departmentName":"Derecho del Trabajo y de la Seguridad Social","endDate":"2002-12-01","orcid":"0000-0002-1275-5289","roleTitle":"Catedrático de Derecho del Trabajo y de la Seguridad Social"}
{"departmentName":"Department of Ethology","endDate":"2011-01-02","orcid":"0000-0003-1436-7324","roleTitle":"Research Assistant","startDate":"2007-09-01"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04ka8rx28"},"departmentName":"Mechanical Engineering","orcid":"0009-0006-6397-2183","roleTitle":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/002qhr126"},"departmentName":"Theatre","orcid":"0009-0001-6531-9624","roleTitle":"theacher"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Institute of Molecular Medicine, Renji Hospital, School of Medicine.","endDate":"2021-01-27","orcid":"0000-0003-0399-1201","roleTitle":"Associate Professor"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Cell Biology","endDate":"2013-11-012014-07-01","orcid":"0000-0003-1489-4757","roleTitle":"Research Scolarship for Undergraduate Students"}
{"affiliationId":{"schema":"RINGGOLD","value":"346985"},"departmentName":"Maternidade","endDate":"","orcid":"0000-0002-6985-9679","roleTitle":"Enfermeira/UTI neonatal","startDate":"2019-02-26"}
{"affiliationId":{"schema":"RINGGOLD","value":"41726"},"departmentName":"Area Team - Biodiversity","endDate":"2006-01-012008-01-01","orcid":"0000-0002-6553-3786","roleTitle":"Catchment Biodiversity Technical Officer"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04z7qrj66"},"departmentName":"Merchant Marine College","orcid":"0009-0003-6812-3576","roleTitle":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"Lima","endDate":"2022-03-25","orcid":"0000-0002-9262-5619","roleTitle":"Docente Universitario"}
{"affiliationId":{"schema":"","value":""},"departmentName":"University of Bristol","orcid":"0000-0002-9793-3485","roleTitle":""}
{"departmentName":"US Geological Survey, Ecosystems Mission Area, Cooperative Fish and Wildlife Research Units Program","endDate":"","orcid":"0000-0002-8638-6682","startDate":"2011-06-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"28666"},"departmentName":"English","endDate":"","orcid":"0000-0001-5361-109X","roleTitle":"Assistant Professor","startDate":"2019-07-05"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Centre for Earth System Science","endDate":"2010-03-012012-11-01","orcid":"0000-0001-5323-4431","roleTitle":"Researcher and Executive Officer"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05bjd0w70"},"departmentName":"Education","endDate":"2013-08-15","orcid":"0000-0001-5960-0586","roleTitle":"Associate Professor and Chair, Department of Education","startDate":"2002-08-15"}
{"affiliationId":{"schema":"RINGGOLD","value":"632513"},"departmentName":"Board ","endDate":"2020-01-01","orcid":"0000-0002-4222-4518","roleTitle":"Boardmember"}
{"affiliationId":{"schema":"GRID","value":"grid.22657.34"},"departmentName":"Faculty of Food Technology","endDate":"2017-01-01","orcid":"0000-0003-2606-8380","roleTitle":"guest scientific assistant, guest researcher, guest lecturer"}
{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/501100008331"},"departmentName":"Radiology","endDate":"","orcid":"0000-0001-6249-450X","startDate":"2016-03-01"}
{"departmentName":"кафедра физики","endDate":"","orcid":"0000-0001-6786-838X","roleTitle":"доцент","startDate":"1981-11-28"}
{"affiliationId":{"schema":"RINGGOLD","value":"6429"},"departmentName":"Molecular and Cellular Physiology","endDate":"2010-01-012013-01-01","orcid":"0000-0002-5538-0464","roleTitle":"Research-Associate"}
{"affiliationId":{"schema":"RINGGOLD","value":"48455"},"departmentName":"Clinical Biochem","endDate":"2016-12-30","orcid":"0000-0002-9563-8044","roleTitle":"Associate Professor","startDate":"2008"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/01xf75524"},"departmentName":"Molecular Oncology","endDate":"2022-01-01","orcid":"0000-0003-0928-003X","roleTitle":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/022kthw22"},"departmentName":"Anesthesiology and Perioperative Medicine","endDate":"2022-07-01","orcid":"0000-0001-7410-7271","roleTitle":"Postdoctoral Researcher"}
{"affiliationId":{"schema":"RINGGOLD","value":"125792"},"departmentName":"Environmental Management and Toxicology","endDate":"2006-06-20","orcid":"0000-0001-7855-4183","roleTitle":"Instructional/Tutorial Facilitator"}
{"departmentName":"2004 2007 | Teacher of Fiqh and Usul-al-Fiqh | Islamic University | Moscow, Russia","endDate":"","orcid":"0000-0001-8386-4426","startDate":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0190ak572"},"departmentName":"Biology","endDate":"2024-06-01","orcid":"0009-0001-6766-7876","roleTitle":"Research assistant"}
{"affiliationId":{"schema":"RINGGOLD","value":"16763"},"departmentName":"Education","orcid":"0000-0003-2355-4682","roleTitle":"Profesora titular de Universidad/ Senior Lecturer "}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0406jsq08"},"departmentName":"Farmácia","endDate":"2023-03-01","orcid":"0009-0009-1661-5046","roleTitle":"Residente farmacêutico"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02aqsxs83"},"departmentName":"School of Biological Sciences","endDate":"2021-08-16","orcid":"0000-0002-1696-1952","roleTitle":"Assistant Professor"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00s582s04"},"departmentName":"cajamarca","endDate":"2024-01-01","orcid":"0009-0001-0970-2741","roleTitle":"BACHILLER"}
{"affiliationId":{"schema":"RINGGOLD","value":"186027"},"departmentName":"РЯиК","endDate":"","orcid":"0000-0002-1000-5441","roleTitle":"старший преподаватель","startDate":"2007-09-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"47910"},"departmentName":"Faculty of Life Science and Technology","endDate":"2012-07-082015-10-01","orcid":"0000-0001-7533-998X","roleTitle":"Lecturer"}
{"affiliationId":{"schema":"RINGGOLD","value":"2234"},"departmentName":"Education","endDate":"","orcid":"0000-0001-6123-8483","roleTitle":"Assistant Professor","startDate":"2019-01-07"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04qkymg17"},"departmentName":"genera surgical ward","endDate":"2013-10-012015-02-01","orcid":"0009-0009-7638-0453","roleTitle":"Registered Nurse"}
{"affiliationId":{"schema":"RINGGOLD","value":"575342"},"departmentName":"GBUZ Naucno-prakticeskij psihonevrologiceskij centr imeni Z P Solov'eva Departamenta zdravoohranenia goroda Moskvy","endDate":"2022-11-01","orcid":"0000-0002-0344-9765","roleTitle":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"SERVICE DE CHIRURGIE ORTHOPÉDIQUE ET TRAUMATOLOGIE","endDate":"2024-02-01","orcid":"0009-0003-2119-1766","roleTitle":"CHIRURGIEN ORTHOPEDISTE"}

View File

@ -0,0 +1 @@
{"doi":"https://doi.org/10.1007/978-3-030-75768-7","OpenAlexId":"https://openalex.org/W2124362779"}

View File

@ -8,7 +8,6 @@ import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@ -23,7 +22,6 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
@ -32,6 +30,8 @@ import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.api.model.EntityCommunities;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Context;
@ -93,9 +93,10 @@ public class SparkBulkTagJob {
ProtoMap protoMap = new Gson().fromJson(temp, ProtoMap.class);
log.info("pathMap: {}", new Gson().toJson(protoMap));
SelectionConstraints taggingConstraints = new Gson()
.fromJson(parser.get("taggingCriteria"), SelectionConstraints.class);
taggingConstraints.setSelection(VerbResolverFactory.newInstance());
TaggingConstraints taggingConstraints = new Gson()
.fromJson(parser.get("taggingCriteria"), TaggingConstraints.class);
taggingConstraints.getTags().forEach(t -> t.setSelection(VerbResolverFactory.newInstance()));
SparkConf conf = new SparkConf();
CommunityConfiguration cc;
@ -277,13 +278,8 @@ public class SparkBulkTagJob {
String outputPath,
ProtoMap protoMappingParams,
CommunityConfiguration communityConfiguration,
SelectionConstraints taggingConstraints) {
TaggingConstraints taggingConstraints) {
try {
System.out.println(new ObjectMapper().writeValueAsString(protoMappingParams));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
ModelSupport.entityTypes
.keySet()
.parallelStream()
@ -295,30 +291,22 @@ public class SparkBulkTagJob {
readPath(spark, inputPath + e.name(), resultClazz)
.map(patchResult(), Encoders.bean(resultClazz))
.filter(Objects::nonNull)
.map((MapFunction<R, Tagging>) value -> resultTagger
.map(
(MapFunction<R, R>) value -> resultTagger
.enrichContextCriteria(
value, communityConfiguration, protoMappingParams, taggingConstraints),
Encoders.bean(Tagging.class))
Encoders.bean(resultClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + e.name());// writing the tagging in the working dir for entity
readPath(spark, outputPath + e.name(), Tagging.class)
.map((MapFunction<Tagging, R>) t -> (R) t.getResult(), Encoders.bean(resultClazz) )// copy the tagging in the actual result output path
readPath(spark, outputPath + e.name(), resultClazz)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + e.name());
readPath(spark, outputPath + e.name(), Tagging.class)
.map((MapFunction<Tagging, String>) t -> t.getTag(), Encoders.STRING() )// copy the tagging in the actual result output path
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json("/user/miriam.baglioni/graphTagging/" + e.name());
});
}

View File

@ -1,32 +1,34 @@
package eu.dnetlib.dhp.bulktag;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.oaf.Result;
public class Tagging <R extends Result> implements Serializable {
private String tag;
private R result;
public class Tagging<R extends Result> implements Serializable {
private String tag;
private R result;
public String getTag() {
return tag;
}
public String getTag() {
return tag;
}
public void setTag(String tag) {
this.tag = tag;
}
public void setTag(String tag) {
this.tag = tag;
}
public R getResult() {
return result;
}
public R getResult() {
return result;
}
public void setResult(R result) {
this.result = result;
}
public void setResult(R result) {
this.result = result;
}
public static <R extends Result> Tagging newInstance(R result, String tag){
Tagging t = new Tagging<>();
t.result = result;
t.tag = tag;
return t;
}
public static <R extends Result> Tagging newInstance(R result, String tag) {
Tagging t = new Tagging<>();
t.result = result;
t.tag = tag;
return t;
}
}

View File

@ -10,8 +10,6 @@ import java.lang.reflect.Method;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.bulktag.Tagging;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -21,6 +19,7 @@ import com.jayway.jsonpath.DocumentContext;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.PathNotFoundException;
import eu.dnetlib.dhp.bulktag.Tagging;
import eu.dnetlib.dhp.bulktag.actions.MapModel;
import eu.dnetlib.dhp.bulktag.actions.Parameters;
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
@ -93,18 +92,18 @@ public class ResultTagger implements Serializable {
}
public <R extends Result> Tagging enrichContextCriteria(
final R result, final CommunityConfiguration conf, final Map<String, MapModel> criteria, SelectionConstraints taggingConstraints)
public <R extends Result> R enrichContextCriteria(
final R result, final CommunityConfiguration conf, final Map<String, MapModel> criteria,
TaggingConstraints taggingConstraints)
throws InvocationTargetException, NoSuchMethodException {
// Verify if the entity is deletedbyinference. In case verify if to clean the context list
// from all the zenodo communities
if (result.getDataInfo().getDeletedbyinference()) {
clearContext(result);
return Tagging.newInstance(result, null);
return result;
}
String retString = null;
final Map<String, List<String>> param = getParamMap(result, criteria);
// Execute the EOSCTag for the services
@ -123,8 +122,11 @@ public class ResultTagger implements Serializable {
}
//adding code for tagging of results searching supplementaryMaterial
if(taggingConstraints.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
retString = "supplementary";
final Set<String> tags = new HashSet<>();
taggingConstraints.getTags().forEach(t -> {
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
tags.add(t.getTagId());
});
// communities contains all the communities to be not added to the context
final Set<String> removeCommunities = new HashSet<>();
@ -253,10 +255,26 @@ public class ResultTagger implements Serializable {
clearContext(result);
/* Verify if there is something to bulktag */
if (communities.isEmpty()) {
return Tagging.newInstance(result, retString);
if (communities.isEmpty() && tags.isEmpty()) {
return result;
}
// Turn every tag collected from the tagging constraints into a context entry on the result.
tags.forEach(t -> {
	Context con = new Context();
	con.setId(t);
	// Provenance record marking this context as produced by graph annotation.
	// Arrays.asList returns a fixed-size view, so it is copied into an ArrayList
	// to keep the list growable.
	List<DataInfo> dataInfoList = new ArrayList<>(
		Arrays
			.asList(
				OafMapperUtils
					.dataInfo(
						false, ANNOTATION_DATA_INFO_TYPE, true, false,
						OafMapperUtils
							.qualifier(
								CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
								DNET_PROVENANCE_ACTIONS),
						TAGGING_TRUST)));
	// Previously the list was built and then dropped, leaving the context without provenance.
	con.setDataInfo(dataInfoList);
	result.getContext().add(con);
});
result.getContext().forEach(c -> {
final String cId = c.getId();
if (communities.contains(cId)) {
@ -321,7 +339,7 @@ public class ResultTagger implements Serializable {
result.getContext().stream().map(Context::getId).collect(Collectors.toSet()));
if (communities.isEmpty())
return Tagging.newInstance(result, retString);
return result;
List<Context> toaddcontext = communities
.stream()
@ -381,7 +399,7 @@ public class ResultTagger implements Serializable {
.collect(Collectors.toList());
result.getContext().addAll(toaddcontext);
return Tagging.newInstance(result, retString);
return result;
}
}

View File

@ -8,6 +8,10 @@ public class TaggingConstants {
public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
public static final String ANNOTATION_DATA_INFO_TYPE = "annotation";
public static final String CLASS_ID_ANNOTATION = "graph:annotation";
public static final String CLASS_NAME_ANNOTATION = "Graph Annotation";
public static final String CLASS_ID_SUBJECT = "community:subject";
public static final String CLASS_ID_DATASOURCE = "community:datasource";
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.bulktag.community;
/**
 * A {@link SelectionConstraints} that additionally carries the identifier of the tag
 * associated with those constraints.
 *
 * NOTE(review): instances appear to be created by Gson from the {@code taggingCriteria}
 * parameter, and Gson binds JSON keys to field names by default. A configuration that
 * spells the key {@code "id"} instead of {@code "tagId"} would presumably leave this
 * field {@code null} - confirm against the job parameters.
 */
public class TaggingConstraint extends SelectionConstraints {

	/** Identifier of the tag; {@code null} until assigned. */
	private String tagId;

	/**
	 * @return the tag identifier, or {@code null} when none has been set
	 */
	public String getTagId() {
		return this.tagId;
	}

	/**
	 * @param tagId the tag identifier to store
	 */
	public void setTagId(final String tagId) {
		this.tagId = tagId;
	}
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.bulktag.community;
import java.util.List;
/**
 * Container for the list of {@link TaggingConstraint} entries read from the tagging criteria.
 *
 * The class implements {@link java.io.Serializable} because an instance is captured by the
 * function passed to a Spark {@code Dataset.map} call; a non-serializable capture makes the
 * job fail when the closure is serialized.
 */
public class TaggingConstraints implements java.io.Serializable {

	private static final long serialVersionUID = 1L;

	/** Tag definitions; stays {@code null} until populated. */
	private List<TaggingConstraint> tags;

	/**
	 * @return the configured tag definitions, or {@code null} when none were set
	 */
	public List<TaggingConstraint> getTags() {
		return tags;
	}

	/**
	 * @param tags the tag definitions to store
	 */
	public void setTags(List<TaggingConstraint> tags) {
		this.tags = tags;
	}
}

View File

@ -0,0 +1,4 @@
sourcePath=/tmp/miriam/12_graph_copy
pathMap=/data/bulktagging/pathMap
baseURL=https://services.openaire.eu/openaire/community/
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}

View File

@ -122,9 +122,9 @@ public class BulkTagJobTest {
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -162,9 +162,9 @@ public class BulkTagJobTest {
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -271,8 +271,8 @@ public class BulkTagJobTest {
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap",
"-nameNode", "local",
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/"
});
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
@ -354,9 +354,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -438,7 +438,7 @@ public class BulkTagJobTest {
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
"-nameNode", "local",
"-taggingCriteria", taggingCriteria
"-taggingCriteria", taggingCriteria
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -496,9 +496,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -620,9 +620,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -751,9 +751,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -854,9 +854,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -900,9 +900,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -957,9 +957,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1009,9 +1009,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1231,9 +1231,9 @@ public class BulkTagJobTest {
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1352,9 +1352,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1475,9 +1475,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1518,9 +1518,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1566,9 +1566,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1606,9 +1606,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1684,9 +1684,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1725,9 +1725,9 @@ public class BulkTagJobTest {
// "-baseURL", "https://services.openaire.eu/openaire/community/",
"-pathMap", pathMap,
"-taggingConf", taggingConf,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1769,9 +1769,9 @@ public class BulkTagJobTest {
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_publicationdate.xml")),
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1874,9 +1874,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
}

View File

@ -5,7 +5,6 @@ import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.solr.ExternalReference;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
import eu.dnetlib.dhp.schema.solr.Country;
import eu.dnetlib.dhp.schema.solr.Datasource;
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
import eu.dnetlib.dhp.schema.solr.ExternalReference;
import eu.dnetlib.dhp.schema.solr.Instance;
import eu.dnetlib.dhp.schema.solr.Journal;
import eu.dnetlib.dhp.schema.solr.Measure;
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
.orElse(null);
}
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
return Optional.ofNullable(externalReference)
.map(ext -> ext.stream()
.map(e -> ExternalReference.newInstance(
private static List<ExternalReference> mapExternalReference(
List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
return Optional
.ofNullable(externalReference)
.map(
ext -> ext
.stream()
.map(
e -> ExternalReference
.newInstance(
e.getSitename(),
e.getLabel(),
e.getAlternateLabel(),
@ -573,8 +579,8 @@ public class ProvisionModelSupport {
mapCodeLabel(e.getQualifier()),
e.getRefidentifier(),
e.getQuery()))
.collect(Collectors.toList()))
.orElse(Lists.newArrayList());
.collect(Collectors.toList()))
.orElse(Lists.newArrayList());
}
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,

View File

@ -1,12 +1,13 @@
package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Path;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrQuery;
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.jupiter.api.Assertions.assertEquals;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
public class SolrConfigExploreTest {
@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
SparkConf conf = new SparkConf();
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
conf.registerKryoClasses(new Class[] {
SerializableSolrInputDocument.class
SerializableSolrInputDocument.class
});
conf.setMaster("local[1]");
@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
spark = SparkSession
.builder()
.appName(SolrConfigExploreTest.class.getSimpleName())
.config(conf)
.getOrCreate();
.builder()
.appName(SolrConfigExploreTest.class.getSimpleName())
.config(conf)
.getOrCreate();
// random unassigned HTTP port
final int jettyPort = 0;
@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
log
.info(
CollectionAdminRequest.ClusterStatus
.getClusterStatus()
.process(miniCluster.getSolrClient())
.toString());
.info(
CollectionAdminRequest.ClusterStatus
.getClusterStatus()
.process(miniCluster.getSolrClient())
.toString());
NamedList<Object> res = createCollection(
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
res.forEach(o -> log.info(o.toString()));
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
res = createCollection(
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
res.forEach(o -> log.info(o.toString()));
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
assertEquals(0, rsp.getStatus());
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
assertEquals(0, rsp.getStatus());
log
.info(
CollectionAdminRequest.ClusterStatus
.getClusterStatus()
.process(miniCluster.getSolrClient())
.toString());
.info(
CollectionAdminRequest.ClusterStatus
.getClusterStatus()
.process(miniCluster.getSolrClient())
.toString());
}
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
.run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
Assertions
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
String[] queryStrings = {
"cancer",
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
// System.out.println(rsp.getExplainMap());
for (SolrDocument doc : rsp.getResults()) {
log.info(
log
.info(
doc.get("score") + "\t" +
doc.get("__indexrecordidentifier") + "\t" +
doc.get("resultidentifier") + "\t" +
@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
}
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
ModifiableSolrParams modParams = new ModifiableSolrParams();
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
modParams.set("name", name);

View File

@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
.run(isLookupClient);
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
Assertions
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
String[] queryStrings = {
"cancer",