[Annotation] extension of bulk tagging to accommodate graph annotation
This commit is contained in:
parent
740cfa77fb
commit
b5130583e5
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.pace.tree;
|
||||
|
||||
import java.util.Map;
|
||||
|
@ -11,37 +12,37 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
|
|||
@ComparatorClass("countryMatch")
|
||||
public class CountryMatch extends AbstractStringComparator {
|
||||
|
||||
public CountryMatch(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
public CountryMatch(Map<String, String> params) {
|
||||
super(params, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
public CountryMatch(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
public CountryMatch(final double weight) {
|
||||
super(weight, new com.wcohen.ss.JaroWinkler());
|
||||
}
|
||||
|
||||
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
|
||||
super(weight, ssalgo);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
||||
return -1.0; // return -1 if a country is UNKNOWN
|
||||
}
|
||||
@Override
|
||||
public double distance(final String a, final String b, final Config conf) {
|
||||
if (a.isEmpty() || b.isEmpty()) {
|
||||
return -1.0; // return -1 if a field is missing
|
||||
}
|
||||
if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
|
||||
return -1.0; // return -1 if a country is UNKNOWN
|
||||
}
|
||||
|
||||
return a.equals(b) ? 1.0 : 0;
|
||||
}
|
||||
return a.equals(b) ? 1.0 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
@Override
|
||||
public double getWeight() {
|
||||
return super.weight;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
@Override
|
||||
protected double normalize(final double d) {
|
||||
return d;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
sdgPath=/tmp/sdg_20240627_oaid_csv
|
||||
outputPath=/tmp/miriam/sdgnodoi
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,100 @@
|
|||
{"affiliationId":{"schema":"RINGGOLD","value":"8244"},"departmentName":"Biology","endDate":"2019-05-19","orcid":"0000-0001-6291-9619","roleTitle":"Undergraduate Research Assistant","startDate":"2017-01-26"}
|
||||
{"affiliationId":{"schema":"GRID","value":"grid.445941.9"},"departmentName":"Department architectural-building constructions","endDate":"","orcid":"0000-0001-6291-9619","roleTitle":"Assistant professor","startDate":"2014-09"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02jx3x895"},"departmentName":"Learning and Leadership","endDate":"2012-09-01","orcid":"0000-0002-3210-3034","roleTitle":"Leturer"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"445071"},"departmentName":"Fisheries and Aquaculture","endDate":"2006-01-012012-08-23","orcid":"0000-0002-9030-7609","roleTitle":"Technical Officer"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05a28rw58"},"departmentName":"Institute of Environmental Engineering","endDate":"2023-11-012024-01-31","orcid":"0000-0002-9030-7609","roleTitle":"Visiting Researcher"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Faculty of Engineering and Informatics","endDate":"2021-12-20","orcid":"0000-0003-0305-8980","roleTitle":"Lecturer"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"26066"},"departmentName":"Obstetrics and Gynaecology","orcid":"0000-0003-0305-8980","roleTitle":"MD, PhD"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00ey9xa07"},"departmentName":"Harbin Sport University","endDate":"2024-06-01","orcid":"0000-0003-0305-8980","roleTitle":"Student"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"KIPP DC Schools","endDate":"2016-01-012017-01-01","orcid":"0009-0004-7554-419X","roleTitle":"Middle School Science Teacher"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"19374"},"departmentName":"Music Therapy","endDate":"","orcid":"0000-0002-5115-9762","roleTitle":"Dementia Program Director","startDate":"2017-10-01"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"9144"},"departmentName":"Interdisciplinary Center for Scientific Computing - IWR","endDate":"2009-01-152012-10-31","orcid":"0000-0002-2004-4153","roleTitle":"PhD student"}
|
||||
{"departmentName":"2nd Air Supply Maintenance Center Command","endDate":"2012-08-30","orcid":"0000-0002-4389-9744","roleTitle":"Production Planning Group Supervisor","startDate":"2008-09-01"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"MITAKY High-Tech Co., Ltd.","endDate":"2020-07-01","orcid":"0000-0001-7628-743X","roleTitle":"President & CEO"}
|
||||
{"departmentName":"Ancash","endDate":"","orcid":"0000-0002-3861-2833","startDate":""}
|
||||
{"departmentName":"CALDAS","endDate":"2010-06-17","orcid":"0000-0003-1077-4053","roleTitle":"COORDINADOR ESTUDIO DE RADIO Y TV","startDate":"1999-03-17"}
|
||||
{"departmentName":"Institute of Sociogenesis and Social Dynamics","endDate":"","orcid":"0000-0001-6881-7760","startDate":""}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"27004"},"departmentName":"Biology","endDate":"1999-09-02","orcid":"0000-0002-8553-169X","roleTitle":"MCf Microbiology "}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/041nas322"},"departmentName":"Bonn Center for Dependency and Slavery Studies","endDate":"2019-01-012023-01-01","orcid":"0009-0002-9250-948X","roleTitle":"Predoctoral Research Associate / wiss. Mitarbeiter"}
|
||||
{"departmentName":"Процессы и аппараты химических и пищевых производств","endDate":"","orcid":"0000-0002-1805-5670","roleTitle":"старший преподаватель","startDate":"2015-09-03"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"95414"},"departmentName":"Department of Science, Technology and International relations","endDate":"2021-03-09","orcid":"0000-0002-7616-2482","roleTitle":"Researcher"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"28730"},"departmentName":"Forensic Medicine & Toxicology","endDate":"2019-08-26","orcid":"0000-0001-7369-1744","roleTitle":"Senior Resident","startDate":"2016-08-27"}
|
||||
{"departmentName":"Department of Functional & Comparative Genomics","endDate":"2015-10","orcid":"0000-0001-8059-8919","roleTitle":"Postdoctoral Research Associate, Fluorescence Chemical Sensors","startDate":"2014-08"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"16771"},"departmentName":"Catedra UNESCO de Gestion y Politica Universitaria","endDate":"2016","orcid":"0000-0001-9437-6700","roleTitle":"Investigador","startDate":"2001"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05v3pg621"},"departmentName":"Department of Multimedia Animation and Application","endDate":"","orcid":"0000-0003-2513-7065","roleTitle":"Professor","startDate":"2012-02-01"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"246714"},"departmentName":"Delta Dental of Wisconsin","endDate":"","orcid":"0000-0003-2675-3206","roleTitle":"VP & Science Officer","startDate":"2006"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"146895"},"departmentName":"Facultad de Ciencias Económicas, administrativas y Contables ","endDate":"2015-02-01","orcid":"0000-0001-9384-6395","roleTitle":"Coordinadora de Investigaciones-Facultad CEAC"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"8367"},"departmentName":"Microbiology, Immunology and Tropical Medicine","endDate":"2019-10-01","orcid":"0000-0002-2349-263X","roleTitle":"Assistant Professor"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"129705"},"departmentName":"Institut de Recherche en Informatique Fondamentale","orcid":"0000-0003-0287-6252","roleTitle":""}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"119726"},"departmentName":"Computer Engineering/MIS","endDate":"2018-09-15","orcid":"0000-0003-0014-5106","roleTitle":"Assoc.Prof.Dr."}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00wbwde85"},"departmentName":"school","endDate":"2021-03-01","orcid":"0009-0007-7585-0594","roleTitle":"mahasiswa"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"9171"},"departmentName":"Computer Science Dept.","endDate":"","orcid":"0000-0002-3202-2904","startDate":"2002-04-01"}
|
||||
{"affiliationId":{"schema":"GRID","value":"grid.474837.b"},"departmentName":"Gastroenterology","endDate":"","orcid":"0000-0003-0705-5760","startDate":""}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"School of Physical and Occupational Therapy","endDate":"2018-03-012021-12-31","orcid":"0000-0002-8406-5228","roleTitle":"Postdoctoral Fellow"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Xinqiao Hospital","orcid":"0000-0001-6200-2309","roleTitle":""}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"DIPARTIMENTO DI PATOLOGIA CHIRURGICA, MEDICA, MOLECOLARE E DELL'AREA CRITICA","endDate":"2018-10-012021-10-01","orcid":"0000-0002-5588-2608","roleTitle":"Ricercatori a tempo determinato"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"ECE","endDate":"2021-01-012022-02-01","orcid":"0000-0002-8729-0287","roleTitle":"Visiting Scholar"}
|
||||
{"departmentName":"Mantenimiento","endDate":"1998-01-01","orcid":"0000-0002-8663-2716","roleTitle":"Project coordinator","startDate":"1994-01-01"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/012wtwr40"},"departmentName":"Centro Universitário Newton Paiva","endDate":"2012-07","orcid":"0000-0002-1725-1805","roleTitle":"Professor Assistente","startDate":"2005-09"}
|
||||
{"departmentName":"DFC","endDate":"","orcid":"0000-0003-3764-9500","roleTitle":"Professora Substituta","startDate":"2018-02-01"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"10107"},"departmentName":"Automated Lab - Women's & Children's Hospital site","endDate":"1995-09-052018-09-09","orcid":"0000-0002-5594-9737","roleTitle":"Medical Scientist"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"33784"},"departmentName":"Computer Science","endDate":"2023-06-04","orcid":"0009-0000-6585-6246","roleTitle":"Visiting Assistant Professor","startDate":"2023-01-09"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0280a3n32"},"departmentName":"Research","endDate":"2022-06","orcid":"0000-0002-0846-9503","roleTitle":"Research Assistant","startDate":"2019-06"}
|
||||
{"affiliationId":{"schema":"GRID","value":"grid.8657.c"},"departmentName":"Finnish Meteorological Institute","endDate":"2019-06-01","orcid":"0000-0002-4826-2929","roleTitle":""}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05290cv24"},"departmentName":"Dipartimento di Informatica e Tecnologie dell'Informazione","endDate":"2022-11-012025-11-01","orcid":"0009-0000-6476-8092","roleTitle":"PhD Student"}
|
||||
{"affiliationId":{"schema":"GRID","value":"grid.495082.2"},"departmentName":"Laboratory of water bodies sanitaric microbiology and human microbial ecology","endDate":"2017-01-01","orcid":"0000-0003-1194-7251","roleTitle":"Senior reseacher "}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"150713"},"departmentName":"Education","endDate":"2013-09-01","orcid":"0000-0002-2489-1202","roleTitle":"Doctor of Education"}
|
||||
{"departmentName":"Office of Risk Management","endDate":"","orcid":"0000-0003-2772-313X","roleTitle":"Senior Policy Advisor","startDate":"2014-09-04"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04pe1sa24"},"departmentName":"Facultad de Estudios Globales y Hospitalidad","endDate":"","orcid":"0009-0003-4270-4196","roleTitle":"Docente en las Carreras de Licenciatura en Turismo y Relaciones Internacionales","startDate":"2023-06-12"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Civil & Mechanical Engineering","endDate":"2014-06-012020-06-30","orcid":"0000-0001-6598-2525","roleTitle":"Assistant Professor"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02ytfzr55"},"departmentName":"Department of Civil Engineering ","endDate":"2022-03-212024-02-27","orcid":"0000-0002-9572-1358","roleTitle":"Temporary Faculty"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04wdt0z89"},"departmentName":"library","endDate":"2024-03-01","orcid":"0009-0002-8124-1772","roleTitle":"library it"}
|
||||
{"departmentName":"Kalil e Pires Advogados","endDate":"","orcid":"0009-0001-3403-0297","roleTitle":"Estagiário","startDate":"2023-03-13"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Coronel Institute of Occupational Health","endDate":"2019-01-01","orcid":"0000-0002-0461-4013","roleTitle":"Principal Investigator"}
|
||||
{"affiliationId":{"schema":"GRID","value":"grid.5801.c"},"departmentName":"Health Sciences and Technology","orcid":"0000-0002-1651-0457","roleTitle":"Doctoral student"}
|
||||
{"departmentName":"Prefeitura de Teresina","endDate":"","orcid":"0000-0002-8148-4179","startDate":""}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00k8rrx20"},"departmentName":"Prosseguir","endDate":"2023-05-012024-05-01","orcid":"0000-0001-5147-3455","roleTitle":"Coordenadora Pedagógica Regional do Prosseguir em Manaus"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"381864"},"departmentName":"Pharmaron Beijing Co Ltd","endDate":"","orcid":"0000-0003-2165-740X","startDate":"2015-10-27"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"183390"},"departmentName":"Instituto Tecnológico Superior de Irapuato","endDate":"","orcid":"0000-0003-2101-5917","startDate":"2018-11-01"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"384754"},"departmentName":"SynCat@Beijing","orcid":"0000-0002-1050-2165","roleTitle":"Vice Director"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"282795"},"departmentName":"Setor de Coleções Científicas","endDate":"","orcid":"0000-0003-3755-0025","roleTitle":"Estagiária","startDate":"2019-11"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"434589"},"departmentName":"Chemistry","endDate":"2015-05-25","orcid":"0000-0001-5861-4425","roleTitle":"Lecturer"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Cajamarca","endDate":"2023-11-15","orcid":"0000-0003-1524-3315","roleTitle":"Asistente Administrativo Provincial"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Seduc ma","endDate":"2012-01-24","orcid":"0000-0003-3142-356X","roleTitle":""}
|
||||
{"departmentName":"Rede Particular de Ensino","endDate":"2021","orcid":"0000-0002-4771-2131","roleTitle":"Professora de Artes Cênicas","startDate":"2018"}
|
||||
{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/100009042"},"departmentName":"Derecho del Trabajo y de la Seguridad Social","endDate":"2002-12-01","orcid":"0000-0002-1275-5289","roleTitle":"Catedrático de Derecho del Trabajo y de la Seguridad Social"}
|
||||
{"departmentName":"Department of Ethology","endDate":"2011-01-02","orcid":"0000-0003-1436-7324","roleTitle":"Research Assistant","startDate":"2007-09-01"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04ka8rx28"},"departmentName":"Mechanical Engineering","orcid":"0009-0006-6397-2183","roleTitle":""}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/002qhr126"},"departmentName":"Theatre","orcid":"0009-0001-6531-9624","roleTitle":"theacher"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Institute of Molecular Medicine, Renji Hospital, School of Medicine.","endDate":"2021-01-27","orcid":"0000-0003-0399-1201","roleTitle":"Associate Professor"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Cell Biology","endDate":"2013-11-012014-07-01","orcid":"0000-0003-1489-4757","roleTitle":"Research Scolarship for Undergraduate Students"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"346985"},"departmentName":"Maternidade","endDate":"","orcid":"0000-0002-6985-9679","roleTitle":"Enfermeira/UTI neonatal","startDate":"2019-02-26"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"41726"},"departmentName":"Area Team - Biodiversity","endDate":"2006-01-012008-01-01","orcid":"0000-0002-6553-3786","roleTitle":"Catchment Biodiversity Technical Officer"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04z7qrj66"},"departmentName":"Merchant Marine College","orcid":"0009-0003-6812-3576","roleTitle":""}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Lima","endDate":"2022-03-25","orcid":"0000-0002-9262-5619","roleTitle":"Docente Universitario"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"University of Bristol","orcid":"0000-0002-9793-3485","roleTitle":""}
|
||||
{"departmentName":"US Geological Survey, Ecosystems Mission Area, Cooperative Fish and Wildlife Research Units Program","endDate":"","orcid":"0000-0002-8638-6682","startDate":"2011-06-01"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"28666"},"departmentName":"English","endDate":"","orcid":"0000-0001-5361-109X","roleTitle":"Assistant Professor","startDate":"2019-07-05"}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"Centre for Earth System Science","endDate":"2010-03-012012-11-01","orcid":"0000-0001-5323-4431","roleTitle":"Researcher and Executive Officer"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05bjd0w70"},"departmentName":"Education","endDate":"2013-08-15","orcid":"0000-0001-5960-0586","roleTitle":"Associate Professor and Chair, Department of Education","startDate":"2002-08-15"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"632513"},"departmentName":"Board ","endDate":"2020-01-01","orcid":"0000-0002-4222-4518","roleTitle":"Boardmember"}
|
||||
{"affiliationId":{"schema":"GRID","value":"grid.22657.34"},"departmentName":"Faculty of Food Technology","endDate":"2017-01-01","orcid":"0000-0003-2606-8380","roleTitle":"guest scientific assistant, guest researcher, guest lecturer"}
|
||||
{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/501100008331"},"departmentName":"Radiology","endDate":"","orcid":"0000-0001-6249-450X","startDate":"2016-03-01"}
|
||||
{"departmentName":"кафедра физики","endDate":"","orcid":"0000-0001-6786-838X","roleTitle":"доцент","startDate":"1981-11-28"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"6429"},"departmentName":"Molecular and Cellular Physiology","endDate":"2010-01-012013-01-01","orcid":"0000-0002-5538-0464","roleTitle":"Research-Associate"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"48455"},"departmentName":"Clinical Biochem","endDate":"2016-12-30","orcid":"0000-0002-9563-8044","roleTitle":"Associate Professor","startDate":"2008"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/01xf75524"},"departmentName":"Molecular Oncology","endDate":"2022-01-01","orcid":"0000-0003-0928-003X","roleTitle":""}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/022kthw22"},"departmentName":"Anesthesiology and Perioperative Medicine","endDate":"2022-07-01","orcid":"0000-0001-7410-7271","roleTitle":"Postdoctoral Researcher"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"125792"},"departmentName":"Environmental Management and Toxicology","endDate":"2006-06-20","orcid":"0000-0001-7855-4183","roleTitle":"Instructional/Tutorial Facilitator"}
|
||||
{"departmentName":"2004 – 2007 | Teacher of Fiqh and Usul-al-Fiqh | Islamic University | Moscow, Russia","endDate":"","orcid":"0000-0001-8386-4426","startDate":""}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0190ak572"},"departmentName":"Biology","endDate":"2024-06-01","orcid":"0009-0001-6766-7876","roleTitle":"Research assistant"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"16763"},"departmentName":"Education","orcid":"0000-0003-2355-4682","roleTitle":"Profesora titular de Universidad/ Senior Lecturer "}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0406jsq08"},"departmentName":"Farmácia","endDate":"2023-03-01","orcid":"0009-0009-1661-5046","roleTitle":"Residente farmacêutico"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02aqsxs83"},"departmentName":"School of Biological Sciences","endDate":"2021-08-16","orcid":"0000-0002-1696-1952","roleTitle":"Assistant Professor"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00s582s04"},"departmentName":"cajamarca","endDate":"2024-01-01","orcid":"0009-0001-0970-2741","roleTitle":"BACHILLER"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"186027"},"departmentName":"РЯиК","endDate":"","orcid":"0000-0002-1000-5441","roleTitle":"старший преподаватель","startDate":"2007-09-01"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"47910"},"departmentName":"Faculty of Life Science and Technology","endDate":"2012-07-082015-10-01","orcid":"0000-0001-7533-998X","roleTitle":"Lecturer"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"2234"},"departmentName":"Education","endDate":"","orcid":"0000-0001-6123-8483","roleTitle":"Assistant Professor","startDate":"2019-01-07"}
|
||||
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04qkymg17"},"departmentName":"genera surgical ward","endDate":"2013-10-012015-02-01","orcid":"0009-0009-7638-0453","roleTitle":"Registered Nurse"}
|
||||
{"affiliationId":{"schema":"RINGGOLD","value":"575342"},"departmentName":"GBUZ Naucno-prakticeskij psihonevrologiceskij centr imeni Z P Solov'eva Departamenta zdravoohranenia goroda Moskvy","endDate":"2022-11-01","orcid":"0000-0002-0344-9765","roleTitle":""}
|
||||
{"affiliationId":{"schema":"","value":""},"departmentName":"SERVICE DE CHIRURGIE ORTHOPÉDIQUE ET TRAUMATOLOGIE","endDate":"2024-02-01","orcid":"0009-0003-2119-1766","roleTitle":"CHIRURGIEN ORTHOPEDISTE"}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
{"doi":"https://doi.org/10.1007/978-3-030-75768-7","OpenAlexId":"https://openalex.org/W2124362779"}
|
|
@ -8,7 +8,6 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
@ -23,7 +22,6 @@ import org.apache.spark.sql.SparkSession;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
|
@ -32,6 +30,8 @@ import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
|||
import eu.dnetlib.dhp.api.model.EntityCommunities;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.bulktag.community.*;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||
|
@ -93,9 +93,10 @@ public class SparkBulkTagJob {
|
|||
ProtoMap protoMap = new Gson().fromJson(temp, ProtoMap.class);
|
||||
log.info("pathMap: {}", new Gson().toJson(protoMap));
|
||||
|
||||
SelectionConstraints taggingConstraints = new Gson()
|
||||
.fromJson(parser.get("taggingCriteria"), SelectionConstraints.class);
|
||||
taggingConstraints.setSelection(VerbResolverFactory.newInstance());
|
||||
TaggingConstraints taggingConstraints = new Gson()
|
||||
.fromJson(parser.get("taggingCriteria"), TaggingConstraints.class);
|
||||
|
||||
taggingConstraints.getTags().forEach(t -> t.setSelection(VerbResolverFactory.newInstance()));
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
CommunityConfiguration cc;
|
||||
|
@ -277,13 +278,8 @@ public class SparkBulkTagJob {
|
|||
String outputPath,
|
||||
ProtoMap protoMappingParams,
|
||||
CommunityConfiguration communityConfiguration,
|
||||
SelectionConstraints taggingConstraints) {
|
||||
TaggingConstraints taggingConstraints) {
|
||||
|
||||
try {
|
||||
System.out.println(new ObjectMapper().writeValueAsString(protoMappingParams));
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
ModelSupport.entityTypes
|
||||
.keySet()
|
||||
.parallelStream()
|
||||
|
@ -295,30 +291,22 @@ public class SparkBulkTagJob {
|
|||
readPath(spark, inputPath + e.name(), resultClazz)
|
||||
.map(patchResult(), Encoders.bean(resultClazz))
|
||||
.filter(Objects::nonNull)
|
||||
.map((MapFunction<R, Tagging>) value -> resultTagger
|
||||
.map(
|
||||
(MapFunction<R, R>) value -> resultTagger
|
||||
.enrichContextCriteria(
|
||||
value, communityConfiguration, protoMappingParams, taggingConstraints),
|
||||
Encoders.bean(Tagging.class))
|
||||
Encoders.bean(resultClazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath + e.name());// writing the tagging in the working dir for entity
|
||||
|
||||
readPath(spark, outputPath + e.name(), Tagging.class)
|
||||
.map((MapFunction<Tagging, R>) t -> (R) t.getResult(), Encoders.bean(resultClazz) )// copy the tagging in the actual result output path
|
||||
readPath(spark, outputPath + e.name(), resultClazz)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(inputPath + e.name());
|
||||
|
||||
readPath(spark, outputPath + e.name(), Tagging.class)
|
||||
.map((MapFunction<Tagging, String>) t -> t.getTag(), Encoders.STRING() )// copy the tagging in the actual result output path
|
||||
.filter(Objects::nonNull)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json("/user/miriam.baglioni/graphTagging/" + e.name());
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
|
|
@ -1,32 +1,34 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class Tagging <R extends Result> implements Serializable {
|
||||
private String tag;
|
||||
private R result;
|
||||
public class Tagging<R extends Result> implements Serializable {
|
||||
private String tag;
|
||||
private R result;
|
||||
|
||||
public String getTag() {
|
||||
return tag;
|
||||
}
|
||||
public String getTag() {
|
||||
return tag;
|
||||
}
|
||||
|
||||
public void setTag(String tag) {
|
||||
this.tag = tag;
|
||||
}
|
||||
public void setTag(String tag) {
|
||||
this.tag = tag;
|
||||
}
|
||||
|
||||
public R getResult() {
|
||||
return result;
|
||||
}
|
||||
public R getResult() {
|
||||
return result;
|
||||
}
|
||||
|
||||
public void setResult(R result) {
|
||||
this.result = result;
|
||||
}
|
||||
public void setResult(R result) {
|
||||
this.result = result;
|
||||
}
|
||||
|
||||
public static <R extends Result> Tagging newInstance(R result, String tag){
|
||||
Tagging t = new Tagging<>();
|
||||
t.result = result;
|
||||
t.tag = tag;
|
||||
return t;
|
||||
}
|
||||
public static <R extends Result> Tagging newInstance(R result, String tag) {
|
||||
Tagging t = new Tagging<>();
|
||||
t.result = result;
|
||||
t.tag = tag;
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,8 +10,6 @@ import java.lang.reflect.Method;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.Tagging;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -21,6 +19,7 @@ import com.jayway.jsonpath.DocumentContext;
|
|||
import com.jayway.jsonpath.JsonPath;
|
||||
import com.jayway.jsonpath.PathNotFoundException;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.Tagging;
|
||||
import eu.dnetlib.dhp.bulktag.actions.MapModel;
|
||||
import eu.dnetlib.dhp.bulktag.actions.Parameters;
|
||||
import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag;
|
||||
|
@ -93,18 +92,18 @@ public class ResultTagger implements Serializable {
|
|||
|
||||
}
|
||||
|
||||
public <R extends Result> Tagging enrichContextCriteria(
|
||||
final R result, final CommunityConfiguration conf, final Map<String, MapModel> criteria, SelectionConstraints taggingConstraints)
|
||||
public <R extends Result> R enrichContextCriteria(
|
||||
final R result, final CommunityConfiguration conf, final Map<String, MapModel> criteria,
|
||||
TaggingConstraints taggingConstraints)
|
||||
throws InvocationTargetException, NoSuchMethodException {
|
||||
|
||||
// Verify if the entity is deletedbyinference. In case verify if to clean the context list
|
||||
// from all the zenodo communities
|
||||
if (result.getDataInfo().getDeletedbyinference()) {
|
||||
clearContext(result);
|
||||
return Tagging.newInstance(result, null);
|
||||
return result;
|
||||
}
|
||||
|
||||
String retString = null;
|
||||
final Map<String, List<String>> param = getParamMap(result, criteria);
|
||||
|
||||
// Execute the EOSCTag for the services
|
||||
|
@ -123,8 +122,11 @@ public class ResultTagger implements Serializable {
|
|||
}
|
||||
|
||||
//adding code for tagging of results searching supplementaryMaterial
|
||||
if(taggingConstraints.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
retString = "supplementary";
|
||||
final Set<String> tags = new HashSet<>();
|
||||
taggingConstraints.getTags().forEach(t -> {
|
||||
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
|
||||
tags.add(t.getTagId());
|
||||
});
|
||||
|
||||
// communities contains all the communities to be not added to the context
|
||||
final Set<String> removeCommunities = new HashSet<>();
|
||||
|
@ -253,10 +255,26 @@ public class ResultTagger implements Serializable {
|
|||
clearContext(result);
|
||||
|
||||
/* Verify if there is something to bulktag */
|
||||
if (communities.isEmpty()) {
|
||||
return Tagging.newInstance(result, retString);
|
||||
if (communities.isEmpty() && tags.isEmpty()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
tags.forEach(t -> {
|
||||
Context con = new Context();
|
||||
con.setId(t);
|
||||
List<DataInfo> dataInfoList = Arrays
|
||||
.asList(
|
||||
OafMapperUtils
|
||||
.dataInfo(
|
||||
false, ANNOTATION_DATA_INFO_TYPE, true, false,
|
||||
OafMapperUtils
|
||||
.qualifier(
|
||||
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
|
||||
DNET_PROVENANCE_ACTIONS),
|
||||
TAGGING_TRUST));
|
||||
result.getContext().add(con);
|
||||
});
|
||||
|
||||
result.getContext().forEach(c -> {
|
||||
final String cId = c.getId();
|
||||
if (communities.contains(cId)) {
|
||||
|
@ -321,7 +339,7 @@ public class ResultTagger implements Serializable {
|
|||
result.getContext().stream().map(Context::getId).collect(Collectors.toSet()));
|
||||
|
||||
if (communities.isEmpty())
|
||||
return Tagging.newInstance(result, retString);
|
||||
return result;
|
||||
|
||||
List<Context> toaddcontext = communities
|
||||
.stream()
|
||||
|
@ -381,7 +399,7 @@ public class ResultTagger implements Serializable {
|
|||
.collect(Collectors.toList());
|
||||
|
||||
result.getContext().addAll(toaddcontext);
|
||||
return Tagging.newInstance(result, retString);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -8,6 +8,10 @@ public class TaggingConstants {
|
|||
|
||||
public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
|
||||
|
||||
public static final String ANNOTATION_DATA_INFO_TYPE = "annotation";
|
||||
public static final String CLASS_ID_ANNOTATION = "graph:annotation";
|
||||
public static final String CLASS_NAME_ANNOTATION = "Graph Annotation";
|
||||
|
||||
public static final String CLASS_ID_SUBJECT = "community:subject";
|
||||
public static final String CLASS_ID_DATASOURCE = "community:datasource";
|
||||
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
public class TaggingConstraint extends SelectionConstraints {
|
||||
private String tagId;
|
||||
|
||||
public String getTagId() {
|
||||
return tagId;
|
||||
}
|
||||
|
||||
public void setTagId(String tagId) {
|
||||
this.tagId = tagId;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.community;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class TaggingConstraints {
|
||||
private List<TaggingConstraint> tags;
|
||||
|
||||
public List<TaggingConstraint> getTags() {
|
||||
return tags;
|
||||
}
|
||||
|
||||
public void setTags(List<TaggingConstraint> tags) {
|
||||
this.tags = tags;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
sourcePath=/tmp/miriam/12_graph_copy
|
||||
pathMap=/data/bulktagging/pathMap
|
||||
baseURL=https://services.openaire.eu/openaire/community/
|
||||
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}
|
|
@ -122,9 +122,9 @@ public class BulkTagJobTest {
|
|||
"-taggingConf", taggingConf,
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -162,9 +162,9 @@ public class BulkTagJobTest {
|
|||
"-taggingConf", taggingConf,
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -271,8 +271,8 @@ public class BulkTagJobTest {
|
|||
|
||||
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap",
|
||||
"-nameNode", "local",
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
@ -354,9 +354,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -438,7 +438,7 @@ public class BulkTagJobTest {
|
|||
|
||||
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
|
||||
"-nameNode", "local",
|
||||
"-taggingCriteria", taggingCriteria
|
||||
"-taggingCriteria", taggingCriteria
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -496,9 +496,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -620,9 +620,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -751,9 +751,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -854,9 +854,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -900,9 +900,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -957,9 +957,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1009,9 +1009,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1231,9 +1231,9 @@ public class BulkTagJobTest {
|
|||
|
||||
"-pathMap", pathMap,
|
||||
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1352,9 +1352,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1475,9 +1475,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1518,9 +1518,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1566,9 +1566,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1606,9 +1606,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1684,9 +1684,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
|
@ -1725,9 +1725,9 @@ public class BulkTagJobTest {
|
|||
// "-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-pathMap", pathMap,
|
||||
"-taggingConf", taggingConf,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1769,9 +1769,9 @@ public class BulkTagJobTest {
|
|||
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_publicationdate.xml")),
|
||||
"-outputPath", workingDir.toString() + "/",
|
||||
"-pathMap", pathMap,
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
@ -1874,9 +1874,9 @@ public class BulkTagJobTest {
|
|||
"-outputPath", workingDir.toString() + "/",
|
||||
|
||||
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
"-taggingCriteria", taggingCriteria,
|
||||
"-baseURL", "https://services.openaire.eu/openaire/community/",
|
||||
"-nameNode", "local"
|
||||
});
|
||||
|
||||
}
|
||||
|
|
|
@ -5,7 +5,6 @@ import java.io.StringReader;
|
|||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
|
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
|
|||
import eu.dnetlib.dhp.schema.solr.Country;
|
||||
import eu.dnetlib.dhp.schema.solr.Datasource;
|
||||
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
||||
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
||||
import eu.dnetlib.dhp.schema.solr.Instance;
|
||||
import eu.dnetlib.dhp.schema.solr.Journal;
|
||||
import eu.dnetlib.dhp.schema.solr.Measure;
|
||||
|
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
|
|||
.orElse(null);
|
||||
}
|
||||
|
||||
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
||||
return Optional.ofNullable(externalReference)
|
||||
.map(ext -> ext.stream()
|
||||
.map(e -> ExternalReference.newInstance(
|
||||
private static List<ExternalReference> mapExternalReference(
|
||||
List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
||||
return Optional
|
||||
.ofNullable(externalReference)
|
||||
.map(
|
||||
ext -> ext
|
||||
.stream()
|
||||
.map(
|
||||
e -> ExternalReference
|
||||
.newInstance(
|
||||
e.getSitename(),
|
||||
e.getLabel(),
|
||||
e.getAlternateLabel(),
|
||||
|
@ -573,8 +579,8 @@ public class ProvisionModelSupport {
|
|||
mapCodeLabel(e.getQualifier()),
|
||||
e.getRefidentifier(),
|
||||
e.getQuery()))
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(Lists.newArrayList());
|
||||
.collect(Collectors.toList()))
|
||||
.orElse(Lists.newArrayList());
|
||||
}
|
||||
|
||||
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
|
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
|
|||
import org.mockito.Mock;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class SolrConfigExploreTest {
|
||||
|
@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
|
|||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
||||
conf.registerKryoClasses(new Class[] {
|
||||
SerializableSolrInputDocument.class
|
||||
SerializableSolrInputDocument.class
|
||||
});
|
||||
|
||||
conf.setMaster("local[1]");
|
||||
|
@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
|
|||
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(SolrConfigExploreTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
.builder()
|
||||
.appName(SolrConfigExploreTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
|
||||
// random unassigned HTTP port
|
||||
final int jettyPort = 0;
|
||||
|
@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
|
|||
|
||||
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
|
||||
log
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
|
||||
NamedList<Object> res = createCollection(
|
||||
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
res.forEach(o -> log.info(o.toString()));
|
||||
|
||||
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
|
||||
|
||||
res = createCollection(
|
||||
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||
res.forEach(o -> log.info(o.toString()));
|
||||
|
||||
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
|
||||
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
|
||||
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
||||
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
||||
assertEquals(0, rsp.getStatus());
|
||||
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
|
||||
assertEquals(0, rsp.getStatus());
|
||||
|
||||
log
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
.info(
|
||||
CollectionAdminRequest.ClusterStatus
|
||||
.getClusterStatus()
|
||||
.process(miniCluster.getSolrClient())
|
||||
.toString());
|
||||
|
||||
}
|
||||
|
||||
|
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
|
|||
|
||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||
.run(isLookupClient);
|
||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
Assertions
|
||||
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
|
||||
String[] queryStrings = {
|
||||
"cancer",
|
||||
|
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
|
|||
// System.out.println(rsp.getExplainMap());
|
||||
|
||||
for (SolrDocument doc : rsp.getResults()) {
|
||||
log.info(
|
||||
log
|
||||
.info(
|
||||
doc.get("score") + "\t" +
|
||||
doc.get("__indexrecordidentifier") + "\t" +
|
||||
doc.get("resultidentifier") + "\t" +
|
||||
|
@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
|
|||
}
|
||||
|
||||
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
||||
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
||||
modParams.set("name", name);
|
||||
|
|
|
@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
|
|||
|
||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||
.run(isLookupClient);
|
||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
Assertions
|
||||
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||
|
||||
String[] queryStrings = {
|
||||
"cancer",
|
||||
|
|
Loading…
Reference in New Issue