diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java index 9cf616356..c02381983 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java @@ -1,3 +1,4 @@ + package eu.dnetlib.pace.tree; import java.util.Map; @@ -11,37 +12,37 @@ import eu.dnetlib.pace.tree.support.ComparatorClass; @ComparatorClass("countryMatch") public class CountryMatch extends AbstractStringComparator { - public CountryMatch(Map params) { - super(params, new com.wcohen.ss.JaroWinkler()); - } + public CountryMatch(Map params) { + super(params, new com.wcohen.ss.JaroWinkler()); + } - public CountryMatch(final double weight) { - super(weight, new com.wcohen.ss.JaroWinkler()); - } + public CountryMatch(final double weight) { + super(weight, new com.wcohen.ss.JaroWinkler()); + } - protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) { - super(weight, ssalgo); - } + protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) { + super(weight, ssalgo); + } - @Override - public double distance(final String a, final String b, final Config conf) { - if (a.isEmpty() || b.isEmpty()) { - return -1.0; // return -1 if a field is missing - } - if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) { - return -1.0; // return -1 if a country is UNKNOWN - } + @Override + public double distance(final String a, final String b, final Config conf) { + if (a.isEmpty() || b.isEmpty()) { + return -1.0; // return -1 if a field is missing + } + if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) { + return -1.0; // return -1 if a country is UNKNOWN + } - return a.equals(b) ? 1.0 : 0; - } + return a.equals(b) ? 1.0 : 0; + } - @Override - public double getWeight() { - return super.weight; - } + @Override + public double getWeight() { + return super.weight; + } - @Override - protected double normalize(final double d) { - return d; - } + @Override + protected double normalize(final double d) { + return d; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/job.properties b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/job.properties new file mode 100644 index 000000000..69eca95f4 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/job.properties @@ -0,0 +1,2 @@ +sdgPath=/tmp/sdg_20240627_oaid_csv +outputPath=/tmp/miriam/sdgnodoi \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Authors/part-00000.snappy.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Authors/part-00000.snappy.parquet new file mode 100644 index 000000000..72ee6064c Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Authors/part-00000.snappy.parquet differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Employments/part-00000.snappy.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Employments/part-00000.snappy.parquet new file mode 100644 index 000000000..d3878ace4 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Employments/part-00000.snappy.parquet differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/EmploymentsJson/part-00000 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/EmploymentsJson/part-00000 new file mode 100644 index 000000000..8e1b363b2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/EmploymentsJson/part-00000 @@ -0,0 +1,100 @@ +{"affiliationId":{"schema":"RINGGOLD","value":"8244"},"departmentName":"Biology","endDate":"2019-05-19","orcid":"0000-0001-6291-9619","roleTitle":"Undergraduate Research Assistant","startDate":"2017-01-26"} +{"affiliationId":{"schema":"GRID","value":"grid.445941.9"},"departmentName":"Department architectural-building constructions","endDate":"","orcid":"0000-0001-6291-9619","roleTitle":"Assistant professor","startDate":"2014-09"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/02jx3x895"},"departmentName":"Learning and Leadership","endDate":"2012-09-01","orcid":"0000-0002-3210-3034","roleTitle":"Leturer"} +{"affiliationId":{"schema":"RINGGOLD","value":"445071"},"departmentName":"Fisheries and Aquaculture","endDate":"2006-01-012012-08-23","orcid":"0000-0002-9030-7609","roleTitle":"Technical Officer"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/05a28rw58"},"departmentName":"Institute of Environmental Engineering","endDate":"2023-11-012024-01-31","orcid":"0000-0002-9030-7609","roleTitle":"Visiting Researcher"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Faculty of Engineering and Informatics","endDate":"2021-12-20","orcid":"0000-0003-0305-8980","roleTitle":"Lecturer"} +{"affiliationId":{"schema":"RINGGOLD","value":"26066"},"departmentName":"Obstetrics and Gynaecology","orcid":"0000-0003-0305-8980","roleTitle":"MD, PhD"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/00ey9xa07"},"departmentName":"Harbin Sport University","endDate":"2024-06-01","orcid":"0000-0003-0305-8980","roleTitle":"Student"} +{"affiliationId":{"schema":"","value":""},"departmentName":"KIPP DC Schools","endDate":"2016-01-012017-01-01","orcid":"0009-0004-7554-419X","roleTitle":"Middle School Science Teacher"} +{"affiliationId":{"schema":"RINGGOLD","value":"19374"},"departmentName":"Music Therapy","endDate":"","orcid":"0000-0002-5115-9762","roleTitle":"Dementia Program Director","startDate":"2017-10-01"} +{"affiliationId":{"schema":"RINGGOLD","value":"9144"},"departmentName":"Interdisciplinary Center for Scientific Computing - IWR","endDate":"2009-01-152012-10-31","orcid":"0000-0002-2004-4153","roleTitle":"PhD student"} +{"departmentName":"2nd Air Supply Maintenance Center Command","endDate":"2012-08-30","orcid":"0000-0002-4389-9744","roleTitle":"Production Planning Group Supervisor","startDate":"2008-09-01"} +{"affiliationId":{"schema":"","value":""},"departmentName":"MITAKY High-Tech Co., Ltd.","endDate":"2020-07-01","orcid":"0000-0001-7628-743X","roleTitle":"President & CEO"} +{"departmentName":"Ancash","endDate":"","orcid":"0000-0002-3861-2833","startDate":""} +{"departmentName":"CALDAS","endDate":"2010-06-17","orcid":"0000-0003-1077-4053","roleTitle":"COORDINADOR ESTUDIO DE RADIO Y TV","startDate":"1999-03-17"} +{"departmentName":"Institute of Sociogenesis and Social Dynamics","endDate":"","orcid":"0000-0001-6881-7760","startDate":""} +{"affiliationId":{"schema":"RINGGOLD","value":"27004"},"departmentName":"Biology","endDate":"1999-09-02","orcid":"0000-0002-8553-169X","roleTitle":"MCf Microbiology "} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/041nas322"},"departmentName":"Bonn Center for Dependency and Slavery Studies","endDate":"2019-01-012023-01-01","orcid":"0009-0002-9250-948X","roleTitle":"Predoctoral Research Associate / wiss. Mitarbeiter"} +{"departmentName":"Процессы и аппараты химических и пищевых производств","endDate":"","orcid":"0000-0002-1805-5670","roleTitle":"старший преподаватель","startDate":"2015-09-03"} +{"affiliationId":{"schema":"RINGGOLD","value":"95414"},"departmentName":"Department of Science, Technology and International relations","endDate":"2021-03-09","orcid":"0000-0002-7616-2482","roleTitle":"Researcher"} +{"affiliationId":{"schema":"RINGGOLD","value":"28730"},"departmentName":"Forensic Medicine & Toxicology","endDate":"2019-08-26","orcid":"0000-0001-7369-1744","roleTitle":"Senior Resident","startDate":"2016-08-27"} +{"departmentName":"Department of Functional & Comparative Genomics","endDate":"2015-10","orcid":"0000-0001-8059-8919","roleTitle":"Postdoctoral Research Associate, Fluorescence Chemical Sensors","startDate":"2014-08"} +{"affiliationId":{"schema":"RINGGOLD","value":"16771"},"departmentName":"Catedra UNESCO de Gestion y Politica Universitaria","endDate":"2016","orcid":"0000-0001-9437-6700","roleTitle":"Investigador","startDate":"2001"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/05v3pg621"},"departmentName":"Department of Multimedia Animation and Application","endDate":"","orcid":"0000-0003-2513-7065","roleTitle":"Professor","startDate":"2012-02-01"} +{"affiliationId":{"schema":"RINGGOLD","value":"246714"},"departmentName":"Delta Dental of Wisconsin","endDate":"","orcid":"0000-0003-2675-3206","roleTitle":"VP & Science Officer","startDate":"2006"} +{"affiliationId":{"schema":"RINGGOLD","value":"146895"},"departmentName":"Facultad de Ciencias Económicas, administrativas y Contables ","endDate":"2015-02-01","orcid":"0000-0001-9384-6395","roleTitle":"Coordinadora de Investigaciones-Facultad CEAC"} +{"affiliationId":{"schema":"RINGGOLD","value":"8367"},"departmentName":"Microbiology, Immunology and Tropical Medicine","endDate":"2019-10-01","orcid":"0000-0002-2349-263X","roleTitle":"Assistant Professor"} +{"affiliationId":{"schema":"RINGGOLD","value":"129705"},"departmentName":"Institut de Recherche en Informatique Fondamentale","orcid":"0000-0003-0287-6252","roleTitle":""} +{"affiliationId":{"schema":"RINGGOLD","value":"119726"},"departmentName":"Computer Engineering/MIS","endDate":"2018-09-15","orcid":"0000-0003-0014-5106","roleTitle":"Assoc.Prof.Dr."} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/00wbwde85"},"departmentName":"school","endDate":"2021-03-01","orcid":"0009-0007-7585-0594","roleTitle":"mahasiswa"} +{"affiliationId":{"schema":"RINGGOLD","value":"9171"},"departmentName":"Computer Science Dept.","endDate":"","orcid":"0000-0002-3202-2904","startDate":"2002-04-01"} +{"affiliationId":{"schema":"GRID","value":"grid.474837.b"},"departmentName":"Gastroenterology","endDate":"","orcid":"0000-0003-0705-5760","startDate":""} +{"affiliationId":{"schema":"","value":""},"departmentName":"School of Physical and Occupational Therapy","endDate":"2018-03-012021-12-31","orcid":"0000-0002-8406-5228","roleTitle":"Postdoctoral Fellow"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Xinqiao Hospital","orcid":"0000-0001-6200-2309","roleTitle":""} +{"affiliationId":{"schema":"","value":""},"departmentName":"DIPARTIMENTO DI PATOLOGIA CHIRURGICA, MEDICA, MOLECOLARE E DELL'AREA CRITICA","endDate":"2018-10-012021-10-01","orcid":"0000-0002-5588-2608","roleTitle":"Ricercatori a tempo determinato"} +{"affiliationId":{"schema":"","value":""},"departmentName":"ECE","endDate":"2021-01-012022-02-01","orcid":"0000-0002-8729-0287","roleTitle":"Visiting Scholar"} +{"departmentName":"Mantenimiento","endDate":"1998-01-01","orcid":"0000-0002-8663-2716","roleTitle":"Project coordinator","startDate":"1994-01-01"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/012wtwr40"},"departmentName":"Centro Universitário Newton Paiva","endDate":"2012-07","orcid":"0000-0002-1725-1805","roleTitle":"Professor Assistente","startDate":"2005-09"} +{"departmentName":"DFC","endDate":"","orcid":"0000-0003-3764-9500","roleTitle":"Professora Substituta","startDate":"2018-02-01"} +{"affiliationId":{"schema":"RINGGOLD","value":"10107"},"departmentName":"Automated Lab - Women's & Children's Hospital site","endDate":"1995-09-052018-09-09","orcid":"0000-0002-5594-9737","roleTitle":"Medical Scientist"} +{"affiliationId":{"schema":"RINGGOLD","value":"33784"},"departmentName":"Computer Science","endDate":"2023-06-04","orcid":"0009-0000-6585-6246","roleTitle":"Visiting Assistant Professor","startDate":"2023-01-09"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/0280a3n32"},"departmentName":"Research","endDate":"2022-06","orcid":"0000-0002-0846-9503","roleTitle":"Research Assistant","startDate":"2019-06"} +{"affiliationId":{"schema":"GRID","value":"grid.8657.c"},"departmentName":"Finnish Meteorological Institute","endDate":"2019-06-01","orcid":"0000-0002-4826-2929","roleTitle":""} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/05290cv24"},"departmentName":"Dipartimento di Informatica e Tecnologie dell'Informazione","endDate":"2022-11-012025-11-01","orcid":"0009-0000-6476-8092","roleTitle":"PhD Student"} +{"affiliationId":{"schema":"GRID","value":"grid.495082.2"},"departmentName":"Laboratory of water bodies sanitaric microbiology and human microbial ecology","endDate":"2017-01-01","orcid":"0000-0003-1194-7251","roleTitle":"Senior reseacher "} +{"affiliationId":{"schema":"RINGGOLD","value":"150713"},"departmentName":"Education","endDate":"2013-09-01","orcid":"0000-0002-2489-1202","roleTitle":"Doctor of Education"} +{"departmentName":"Office of Risk Management","endDate":"","orcid":"0000-0003-2772-313X","roleTitle":"Senior Policy Advisor","startDate":"2014-09-04"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/04pe1sa24"},"departmentName":"Facultad de Estudios Globales y Hospitalidad","endDate":"","orcid":"0009-0003-4270-4196","roleTitle":"Docente en las Carreras de Licenciatura en Turismo y Relaciones Internacionales","startDate":"2023-06-12"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Civil & Mechanical Engineering","endDate":"2014-06-012020-06-30","orcid":"0000-0001-6598-2525","roleTitle":"Assistant Professor"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/02ytfzr55"},"departmentName":"Department of Civil Engineering ","endDate":"2022-03-212024-02-27","orcid":"0000-0002-9572-1358","roleTitle":"Temporary Faculty"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/04wdt0z89"},"departmentName":"library","endDate":"2024-03-01","orcid":"0009-0002-8124-1772","roleTitle":"library it"} +{"departmentName":"Kalil e Pires Advogados","endDate":"","orcid":"0009-0001-3403-0297","roleTitle":"Estagiário","startDate":"2023-03-13"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Coronel Institute of Occupational Health","endDate":"2019-01-01","orcid":"0000-0002-0461-4013","roleTitle":"Principal Investigator"} +{"affiliationId":{"schema":"GRID","value":"grid.5801.c"},"departmentName":"Health Sciences and Technology","orcid":"0000-0002-1651-0457","roleTitle":"Doctoral student"} +{"departmentName":"Prefeitura de Teresina","endDate":"","orcid":"0000-0002-8148-4179","startDate":""} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/00k8rrx20"},"departmentName":"Prosseguir","endDate":"2023-05-012024-05-01","orcid":"0000-0001-5147-3455","roleTitle":"Coordenadora Pedagógica Regional do Prosseguir em Manaus"} +{"affiliationId":{"schema":"RINGGOLD","value":"381864"},"departmentName":"Pharmaron Beijing Co Ltd","endDate":"","orcid":"0000-0003-2165-740X","startDate":"2015-10-27"} +{"affiliationId":{"schema":"RINGGOLD","value":"183390"},"departmentName":"Instituto Tecnológico Superior de Irapuato","endDate":"","orcid":"0000-0003-2101-5917","startDate":"2018-11-01"} +{"affiliationId":{"schema":"RINGGOLD","value":"384754"},"departmentName":"SynCat@Beijing","orcid":"0000-0002-1050-2165","roleTitle":"Vice Director"} +{"affiliationId":{"schema":"RINGGOLD","value":"282795"},"departmentName":"Setor de Coleções Científicas","endDate":"","orcid":"0000-0003-3755-0025","roleTitle":"Estagiária","startDate":"2019-11"} +{"affiliationId":{"schema":"RINGGOLD","value":"434589"},"departmentName":"Chemistry","endDate":"2015-05-25","orcid":"0000-0001-5861-4425","roleTitle":"Lecturer"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Cajamarca","endDate":"2023-11-15","orcid":"0000-0003-1524-3315","roleTitle":"Asistente Administrativo Provincial"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Seduc ma","endDate":"2012-01-24","orcid":"0000-0003-3142-356X","roleTitle":""} +{"departmentName":"Rede Particular de Ensino","endDate":"2021","orcid":"0000-0002-4771-2131","roleTitle":"Professora de Artes Cênicas","startDate":"2018"} +{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/100009042"},"departmentName":"Derecho del Trabajo y de la Seguridad Social","endDate":"2002-12-01","orcid":"0000-0002-1275-5289","roleTitle":"Catedrático de Derecho del Trabajo y de la Seguridad Social"} +{"departmentName":"Department of Ethology","endDate":"2011-01-02","orcid":"0000-0003-1436-7324","roleTitle":"Research Assistant","startDate":"2007-09-01"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/04ka8rx28"},"departmentName":"Mechanical Engineering","orcid":"0009-0006-6397-2183","roleTitle":""} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/002qhr126"},"departmentName":"Theatre","orcid":"0009-0001-6531-9624","roleTitle":"theacher"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Institute of Molecular Medicine, Renji Hospital, School of Medicine.","endDate":"2021-01-27","orcid":"0000-0003-0399-1201","roleTitle":"Associate Professor"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Cell Biology","endDate":"2013-11-012014-07-01","orcid":"0000-0003-1489-4757","roleTitle":"Research Scolarship for Undergraduate Students"} +{"affiliationId":{"schema":"RINGGOLD","value":"346985"},"departmentName":"Maternidade","endDate":"","orcid":"0000-0002-6985-9679","roleTitle":"Enfermeira/UTI neonatal","startDate":"2019-02-26"} +{"affiliationId":{"schema":"RINGGOLD","value":"41726"},"departmentName":"Area Team - Biodiversity","endDate":"2006-01-012008-01-01","orcid":"0000-0002-6553-3786","roleTitle":"Catchment Biodiversity Technical Officer"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/04z7qrj66"},"departmentName":"Merchant Marine College","orcid":"0009-0003-6812-3576","roleTitle":""} +{"affiliationId":{"schema":"","value":""},"departmentName":"Lima","endDate":"2022-03-25","orcid":"0000-0002-9262-5619","roleTitle":"Docente Universitario"} +{"affiliationId":{"schema":"","value":""},"departmentName":"University of Bristol","orcid":"0000-0002-9793-3485","roleTitle":""} +{"departmentName":"US Geological Survey, Ecosystems Mission Area, Cooperative Fish and Wildlife Research Units Program","endDate":"","orcid":"0000-0002-8638-6682","startDate":"2011-06-01"} +{"affiliationId":{"schema":"RINGGOLD","value":"28666"},"departmentName":"English","endDate":"","orcid":"0000-0001-5361-109X","roleTitle":"Assistant Professor","startDate":"2019-07-05"} +{"affiliationId":{"schema":"","value":""},"departmentName":"Centre for Earth System Science","endDate":"2010-03-012012-11-01","orcid":"0000-0001-5323-4431","roleTitle":"Researcher and Executive Officer"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/05bjd0w70"},"departmentName":"Education","endDate":"2013-08-15","orcid":"0000-0001-5960-0586","roleTitle":"Associate Professor and Chair, Department of Education","startDate":"2002-08-15"} +{"affiliationId":{"schema":"RINGGOLD","value":"632513"},"departmentName":"Board ","endDate":"2020-01-01","orcid":"0000-0002-4222-4518","roleTitle":"Boardmember"} +{"affiliationId":{"schema":"GRID","value":"grid.22657.34"},"departmentName":"Faculty of Food Technology","endDate":"2017-01-01","orcid":"0000-0003-2606-8380","roleTitle":"guest scientific assistant, guest researcher, guest lecturer"} +{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/501100008331"},"departmentName":"Radiology","endDate":"","orcid":"0000-0001-6249-450X","startDate":"2016-03-01"} +{"departmentName":"кафедра физики","endDate":"","orcid":"0000-0001-6786-838X","roleTitle":"доцент","startDate":"1981-11-28"} +{"affiliationId":{"schema":"RINGGOLD","value":"6429"},"departmentName":"Molecular and Cellular Physiology","endDate":"2010-01-012013-01-01","orcid":"0000-0002-5538-0464","roleTitle":"Research-Associate"} +{"affiliationId":{"schema":"RINGGOLD","value":"48455"},"departmentName":"Clinical Biochem","endDate":"2016-12-30","orcid":"0000-0002-9563-8044","roleTitle":"Associate Professor","startDate":"2008"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/01xf75524"},"departmentName":"Molecular Oncology","endDate":"2022-01-01","orcid":"0000-0003-0928-003X","roleTitle":""} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/022kthw22"},"departmentName":"Anesthesiology and Perioperative Medicine","endDate":"2022-07-01","orcid":"0000-0001-7410-7271","roleTitle":"Postdoctoral Researcher"} +{"affiliationId":{"schema":"RINGGOLD","value":"125792"},"departmentName":"Environmental Management and Toxicology","endDate":"2006-06-20","orcid":"0000-0001-7855-4183","roleTitle":"Instructional/Tutorial Facilitator"} +{"departmentName":"2004 – 2007 | Teacher of Fiqh and Usul-al-Fiqh | Islamic University | Moscow, Russia","endDate":"","orcid":"0000-0001-8386-4426","startDate":""} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/0190ak572"},"departmentName":"Biology","endDate":"2024-06-01","orcid":"0009-0001-6766-7876","roleTitle":"Research assistant"} +{"affiliationId":{"schema":"RINGGOLD","value":"16763"},"departmentName":"Education","orcid":"0000-0003-2355-4682","roleTitle":"Profesora titular de Universidad/ Senior Lecturer "} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/0406jsq08"},"departmentName":"Farmácia","endDate":"2023-03-01","orcid":"0009-0009-1661-5046","roleTitle":"Residente farmacêutico"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/02aqsxs83"},"departmentName":"School of Biological Sciences","endDate":"2021-08-16","orcid":"0000-0002-1696-1952","roleTitle":"Assistant Professor"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/00s582s04"},"departmentName":"cajamarca","endDate":"2024-01-01","orcid":"0009-0001-0970-2741","roleTitle":"BACHILLER"} +{"affiliationId":{"schema":"RINGGOLD","value":"186027"},"departmentName":"РЯиК","endDate":"","orcid":"0000-0002-1000-5441","roleTitle":"старший преподаватель","startDate":"2007-09-01"} +{"affiliationId":{"schema":"RINGGOLD","value":"47910"},"departmentName":"Faculty of Life Science and Technology","endDate":"2012-07-082015-10-01","orcid":"0000-0001-7533-998X","roleTitle":"Lecturer"} +{"affiliationId":{"schema":"RINGGOLD","value":"2234"},"departmentName":"Education","endDate":"","orcid":"0000-0001-6123-8483","roleTitle":"Assistant Professor","startDate":"2019-01-07"} +{"affiliationId":{"schema":"ROR","value":"https://ror.org/04qkymg17"},"departmentName":"genera surgical ward","endDate":"2013-10-012015-02-01","orcid":"0009-0009-7638-0453","roleTitle":"Registered Nurse"} +{"affiliationId":{"schema":"RINGGOLD","value":"575342"},"departmentName":"GBUZ Naucno-prakticeskij psihonevrologiceskij centr imeni Z P Solov'eva Departamenta zdravoohranenia goroda Moskvy","endDate":"2022-11-01","orcid":"0000-0002-0344-9765","roleTitle":""} +{"affiliationId":{"schema":"","value":""},"departmentName":"SERVICE DE CHIRURGIE ORTHOPÉDIQUE ET TRAUMATOLOGIE","endDate":"2024-02-01","orcid":"0009-0003-2119-1766","roleTitle":"CHIRURGIEN ORTHOPEDISTE"} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Works/part-00001.snappy.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Works/part-00001.snappy.parquet new file mode 100644 index 000000000..51f797092 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/Works/part-00001.snappy.parquet differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00003 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00003 new file mode 100644 index 000000000..fd296ef06 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00003 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00010 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00010 new file mode 100644 index 000000000..b83cbef73 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00010 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00042 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00042 new file mode 100644 index 000000000..aa2369378 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00042 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00066 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00066 new file mode 100644 index 000000000..c08053152 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00066 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00087 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00087 new file mode 100644 index 000000000..2bcb831a3 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00087 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00126 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00126 new file mode 100644 index 000000000..b090f3c21 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00126 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00154 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00154 new file mode 100644 index 000000000..027c9f5bb Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00154 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00160 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00160 new file mode 100644 index 000000000..7d0af8a04 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00160 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00163 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00163 new file mode 100644 index 000000000..228ce04d5 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00163 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00179 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00179 new file mode 100644 index 000000000..344d44a0a Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00179 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00196 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00196 new file mode 100644 index 000000000..fbc7edd35 Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/part-00196 differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackList/not_irish.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackList/not_irish.json new file mode 100644 index 000000000..a5509cab1 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackList/not_irish.json @@ -0,0 +1 @@ +{"doi":"https://doi.org/10.1007/978-3-030-75768-7","OpenAlexId":"https://openalex.org/W2124362779"} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 0a7a4913e..f4824dd55 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -8,7 +8,6 @@ import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -23,7 +22,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; @@ -32,6 +30,8 @@ import eu.dnetlib.dhp.api.model.CommunityEntityMap; import eu.dnetlib.dhp.api.model.EntityCommunities; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Context; @@ -93,9 +93,10 @@ public class SparkBulkTagJob { ProtoMap protoMap = new Gson().fromJson(temp, ProtoMap.class); log.info("pathMap: {}", new Gson().toJson(protoMap)); - SelectionConstraints taggingConstraints = new Gson() - .fromJson(parser.get("taggingCriteria"), SelectionConstraints.class); - taggingConstraints.setSelection(VerbResolverFactory.newInstance()); + TaggingConstraints taggingConstraints = new Gson() + .fromJson(parser.get("taggingCriteria"), TaggingConstraints.class); + + taggingConstraints.getTags().forEach(t -> t.setSelection(VerbResolverFactory.newInstance())); SparkConf conf = new SparkConf(); CommunityConfiguration cc; @@ -277,13 +278,8 @@ public class SparkBulkTagJob { String outputPath, ProtoMap protoMappingParams, CommunityConfiguration communityConfiguration, - SelectionConstraints taggingConstraints) { + TaggingConstraints taggingConstraints) { - try { - System.out.println(new ObjectMapper().writeValueAsString(protoMappingParams)); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } ModelSupport.entityTypes .keySet() .parallelStream() @@ -295,30 +291,22 @@ public class SparkBulkTagJob { readPath(spark, inputPath + e.name(), resultClazz) .map(patchResult(), Encoders.bean(resultClazz)) .filter(Objects::nonNull) - .map((MapFunction) value -> resultTagger + .map( + (MapFunction) value -> resultTagger .enrichContextCriteria( value, communityConfiguration, protoMappingParams, taggingConstraints), - Encoders.bean(Tagging.class)) + Encoders.bean(resultClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + e.name());// writing the tagging in the working dir for entity - readPath(spark, outputPath + e.name(), Tagging.class) - .map((MapFunction) t -> (R) t.getResult(), Encoders.bean(resultClazz) )// copy the tagging in the actual result output path + readPath(spark, outputPath + e.name(), resultClazz) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(inputPath + e.name()); - readPath(spark, outputPath + e.name(), Tagging.class) - .map((MapFunction) t -> t.getTag(), Encoders.STRING() )// copy the tagging in the actual result output path - .filter(Objects::nonNull) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json("/user/miriam.baglioni/graphTagging/" + e.name()); - }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/Tagging.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/Tagging.java index d01d4b805..3ea7d9ef1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/Tagging.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/Tagging.java @@ -1,32 +1,34 @@ + package eu.dnetlib.dhp.bulktag; import java.io.Serializable; + import eu.dnetlib.dhp.schema.oaf.Result; -public class Tagging implements Serializable { - private String tag; - private R result; +public class Tagging implements Serializable { + private String tag; + private R result; - public String getTag() { - return tag; - } + public String getTag() { + return tag; + } - public void setTag(String tag) { - this.tag = tag; - } + public void setTag(String tag) { + this.tag = tag; + } - public R getResult() { - return result; - } + public R getResult() { + return result; + } - public void setResult(R result) { - this.result = result; - } + public void setResult(R result) { + this.result = result; + } - public static Tagging newInstance(R result, String tag){ - Tagging t = new Tagging<>(); - t.result = result; - t.tag = tag; - return t; - } + public static Tagging newInstance(R result, String tag) { + Tagging t = new Tagging<>(); + t.result = result; + t.tag = tag; + return t; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index 75189e7de..49fed9493 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -10,8 +10,6 @@ import java.lang.reflect.Method; import java.util.*; import java.util.stream.Collectors; - -import eu.dnetlib.dhp.bulktag.Tagging; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +19,7 @@ import com.jayway.jsonpath.DocumentContext; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.PathNotFoundException; +import eu.dnetlib.dhp.bulktag.Tagging; import eu.dnetlib.dhp.bulktag.actions.MapModel; import eu.dnetlib.dhp.bulktag.actions.Parameters; import eu.dnetlib.dhp.bulktag.eosc.EoscIFTag; @@ -93,18 +92,18 @@ public class ResultTagger implements Serializable { } - public Tagging enrichContextCriteria( - final R result, final CommunityConfiguration conf, final Map criteria, SelectionConstraints taggingConstraints) + public R enrichContextCriteria( + final R result, final CommunityConfiguration conf, final Map criteria, + TaggingConstraints taggingConstraints) throws InvocationTargetException, NoSuchMethodException { // Verify if the entity is deletedbyinference. In case verify if to clean the context list // from all the zenodo communities if (result.getDataInfo().getDeletedbyinference()) { clearContext(result); - return Tagging.newInstance(result, null); + return result; } - String retString = null; final Map> param = getParamMap(result, criteria); // Execute the EOSCTag for the services @@ -123,8 +122,11 @@ public class ResultTagger implements Serializable { } //adding code for tagging of results searching supplementaryMaterial - if(taggingConstraints.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param))) - retString = "supplementary"; + final Set tags = new HashSet<>(); + taggingConstraints.getTags().forEach(t -> { + if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param))) + tags.add(t.getTagId()); + }); // communities contains all the communities to be not added to the context final Set removeCommunities = new HashSet<>(); @@ -253,10 +255,26 @@ public class ResultTagger implements Serializable { clearContext(result); /* Verify if there is something to bulktag */ - if (communities.isEmpty()) { - return Tagging.newInstance(result, retString); + if (communities.isEmpty() && tags.isEmpty()) { + return result; } + tags.forEach(t -> { + Context con = new Context(); + con.setId(t); + List dataInfoList = Arrays + .asList( + OafMapperUtils + .dataInfo( + false, ANNOTATION_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS, + DNET_PROVENANCE_ACTIONS), + TAGGING_TRUST)); + result.getContext().add(con); + }); + result.getContext().forEach(c -> { final String cId = c.getId(); if (communities.contains(cId)) { @@ -321,7 +339,7 @@ public class ResultTagger implements Serializable { result.getContext().stream().map(Context::getId).collect(Collectors.toSet())); if (communities.isEmpty()) - return Tagging.newInstance(result, retString); + return result; List toaddcontext = communities .stream() @@ -381,7 +399,7 @@ public class ResultTagger implements Serializable { .collect(Collectors.toList()); result.getContext().addAll(toaddcontext); - return Tagging.newInstance(result, retString); + return result; } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index aea21f8e5..637fa7f2f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -8,6 +8,10 @@ public class TaggingConstants { public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; + public static final String ANNOTATION_DATA_INFO_TYPE = "annotation"; + public static final String CLASS_ID_ANNOTATION = "graph:annotation"; + public static final String CLASS_NAME_ANNOTATION = "Graph Annotation"; + public static final String CLASS_ID_SUBJECT = "community:subject"; public static final String CLASS_ID_DATASOURCE = "community:datasource"; public static final String CLASS_ID_CZENODO = "community:zenodocommunity"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstraint.java new file mode 100644 index 000000000..80c5cbf18 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstraint.java @@ -0,0 +1,14 @@ + +package eu.dnetlib.dhp.bulktag.community; + +public class TaggingConstraint extends SelectionConstraints { + private String tagId; + + public String getTagId() { + return tagId; + } + + public void setTagId(String tagId) { + this.tagId = tagId; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstraints.java new file mode 100644 index 000000000..90042af58 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstraints.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.bulktag.community; + +import java.util.List; + +public class TaggingConstraints { + private List tags; + + public List getTags() { + return tags; + } + + public void setTags(List tags) { + this.tags = tags; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/job.properties new file mode 100644 index 000000000..9e7d615a4 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/job.properties @@ -0,0 +1,4 @@ +sourcePath=/tmp/miriam/12_graph_copy +pathMap=/data/bulktagging/pathMap +baseURL=https://services.openaire.eu/openaire/community/ +taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"},{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"},{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"},{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 5e51b687f..e70752bb7 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -122,9 +122,9 @@ public class BulkTagJobTest { "-taggingConf", taggingConf, "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -162,9 +162,9 @@ public class BulkTagJobTest { "-taggingConf", taggingConf, "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -271,8 +271,8 @@ public class BulkTagJobTest { "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap", "-nameNode", "local", - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); @@ -354,9 +354,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -438,7 +438,7 @@ public class BulkTagJobTest { "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap", "-nameNode", "local", - "-taggingCriteria", taggingCriteria + "-taggingCriteria", taggingCriteria }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -496,9 +496,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -620,9 +620,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -751,9 +751,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -854,9 +854,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -900,9 +900,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -957,9 +957,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1009,9 +1009,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1231,9 +1231,9 @@ public class BulkTagJobTest { "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1352,9 +1352,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1475,9 +1475,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1518,9 +1518,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1566,9 +1566,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1606,9 +1606,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1684,9 +1684,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1725,9 +1725,9 @@ public class BulkTagJobTest { // "-baseURL", "https://services.openaire.eu/openaire/community/", "-pathMap", pathMap, "-taggingConf", taggingConf, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1769,9 +1769,9 @@ public class BulkTagJobTest { "/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_publicationdate.xml")), "-outputPath", workingDir.toString() + "/", "-pathMap", pathMap, - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -1874,9 +1874,9 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap", - "-taggingCriteria", taggingCriteria, - "-baseURL", "https://services.openaire.eu/openaire/community/", - "-nameNode", "local" + "-taggingCriteria", taggingCriteria, + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-nameNode", "local" }); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index b450b95be..abcf4992f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -5,7 +5,6 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; -import eu.dnetlib.dhp.schema.solr.ExternalReference; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context; import eu.dnetlib.dhp.schema.solr.Country; import eu.dnetlib.dhp.schema.solr.Datasource; import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines; +import eu.dnetlib.dhp.schema.solr.ExternalReference; import eu.dnetlib.dhp.schema.solr.Instance; import eu.dnetlib.dhp.schema.solr.Journal; import eu.dnetlib.dhp.schema.solr.Measure; @@ -562,10 +562,16 @@ public class ProvisionModelSupport { .orElse(null); } - private static List mapExternalReference(List externalReference) { - return Optional.ofNullable(externalReference) - .map(ext -> ext.stream() - .map(e -> ExternalReference.newInstance( + private static List mapExternalReference( + List externalReference) { + return Optional + .ofNullable(externalReference) + .map( + ext -> ext + .stream() + .map( + e -> ExternalReference + .newInstance( e.getSitename(), e.getLabel(), e.getAlternateLabel(), @@ -573,8 +579,8 @@ public class ProvisionModelSupport { mapCodeLabel(e.getQualifier()), e.getRefidentifier(), e.getQuery())) - .collect(Collectors.toList())) - .orElse(Lists.newArrayList()); + .collect(Collectors.toList())) + .orElse(Lists.newArrayList()); } private static List asContext(List ctxList, diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java index 41eac2a30..90aef5adc 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java @@ -1,12 +1,13 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.File; import java.io.IOException; import java.net.URI; import java.nio.file.Path; -import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrQuery; @@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; - -import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.jupiter.api.Assertions.assertEquals; +import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; +import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class SolrConfigExploreTest { @@ -91,7 +91,7 @@ public class SolrConfigExploreTest { SparkConf conf = new SparkConf(); conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); conf.registerKryoClasses(new Class[] { - SerializableSolrInputDocument.class + SerializableSolrInputDocument.class }); conf.setMaster("local[1]"); @@ -101,10 +101,10 @@ public class SolrConfigExploreTest { conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); spark = SparkSession - .builder() - .appName(SolrConfigExploreTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); + .builder() + .appName(SolrConfigExploreTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); // random unassigned HTTP port final int jettyPort = 0; @@ -134,35 +134,35 @@ public class SolrConfigExploreTest { log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); NamedList res = createCollection( - miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); + miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); res.forEach(o -> log.info(o.toString())); // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION); res = createCollection( - miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); + miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); res.forEach(o -> log.info(o.toString())); admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress()); CollectionAdminResponse rsp = (CollectionAdminResponse) admin - .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); assertEquals(0, rsp.getStatus()); rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION); assertEquals(0, rsp.getStatus()); log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); } @@ -180,7 +180,8 @@ public class SolrConfigExploreTest { new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) .run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); + Assertions + .assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); String[] queryStrings = { "cancer", @@ -200,7 +201,8 @@ public class SolrConfigExploreTest { // System.out.println(rsp.getExplainMap()); for (SolrDocument doc : rsp.getResults()) { - log.info( + log + .info( doc.get("score") + "\t" + doc.get("__indexrecordidentifier") + "\t" + doc.get("resultidentifier") + "\t" + @@ -216,7 +218,7 @@ public class SolrConfigExploreTest { } protected static NamedList createCollection(CloudSolrClient client, String name, int numShards, - int replicationFactor, int maxShardsPerNode, String configName) throws Exception { + int replicationFactor, int maxShardsPerNode, String configName) throws Exception { ModifiableSolrParams modParams = new ModifiableSolrParams(); modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name()); modParams.set("name", name); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java index 2c62389c6..c04fa1cc6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java @@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest { new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) .run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); + Assertions + .assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); String[] queryStrings = { "cancer",