Compare commits

...

7 Commits

34 changed files with 331 additions and 42 deletions

View File

@ -1,4 +1,3 @@
package eu.dnetlib.pace.tree;
import java.util.Map;

View File

@ -0,0 +1,2 @@
sdgPath=/tmp/sdg_20240627_oaid_csv
outputPath=/tmp/miriam/sdgnodoi

View File

@ -0,0 +1,100 @@
{"affiliationId":{"schema":"RINGGOLD","value":"8244"},"departmentName":"Biology","endDate":"2019-05-19","orcid":"0000-0001-6291-9619","roleTitle":"Undergraduate Research Assistant","startDate":"2017-01-26"}
{"affiliationId":{"schema":"GRID","value":"grid.445941.9"},"departmentName":"Department architectural-building constructions","endDate":"","orcid":"0000-0001-6291-9619","roleTitle":"Assistant professor","startDate":"2014-09"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02jx3x895"},"departmentName":"Learning and Leadership","endDate":"2012-09-01","orcid":"0000-0002-3210-3034","roleTitle":"Leturer"}
{"affiliationId":{"schema":"RINGGOLD","value":"445071"},"departmentName":"Fisheries and Aquaculture","endDate":"2006-01-012012-08-23","orcid":"0000-0002-9030-7609","roleTitle":"Technical Officer"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05a28rw58"},"departmentName":"Institute of Environmental Engineering","endDate":"2023-11-012024-01-31","orcid":"0000-0002-9030-7609","roleTitle":"Visiting Researcher"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Faculty of Engineering and Informatics","endDate":"2021-12-20","orcid":"0000-0003-0305-8980","roleTitle":"Lecturer"}
{"affiliationId":{"schema":"RINGGOLD","value":"26066"},"departmentName":"Obstetrics and Gynaecology","orcid":"0000-0003-0305-8980","roleTitle":"MD, PhD"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00ey9xa07"},"departmentName":"Harbin Sport University","endDate":"2024-06-01","orcid":"0000-0003-0305-8980","roleTitle":"Student"}
{"affiliationId":{"schema":"","value":""},"departmentName":"KIPP DC Schools","endDate":"2016-01-012017-01-01","orcid":"0009-0004-7554-419X","roleTitle":"Middle School Science Teacher"}
{"affiliationId":{"schema":"RINGGOLD","value":"19374"},"departmentName":"Music Therapy","endDate":"","orcid":"0000-0002-5115-9762","roleTitle":"Dementia Program Director","startDate":"2017-10-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"9144"},"departmentName":"Interdisciplinary Center for Scientific Computing - IWR","endDate":"2009-01-152012-10-31","orcid":"0000-0002-2004-4153","roleTitle":"PhD student"}
{"departmentName":"2nd Air Supply Maintenance Center Command","endDate":"2012-08-30","orcid":"0000-0002-4389-9744","roleTitle":"Production Planning Group Supervisor","startDate":"2008-09-01"}
{"affiliationId":{"schema":"","value":""},"departmentName":"MITAKY High-Tech Co., Ltd.","endDate":"2020-07-01","orcid":"0000-0001-7628-743X","roleTitle":"President & CEO"}
{"departmentName":"Ancash","endDate":"","orcid":"0000-0002-3861-2833","startDate":""}
{"departmentName":"CALDAS","endDate":"2010-06-17","orcid":"0000-0003-1077-4053","roleTitle":"COORDINADOR ESTUDIO DE RADIO Y TV","startDate":"1999-03-17"}
{"departmentName":"Institute of Sociogenesis and Social Dynamics","endDate":"","orcid":"0000-0001-6881-7760","startDate":""}
{"affiliationId":{"schema":"RINGGOLD","value":"27004"},"departmentName":"Biology","endDate":"1999-09-02","orcid":"0000-0002-8553-169X","roleTitle":"MCf Microbiology "}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/041nas322"},"departmentName":"Bonn Center for Dependency and Slavery Studies","endDate":"2019-01-012023-01-01","orcid":"0009-0002-9250-948X","roleTitle":"Predoctoral Research Associate / wiss. Mitarbeiter"}
{"departmentName":"Процессы и аппараты химических и пищевых производств","endDate":"","orcid":"0000-0002-1805-5670","roleTitle":"старший преподаватель","startDate":"2015-09-03"}
{"affiliationId":{"schema":"RINGGOLD","value":"95414"},"departmentName":"Department of Science, Technology and International relations","endDate":"2021-03-09","orcid":"0000-0002-7616-2482","roleTitle":"Researcher"}
{"affiliationId":{"schema":"RINGGOLD","value":"28730"},"departmentName":"Forensic Medicine & Toxicology","endDate":"2019-08-26","orcid":"0000-0001-7369-1744","roleTitle":"Senior Resident","startDate":"2016-08-27"}
{"departmentName":"Department of Functional & Comparative Genomics","endDate":"2015-10","orcid":"0000-0001-8059-8919","roleTitle":"Postdoctoral Research Associate, Fluorescence Chemical Sensors","startDate":"2014-08"}
{"affiliationId":{"schema":"RINGGOLD","value":"16771"},"departmentName":"Catedra UNESCO de Gestion y Politica Universitaria","endDate":"2016","orcid":"0000-0001-9437-6700","roleTitle":"Investigador","startDate":"2001"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05v3pg621"},"departmentName":"Department of Multimedia Animation and Application","endDate":"","orcid":"0000-0003-2513-7065","roleTitle":"Professor","startDate":"2012-02-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"246714"},"departmentName":"Delta Dental of Wisconsin","endDate":"","orcid":"0000-0003-2675-3206","roleTitle":"VP & Science Officer","startDate":"2006"}
{"affiliationId":{"schema":"RINGGOLD","value":"146895"},"departmentName":"Facultad de Ciencias Económicas, administrativas y Contables ","endDate":"2015-02-01","orcid":"0000-0001-9384-6395","roleTitle":"Coordinadora de Investigaciones-Facultad CEAC"}
{"affiliationId":{"schema":"RINGGOLD","value":"8367"},"departmentName":"Microbiology, Immunology and Tropical Medicine","endDate":"2019-10-01","orcid":"0000-0002-2349-263X","roleTitle":"Assistant Professor"}
{"affiliationId":{"schema":"RINGGOLD","value":"129705"},"departmentName":"Institut de Recherche en Informatique Fondamentale","orcid":"0000-0003-0287-6252","roleTitle":""}
{"affiliationId":{"schema":"RINGGOLD","value":"119726"},"departmentName":"Computer Engineering/MIS","endDate":"2018-09-15","orcid":"0000-0003-0014-5106","roleTitle":"Assoc.Prof.Dr."}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00wbwde85"},"departmentName":"school","endDate":"2021-03-01","orcid":"0009-0007-7585-0594","roleTitle":"mahasiswa"}
{"affiliationId":{"schema":"RINGGOLD","value":"9171"},"departmentName":"Computer Science Dept.","endDate":"","orcid":"0000-0002-3202-2904","startDate":"2002-04-01"}
{"affiliationId":{"schema":"GRID","value":"grid.474837.b"},"departmentName":"Gastroenterology","endDate":"","orcid":"0000-0003-0705-5760","startDate":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"School of Physical and Occupational Therapy","endDate":"2018-03-012021-12-31","orcid":"0000-0002-8406-5228","roleTitle":"Postdoctoral Fellow"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Xinqiao Hospital","orcid":"0000-0001-6200-2309","roleTitle":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"DIPARTIMENTO DI PATOLOGIA CHIRURGICA, MEDICA, MOLECOLARE E DELL'AREA CRITICA","endDate":"2018-10-012021-10-01","orcid":"0000-0002-5588-2608","roleTitle":"Ricercatori a tempo determinato"}
{"affiliationId":{"schema":"","value":""},"departmentName":"ECE","endDate":"2021-01-012022-02-01","orcid":"0000-0002-8729-0287","roleTitle":"Visiting Scholar"}
{"departmentName":"Mantenimiento","endDate":"1998-01-01","orcid":"0000-0002-8663-2716","roleTitle":"Project coordinator","startDate":"1994-01-01"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/012wtwr40"},"departmentName":"Centro Universitário Newton Paiva","endDate":"2012-07","orcid":"0000-0002-1725-1805","roleTitle":"Professor Assistente","startDate":"2005-09"}
{"departmentName":"DFC","endDate":"","orcid":"0000-0003-3764-9500","roleTitle":"Professora Substituta","startDate":"2018-02-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"10107"},"departmentName":"Automated Lab - Women's & Children's Hospital site","endDate":"1995-09-052018-09-09","orcid":"0000-0002-5594-9737","roleTitle":"Medical Scientist"}
{"affiliationId":{"schema":"RINGGOLD","value":"33784"},"departmentName":"Computer Science","endDate":"2023-06-04","orcid":"0009-0000-6585-6246","roleTitle":"Visiting Assistant Professor","startDate":"2023-01-09"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0280a3n32"},"departmentName":"Research","endDate":"2022-06","orcid":"0000-0002-0846-9503","roleTitle":"Research Assistant","startDate":"2019-06"}
{"affiliationId":{"schema":"GRID","value":"grid.8657.c"},"departmentName":"Finnish Meteorological Institute","endDate":"2019-06-01","orcid":"0000-0002-4826-2929","roleTitle":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05290cv24"},"departmentName":"Dipartimento di Informatica e Tecnologie dell'Informazione","endDate":"2022-11-012025-11-01","orcid":"0009-0000-6476-8092","roleTitle":"PhD Student"}
{"affiliationId":{"schema":"GRID","value":"grid.495082.2"},"departmentName":"Laboratory of water bodies sanitaric microbiology and human microbial ecology","endDate":"2017-01-01","orcid":"0000-0003-1194-7251","roleTitle":"Senior reseacher "}
{"affiliationId":{"schema":"RINGGOLD","value":"150713"},"departmentName":"Education","endDate":"2013-09-01","orcid":"0000-0002-2489-1202","roleTitle":"Doctor of Education"}
{"departmentName":"Office of Risk Management","endDate":"","orcid":"0000-0003-2772-313X","roleTitle":"Senior Policy Advisor","startDate":"2014-09-04"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04pe1sa24"},"departmentName":"Facultad de Estudios Globales y Hospitalidad","endDate":"","orcid":"0009-0003-4270-4196","roleTitle":"Docente en las Carreras de Licenciatura en Turismo y Relaciones Internacionales","startDate":"2023-06-12"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Civil & Mechanical Engineering","endDate":"2014-06-012020-06-30","orcid":"0000-0001-6598-2525","roleTitle":"Assistant Professor"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02ytfzr55"},"departmentName":"Department of Civil Engineering ","endDate":"2022-03-212024-02-27","orcid":"0000-0002-9572-1358","roleTitle":"Temporary Faculty"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04wdt0z89"},"departmentName":"library","endDate":"2024-03-01","orcid":"0009-0002-8124-1772","roleTitle":"library it"}
{"departmentName":"Kalil e Pires Advogados","endDate":"","orcid":"0009-0001-3403-0297","roleTitle":"Estagiário","startDate":"2023-03-13"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Coronel Institute of Occupational Health","endDate":"2019-01-01","orcid":"0000-0002-0461-4013","roleTitle":"Principal Investigator"}
{"affiliationId":{"schema":"GRID","value":"grid.5801.c"},"departmentName":"Health Sciences and Technology","orcid":"0000-0002-1651-0457","roleTitle":"Doctoral student"}
{"departmentName":"Prefeitura de Teresina","endDate":"","orcid":"0000-0002-8148-4179","startDate":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00k8rrx20"},"departmentName":"Prosseguir","endDate":"2023-05-012024-05-01","orcid":"0000-0001-5147-3455","roleTitle":"Coordenadora Pedagógica Regional do Prosseguir em Manaus"}
{"affiliationId":{"schema":"RINGGOLD","value":"381864"},"departmentName":"Pharmaron Beijing Co Ltd","endDate":"","orcid":"0000-0003-2165-740X","startDate":"2015-10-27"}
{"affiliationId":{"schema":"RINGGOLD","value":"183390"},"departmentName":"Instituto Tecnológico Superior de Irapuato","endDate":"","orcid":"0000-0003-2101-5917","startDate":"2018-11-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"384754"},"departmentName":"SynCat@Beijing","orcid":"0000-0002-1050-2165","roleTitle":"Vice Director"}
{"affiliationId":{"schema":"RINGGOLD","value":"282795"},"departmentName":"Setor de Coleções Científicas","endDate":"","orcid":"0000-0003-3755-0025","roleTitle":"Estagiária","startDate":"2019-11"}
{"affiliationId":{"schema":"RINGGOLD","value":"434589"},"departmentName":"Chemistry","endDate":"2015-05-25","orcid":"0000-0001-5861-4425","roleTitle":"Lecturer"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Cajamarca","endDate":"2023-11-15","orcid":"0000-0003-1524-3315","roleTitle":"Asistente Administrativo Provincial"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Seduc ma","endDate":"2012-01-24","orcid":"0000-0003-3142-356X","roleTitle":""}
{"departmentName":"Rede Particular de Ensino","endDate":"2021","orcid":"0000-0002-4771-2131","roleTitle":"Professora de Artes Cênicas","startDate":"2018"}
{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/100009042"},"departmentName":"Derecho del Trabajo y de la Seguridad Social","endDate":"2002-12-01","orcid":"0000-0002-1275-5289","roleTitle":"Catedrático de Derecho del Trabajo y de la Seguridad Social"}
{"departmentName":"Department of Ethology","endDate":"2011-01-02","orcid":"0000-0003-1436-7324","roleTitle":"Research Assistant","startDate":"2007-09-01"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04ka8rx28"},"departmentName":"Mechanical Engineering","orcid":"0009-0006-6397-2183","roleTitle":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/002qhr126"},"departmentName":"Theatre","orcid":"0009-0001-6531-9624","roleTitle":"theacher"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Institute of Molecular Medicine, Renji Hospital, School of Medicine.","endDate":"2021-01-27","orcid":"0000-0003-0399-1201","roleTitle":"Associate Professor"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Cell Biology","endDate":"2013-11-012014-07-01","orcid":"0000-0003-1489-4757","roleTitle":"Research Scolarship for Undergraduate Students"}
{"affiliationId":{"schema":"RINGGOLD","value":"346985"},"departmentName":"Maternidade","endDate":"","orcid":"0000-0002-6985-9679","roleTitle":"Enfermeira/UTI neonatal","startDate":"2019-02-26"}
{"affiliationId":{"schema":"RINGGOLD","value":"41726"},"departmentName":"Area Team - Biodiversity","endDate":"2006-01-012008-01-01","orcid":"0000-0002-6553-3786","roleTitle":"Catchment Biodiversity Technical Officer"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04z7qrj66"},"departmentName":"Merchant Marine College","orcid":"0009-0003-6812-3576","roleTitle":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"Lima","endDate":"2022-03-25","orcid":"0000-0002-9262-5619","roleTitle":"Docente Universitario"}
{"affiliationId":{"schema":"","value":""},"departmentName":"University of Bristol","orcid":"0000-0002-9793-3485","roleTitle":""}
{"departmentName":"US Geological Survey, Ecosystems Mission Area, Cooperative Fish and Wildlife Research Units Program","endDate":"","orcid":"0000-0002-8638-6682","startDate":"2011-06-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"28666"},"departmentName":"English","endDate":"","orcid":"0000-0001-5361-109X","roleTitle":"Assistant Professor","startDate":"2019-07-05"}
{"affiliationId":{"schema":"","value":""},"departmentName":"Centre for Earth System Science","endDate":"2010-03-012012-11-01","orcid":"0000-0001-5323-4431","roleTitle":"Researcher and Executive Officer"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/05bjd0w70"},"departmentName":"Education","endDate":"2013-08-15","orcid":"0000-0001-5960-0586","roleTitle":"Associate Professor and Chair, Department of Education","startDate":"2002-08-15"}
{"affiliationId":{"schema":"RINGGOLD","value":"632513"},"departmentName":"Board ","endDate":"2020-01-01","orcid":"0000-0002-4222-4518","roleTitle":"Boardmember"}
{"affiliationId":{"schema":"GRID","value":"grid.22657.34"},"departmentName":"Faculty of Food Technology","endDate":"2017-01-01","orcid":"0000-0003-2606-8380","roleTitle":"guest scientific assistant, guest researcher, guest lecturer"}
{"affiliationId":{"schema":"FUNDREF","value":"http://dx.doi.org/10.13039/501100008331"},"departmentName":"Radiology","endDate":"","orcid":"0000-0001-6249-450X","startDate":"2016-03-01"}
{"departmentName":"кафедра физики","endDate":"","orcid":"0000-0001-6786-838X","roleTitle":"доцент","startDate":"1981-11-28"}
{"affiliationId":{"schema":"RINGGOLD","value":"6429"},"departmentName":"Molecular and Cellular Physiology","endDate":"2010-01-012013-01-01","orcid":"0000-0002-5538-0464","roleTitle":"Research-Associate"}
{"affiliationId":{"schema":"RINGGOLD","value":"48455"},"departmentName":"Clinical Biochem","endDate":"2016-12-30","orcid":"0000-0002-9563-8044","roleTitle":"Associate Professor","startDate":"2008"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/01xf75524"},"departmentName":"Molecular Oncology","endDate":"2022-01-01","orcid":"0000-0003-0928-003X","roleTitle":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/022kthw22"},"departmentName":"Anesthesiology and Perioperative Medicine","endDate":"2022-07-01","orcid":"0000-0001-7410-7271","roleTitle":"Postdoctoral Researcher"}
{"affiliationId":{"schema":"RINGGOLD","value":"125792"},"departmentName":"Environmental Management and Toxicology","endDate":"2006-06-20","orcid":"0000-0001-7855-4183","roleTitle":"Instructional/Tutorial Facilitator"}
{"departmentName":"2004 2007 | Teacher of Fiqh and Usul-al-Fiqh | Islamic University | Moscow, Russia","endDate":"","orcid":"0000-0001-8386-4426","startDate":""}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0190ak572"},"departmentName":"Biology","endDate":"2024-06-01","orcid":"0009-0001-6766-7876","roleTitle":"Research assistant"}
{"affiliationId":{"schema":"RINGGOLD","value":"16763"},"departmentName":"Education","orcid":"0000-0003-2355-4682","roleTitle":"Profesora titular de Universidad/ Senior Lecturer "}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/0406jsq08"},"departmentName":"Farmácia","endDate":"2023-03-01","orcid":"0009-0009-1661-5046","roleTitle":"Residente farmacêutico"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/02aqsxs83"},"departmentName":"School of Biological Sciences","endDate":"2021-08-16","orcid":"0000-0002-1696-1952","roleTitle":"Assistant Professor"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/00s582s04"},"departmentName":"cajamarca","endDate":"2024-01-01","orcid":"0009-0001-0970-2741","roleTitle":"BACHILLER"}
{"affiliationId":{"schema":"RINGGOLD","value":"186027"},"departmentName":"РЯиК","endDate":"","orcid":"0000-0002-1000-5441","roleTitle":"старший преподаватель","startDate":"2007-09-01"}
{"affiliationId":{"schema":"RINGGOLD","value":"47910"},"departmentName":"Faculty of Life Science and Technology","endDate":"2012-07-082015-10-01","orcid":"0000-0001-7533-998X","roleTitle":"Lecturer"}
{"affiliationId":{"schema":"RINGGOLD","value":"2234"},"departmentName":"Education","endDate":"","orcid":"0000-0001-6123-8483","roleTitle":"Assistant Professor","startDate":"2019-01-07"}
{"affiliationId":{"schema":"ROR","value":"https://ror.org/04qkymg17"},"departmentName":"genera surgical ward","endDate":"2013-10-012015-02-01","orcid":"0009-0009-7638-0453","roleTitle":"Registered Nurse"}
{"affiliationId":{"schema":"RINGGOLD","value":"575342"},"departmentName":"GBUZ Naucno-prakticeskij psihonevrologiceskij centr imeni Z P Solov'eva Departamenta zdravoohranenia goroda Moskvy","endDate":"2022-11-01","orcid":"0000-0002-0344-9765","roleTitle":""}
{"affiliationId":{"schema":"","value":""},"departmentName":"SERVICE DE CHIRURGIE ORTHOPÉDIQUE ET TRAUMATOLOGIE","endDate":"2024-02-01","orcid":"0009-0003-2119-1766","roleTitle":"CHIRURGIEN ORTHOPEDISTE"}

View File

@ -0,0 +1 @@
{"doi":"https://doi.org/10.1007/978-3-030-75768-7","OpenAlexId":"https://openalex.org/W2124362779"}

View File

@ -22,7 +22,6 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
@ -31,6 +30,8 @@ import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.api.model.EntityCommunities;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
@ -89,6 +90,11 @@ public class SparkBulkTagJob {
ProtoMap protoMap = new Gson().fromJson(temp, ProtoMap.class);
log.info("pathMap: {}", new Gson().toJson(protoMap));
TaggingConstraints taggingConstraints = new Gson()
.fromJson(parser.get("taggingCriteria"), TaggingConstraints.class);
taggingConstraints.getTags().forEach(t -> t.setSelection(VerbResolverFactory.newInstance()));
SparkConf conf = new SparkConf();
CommunityConfiguration cc;
@ -110,16 +116,16 @@ public class SparkBulkTagJob {
spark -> {
extendCommunityConfigurationForEOSC(spark, inputPath, cc);
execBulkTag(
spark, inputPath, outputPath, protoMap, cc);
spark, inputPath, outputPath, protoMap, cc, taggingConstraints);
execEntityTag(
spark, inputPath + "organization", outputPath + "organization",
Utils.getCommunityOrganization(baseURL), Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION,
TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
execEntityTag(
spark, inputPath + "project", outputPath + "project", Utils.getCommunityProjects(baseURL),
Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
spark, inputPath + "project", outputPath + "project",
Utils.getCommunityProjects(baseURL), Project.class, TaggingConstants.CLASS_ID_PROJECT,
TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
});
}
@ -276,18 +282,15 @@ public class SparkBulkTagJob {
String inputPath,
String outputPath,
ProtoMap protoMappingParams,
CommunityConfiguration communityConfiguration) {
CommunityConfiguration communityConfiguration,
TaggingConstraints taggingConstraints) {
try {
System.out.println(new ObjectMapper().writeValueAsString(protoMappingParams));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
removeOutputDir(spark, outputPath + e.name());
ResultTagger resultTagger = new ResultTagger();
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
@ -297,18 +300,19 @@ public class SparkBulkTagJob {
.map(
(MapFunction<R, R>) value -> resultTagger
.enrichContextCriteria(
value, communityConfiguration, protoMappingParams),
value, communityConfiguration, protoMappingParams, taggingConstraints),
Encoders.bean(resultClazz))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + e.name());// writing the tagging in the working dir for entity
readPath(spark, outputPath + e.name(), resultClazz) // copy the tagging in the actual result output path
readPath(spark, outputPath + e.name(), resultClazz)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(inputPath + e.name());
});
}

View File

@ -0,0 +1,34 @@
package eu.dnetlib.dhp.bulktag;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
 * Serializable pair made of a result and the tag associated with it.
 *
 * @param <R> the concrete {@link Result} subtype carried by this pair
 */
public class Tagging<R extends Result> implements Serializable {

    /** The tag associated with {@link #result}. */
    private String tag;

    /** The result the tag refers to. */
    private R result;

    /**
     * @return the tag associated with the result, or {@code null} if none was set
     */
    public String getTag() {
        return tag;
    }

    /**
     * @param tag the tag to associate with the result
     */
    public void setTag(String tag) {
        this.tag = tag;
    }

    /**
     * @return the wrapped result, or {@code null} if none was set
     */
    public R getResult() {
        return result;
    }

    /**
     * @param result the result to wrap
     */
    public void setResult(R result) {
        this.result = result;
    }

    /**
     * Creates a {@code Tagging} holding the given result and tag.
     *
     * <p>The return type is parameterized (it used to be the raw {@code Tagging}) so callers keep
     * the static type of the wrapped result and do not need unchecked conversions. The erasure of
     * the method is unchanged, so existing callers remain compatible.
     *
     * @param result the result to wrap
     * @param tag    the tag to associate with the result
     * @param <R>    the concrete {@link Result} subtype
     * @return a new instance carrying {@code result} and {@code tag}
     */
    public static <R extends Result> Tagging<R> newInstance(R result, String tag) {
        Tagging<R> t = new Tagging<>();
        t.result = result;
        t.tag = tag;
        return t;
    }
}

View File

@ -10,6 +10,7 @@ import java.lang.reflect.Method;
import java.util.*;
import java.util.stream.Collectors;
import com.jayway.jsonpath.Criteria;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -91,7 +92,8 @@ public class ResultTagger implements Serializable {
}
public <R extends Result> R enrichContextCriteria(
final R result, final CommunityConfiguration conf, final Map<String, MapModel> criteria)
final R result, final CommunityConfiguration conf, final Map<String, MapModel> criteria,
TaggingConstraints taggingConstraints)
throws InvocationTargetException, NoSuchMethodException {
// Verify if the entity is deletedbyinference. In case verify if to clean the context list
@ -118,6 +120,15 @@ public class ResultTagger implements Serializable {
break;
}
// collect the ids of the tags having at least one criterion verified by the extracted parameters (e.g. supplementary material)
final Set<String> tags = new HashSet<>();
taggingConstraints.getTags().forEach(t -> {
if (t.getCriteria().stream().anyMatch(crit -> crit.verifyCriteria(param)))
tags.add(t.getId());
});
// communities contains all the communities to be not added to the context
final Set<String> removeCommunities = new HashSet<>();
@ -245,10 +256,26 @@ public class ResultTagger implements Serializable {
clearContext(result);
/* Verify if there is something to bulktag */
if (communities.isEmpty()) {
if (communities.isEmpty() && tags.isEmpty()) {
return result;
}
tags.forEach(t -> {
Context con = new Context();
con.setId(t);
con.setDataInfo(Arrays
.asList(
OafMapperUtils
.dataInfo(
false, ANNOTATION_DATA_INFO_TYPE, true, false,
OafMapperUtils
.qualifier(
CLASS_ID_ANNOTATION, CLASS_NAME_ANNOTATION, DNET_PROVENANCE_ACTIONS,
DNET_PROVENANCE_ACTIONS),
TAGGING_TRUST)));
result.getContext().add(con);
});
result.getContext().forEach(c -> {
final String cId = c.getId();
if (communities.contains(cId)) {

View File

@ -8,6 +8,10 @@ public class TaggingConstants {
public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging";
public static final String ANNOTATION_DATA_INFO_TYPE = "annotation";
public static final String CLASS_ID_ANNOTATION = "graph:annotation";
public static final String CLASS_NAME_ANNOTATION = "Graph Annotation";
public static final String CLASS_ID_SUBJECT = "community:subject";
public static final String CLASS_ID_DATASOURCE = "community:datasource";
public static final String CLASS_ID_CZENODO = "community:zenodocommunity";

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable;
/**
 * A {@link SelectionConstraints} that additionally carries an identifier.
 */
public class TaggingConstraint extends SelectionConstraints implements Serializable {

    /** Identifier attached to this set of constraints. */
    private String id;

    /**
     * Sets the identifier of this constraint.
     *
     * @param id the identifier to store
     */
    public void setId(final String id) {
        this.id = id;
    }

    /**
     * Returns the identifier of this constraint.
     *
     * @return the stored identifier, or {@code null} if it was never set
     */
    public String getId() {
        return this.id;
    }
}

View File

@ -0,0 +1,17 @@
package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable container for a list of {@link TaggingConstraint}.
 */
public class TaggingConstraints implements Serializable {

    /** The contained tagging constraints; {@code null} until assigned. */
    private List<TaggingConstraint> tags;

    /**
     * Replaces the contained tagging constraints.
     *
     * @param tags the list to store (kept by reference, not copied)
     */
    public void setTags(final List<TaggingConstraint> tags) {
        this.tags = tags;
    }

    /**
     * Returns the contained tagging constraints.
     *
     * @return the stored list, or {@code null} if it was never assigned
     */
    public List<TaggingConstraint> getTags() {
        return this.tags;
    }
}

View File

@ -39,5 +39,10 @@
"paramLongName": "nameNode",
"paramDescription": "this parameter is to specify the api to be queried (beta or production)",
"paramRequired": true
},{
"paramName": "tc",
"paramLongName": "taggingCriteria",
"paramDescription": "the JSON serialization of the tagging constraints (tag ids with their selection criteria) used to annotate the results",
"paramRequired": true
}
]

View File

@ -0,0 +1,4 @@
sourcePath=/tmp/miriam/12_graph_copy
pathMap=/data/bulktagging/pathMap
baseURL=https://services.openaire.eu/openaire/community/
taggingCriteria={"tags":[{"id":"SM","criteria":[{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary material for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary document for"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplemental figure"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"supplementary table"}]},{"constraint":[{"verb":"starts_with_caseinsensitive","field":"title","value":"table for"}]}]}]}

View File

@ -77,6 +77,7 @@
<arg>--pathMap</arg><arg>${pathMap}</arg>
<arg>--baseURL</arg><arg>${baseURL}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--taggingCriteria</arg><arg>${taggingCriteria}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>

View File

@ -33,6 +33,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.ProtoMap;
import eu.dnetlib.dhp.bulktag.community.TaggingConstraints;
import eu.dnetlib.dhp.schema.oaf.*;
public class BulkTagJobTest {
@ -68,6 +69,8 @@ public class BulkTagJobTest {
private static String taggingConf = "";
private static String taggingCriteria = "{\"tags\":[{\"id\":\"SM\",\"criteria\":[{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary material for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary document for\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplemental figure\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"supplementary table\"}]},{\"constraint\":[{\"verb\":\"starts_with_caseinsensitive\",\"field\":\"title\",\"value\":\"table for\"}]}]}]}";
static {
try {
taggingConf = IOUtils
@ -119,7 +122,10 @@ public class BulkTagJobTest {
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates/").getPath(),
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -156,7 +162,10 @@ public class BulkTagJobTest {
"-sourcePath", sourcePath,
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -242,15 +251,15 @@ public class BulkTagJobTest {
fs
.copyFromLocalFile(
false, new org.apache.hadoop.fs.Path(getClass()
.getResource("/eu/dnetlib/dhp/bulktag/pathMap/")
.getResource("/eu/dnetlib/dhp/bulktag/pathMap/pathMap")
.getPath()),
new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap"));
final String pathMap = workingDir.toString() + "/data/bulktagging/protoMap";
final String sourcePath = getClass()
.getResource(
"/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/")
.getPath();
final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob
.main(
new String[] {
@ -262,7 +271,9 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap",
"-nameNode", "local"
"-nameNode", "local",
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/"
});
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
@ -342,8 +353,11 @@ public class BulkTagJobTest {
"-taggingConf", taggingConf,
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-pathMap", pathMap
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -424,7 +438,8 @@ public class BulkTagJobTest {
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
"-nameNode", "local"
"-nameNode", "local",
"-taggingCriteria", taggingCriteria
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -613,7 +628,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -734,7 +752,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -862,7 +883,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -962,7 +986,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1005,7 +1032,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1059,7 +1089,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1108,7 +1141,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1326,7 +1362,11 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1444,7 +1484,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1564,7 +1607,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1604,7 +1650,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1649,7 +1698,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1686,7 +1738,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1761,7 +1816,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1799,7 +1857,10 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
// "-baseURL", "https://services.openaire.eu/openaire/community/",
"-pathMap", pathMap,
"-taggingConf", taggingConf
"-taggingConf", taggingConf,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1840,7 +1901,10 @@ public class BulkTagJobTest {
.getResourceAsStream(
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_publicationdate.xml")),
"-outputPath", workingDir.toString() + "/",
"-pathMap", pathMap
"-pathMap", pathMap,
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -1943,10 +2007,15 @@ public class BulkTagJobTest {
"-outputPath", workingDir.toString() + "/",
"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
"-baseURL", "none",
"-taggingCriteria", taggingCriteria,
"-baseURL", "https://services.openaire.eu/openaire/community/",
"-nameNode", "local"
});
}
System.out.println("prrr");
}
}

View File

@ -31,5 +31,11 @@ class ORCIDAuthorMatchersTest {
assertTrue(matchOrderedTokenAndAbbreviations("孙林 Sun Lin", "Sun Lin"))
// assertTrue(AuthorsMatchRevised.compare("孙林 Sun Lin", "孙林")); // not yet implemented
}
// Asserts that matchOrderedTokenAndAbbreviations reports a match between
// "James C. A. Miller-Jones" and "James Antony Miller-Jones": both names start with
// "James" and end with the hyphenated token "Miller-Jones", while the middle part is
// written as the initials "C. A." in the first name and as "Antony" in the second.
// NOTE(review): presumably the initial "A." is matched against "Antony" and the extra
// "C." is tolerated — confirm against the matcher implementation.
@Test def testDocumentationNames(): Unit = {
assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller-Jones"))
}
// Asserts that matchOrderedTokenAndAbbreviations reports a match between
// "James C. A. Miller-Jones" and "James Antony Miller Jones": here the first name ends
// with the hyphenated token "Miller-Jones" while the second name spells it as the two
// space-separated tokens "Miller Jones".
// NOTE(review): presumably the matcher splits or normalizes hyphenated tokens before
// comparing — confirm against the matcher implementation.
@Test def testDocumentationNames2(): Unit = {
assertTrue(matchOrderedTokenAndAbbreviations("James C. A. Miller-Jones", "James Antony Miller Jones"))
}
}