[Enrichment Step] get rid of hive

This commit is contained in:
Miriam Baglioni 2022-04-12 11:26:48 +02:00
commit 1a8641227d
35 changed files with 1237 additions and 97 deletions

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-assembly-resources</artifactId>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId>

View File

@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-code-style</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<packaging>jar</packaging>
@ -47,12 +47,16 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.9.1</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path>
</properties>

View File

@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>

View File

@ -5,7 +5,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-actionmanager</artifactId>

View File

@ -4,7 +4,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<artifactId>dhp-aggregation</artifactId>
<build>

View File

@ -72,7 +72,6 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
spark.read.load(targetPath).printSchema();
val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
result

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -1,11 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-dedup-openaire</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -0,0 +1,982 @@
{
"indexed": {
"date-parts": [
[
2022,
4,
5
]
],
"date-time": "2022-04-05T11:07:00Z",
"timestamp": 1649156820730
},
"reference-count": 63,
"publisher": "Public Library of Science (PLoS)",
"issue": "5",
"license": [
{
"start": {
"date-parts": [
[
2020,
5,
29
]
],
"date-time": "2020-05-29T00:00:00Z",
"timestamp": 1590710400000
},
"content-version": "vor",
"delay-in-days": 0,
"URL": "http://creativecommons.org/licenses/by/4.0/"
}
],
"funder": [
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
},
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
},
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
},
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
}
],
"content-domain": {
"domain": [
"www.plosone.org"
],
"crossmark-restriction": false
},
"short-container-title": [
"PLoS ONE"
],
"DOI": "10.1371/journal.pone.0233284",
"type": "journal-article",
"created": {
"date-parts": [
[
2020,
5,
29
]
],
"date-time": "2020-05-29T17:53:47Z",
"timestamp": 1590774827000
},
"page": "e0233284",
"update-policy": "http://dx.doi.org/10.1371/journal.pone.corrections_policy",
"source": "Crossref",
"is-referenced-by-count": 13,
"title": [
"Vancomycin and nisin A are effective against biofilms of multi-drug resistant Staphylococcus aureus isolates from human milk"
],
"prefix": "10.1371",
"volume": "15",
"author": [
{
"ORCID": "http://orcid.org/0000-0003-4107-0278",
"authenticated-orcid": true,
"given": "Angeliki",
"family": "Angelopoulou",
"sequence": "first",
"affiliation": [
]
},
{
"given": "Des",
"family": "Field",
"sequence": "additional",
"affiliation": [
]
},
{
"given": "Mariana",
"family": "Pérez-Ibarreche",
"sequence": "additional",
"affiliation": [
]
},
{
"ORCID": "http://orcid.org/0000-0001-8317-6455",
"authenticated-orcid": true,
"given": "Alicja K.",
"family": "Warda",
"sequence": "additional",
"affiliation": [
]
},
{
"given": "Colin",
"family": "Hill",
"sequence": "additional",
"affiliation": [
]
},
{
"given": "R. Paul",
"family": "Ross",
"sequence": "additional",
"affiliation": [
]
}
],
"member": "340",
"published-online": {
"date-parts": [
[
2020,
5,
29
]
]
},
"reference": [
{
"issue": "78",
"key": "pone.0233284.ref001",
"doi-asserted-by": "crossref",
"first-page": "509",
"DOI": "10.1016/S0899-9007(00)00363-4",
"article-title": "Breast milk: a truly functional food",
"volume": "16",
"author": "B. Lönnerdal",
"year": "2000",
"journal-title": "Nutrition"
},
{
"key": "pone.0233284.ref002",
"first-page": "1",
"volume-title": "Mastitis: causes and management",
"author": "WHO",
"year": "2000"
},
{
"issue": "12",
"key": "pone.0233284.ref003",
"doi-asserted-by": "crossref",
"first-page": "115",
"DOI": "10.1038/pr.2014.178",
"article-title": "Human milk and infant intestinal mucosal glycans guide succession of the neonatal intestinal microbiota",
"volume": "77",
"author": "DS Newburg",
"year": "2015",
"journal-title": "Pediatr Res"
},
{
"issue": "7",
"key": "pone.0233284.ref004",
"doi-asserted-by": "crossref",
"first-page": "647",
"DOI": "10.1001/jamapediatrics.2017.0378",
"article-title": "Association between breast milk bacterial communities and establishment and development of the infant gut microbiome",
"volume": "171",
"author": "PS Pannaraj",
"year": "2017",
"journal-title": "JAMA Pediatr"
},
{
"issue": "Suppl 2:",
"key": "pone.0233284.ref005",
"doi-asserted-by": "crossref",
"first-page": "S69",
"DOI": "10.1542/peds.2008-1315i",
"article-title": "Why mothers stop breastfeeding: mothers' self-reported reasons for stopping during the first year",
"volume": "122",
"author": "R Li",
"year": "2008",
"journal-title": "Pediatrics"
},
{
"issue": "2",
"key": "pone.0233284.ref006",
"doi-asserted-by": "crossref",
"first-page": "83",
"DOI": "10.1007/s00430-017-0532-z",
"article-title": "The microbiology and treatment of human mastitis",
"volume": "207",
"author": "A Angelopoulou",
"year": "2018",
"journal-title": "Med Microbiol Immunol"
},
{
"issue": "2",
"key": "pone.0233284.ref007",
"doi-asserted-by": "crossref",
"first-page": "169",
"DOI": "10.3920/BM2013.0036",
"article-title": "Probiotics for human lactational mastitis",
"volume": "5",
"author": "L Fernández",
"year": "2014",
"journal-title": "Benef Microbes"
},
{
"issue": "15",
"key": "pone.0233284.ref008",
"doi-asserted-by": "crossref",
"first-page": "4650",
"DOI": "10.1128/AEM.02599-07",
"article-title": "Oral administration of Lactobacillus strains isolated from breast milk as an alternative for the treatment of infectious mastitis during lactation",
"volume": "74",
"author": "E Jiménez",
"year": "2008",
"journal-title": "Appl Environ Microbiol"
},
{
"issue": "3",
"key": "pone.0233284.ref009",
"doi-asserted-by": "crossref",
"first-page": "406",
"DOI": "10.1177/0890334415585078",
"article-title": "Metagenomic analysis of milk of healthy and mastitis-suffering women",
"volume": "31",
"author": "E Jiménez",
"year": "2015",
"journal-title": "J Hum Lact"
},
{
"issue": "2",
"key": "pone.0233284.ref010",
"doi-asserted-by": "crossref",
"first-page": "176",
"DOI": "10.1086/589241",
"article-title": "Risk of infection and death due to methicillin-resistant Staphylococcus aureus in long-term carriers",
"volume": "47",
"author": "R Datta",
"year": "2008",
"journal-title": "Clin Infect Dis"
},
{
"issue": "4",
"key": "pone.0233284.ref011",
"doi-asserted-by": "crossref",
"DOI": "10.1128/microbiolspec.GPP3-0023-2018",
"article-title": "Staphylococcal biofilms",
"volume": "6",
"author": "M. Otto",
"year": "2018",
"journal-title": "Microbiol Spectr"
},
{
"issue": "1",
"key": "pone.0233284.ref012",
"doi-asserted-by": "crossref",
"first-page": "9",
"DOI": "10.1038/s41522-018-0053-6",
"article-title": "Fighting biofilms with lantibiotics and other groups of bacteriocins",
"volume": "4",
"author": "H Mathur",
"year": "2018",
"journal-title": "NPJ Biofilms Microbiomes"
},
{
"issue": "2",
"key": "pone.0233284.ref013",
"doi-asserted-by": "crossref",
"first-page": "310",
"DOI": "10.1128/MMBR.00041-08",
"article-title": "Signals, regulatory networks, and materials that build and break bacterial biofilms",
"volume": "73",
"author": "E Karatan",
"year": "2009",
"journal-title": "Microbiol Mol Biol Rev"
},
{
"issue": "3",
"key": "pone.0233284.ref014",
"doi-asserted-by": "crossref",
"first-page": "147",
"DOI": "10.1016/S1473-3099(01)00091-3",
"article-title": "Vancomycin-resistant Staphylococcus aureus: a new model of antibiotic resistance",
"volume": "1",
"author": "K Hiramatsu",
"year": "2001",
"journal-title": "Lancet Infect Dis"
},
{
"key": "pone.0233284.ref015",
"doi-asserted-by": "crossref",
"first-page": "339",
"DOI": "10.1146/annurev.mi.38.100184.002011",
"article-title": "The structure and mode of action of glycopeptide antibiotics of the vancomycin group",
"volume": "38",
"author": "JC Barna",
"year": "1984",
"journal-title": "Annu Rev Microbiol"
},
{
"key": "pone.0233284.ref016",
"unstructured": "Health Service Executive Mastitis Factsheet for Health Care Professionals. Available at: https://www.breastfeeding.ie/Uploads/Mastitis.pdf"
},
{
"issue": "3",
"key": "pone.0233284.ref017",
"first-page": "136",
"article-title": "Breast infection: a review of diagnosis and management practices",
"volume": "14",
"author": "E Boakes",
"year": "2018",
"journal-title": "Eur J Breast Health"
},
{
"key": "pone.0233284.ref018",
"doi-asserted-by": "crossref",
"first-page": "1205",
"DOI": "10.3389/fmicb.2017.01205",
"article-title": "Bacteriocin-antimicrobial synergy: A medical and food perspective",
"volume": "8",
"author": "H Mathur",
"year": "2017",
"journal-title": "Front Microbiol"
},
{
"issue": "1",
"key": "pone.0233284.ref019",
"doi-asserted-by": "crossref",
"first-page": "223",
"DOI": "10.1016/S0005-2736(99)00208-4",
"article-title": "The lantibiotic nisin, a special case or not?",
"volume": "1462",
"author": "E Breukink",
"year": "1999",
"journal-title": "Biochim Biophys Acta"
},
{
"issue": "10",
"key": "pone.0233284.ref020",
"doi-asserted-by": "crossref",
"first-page": "963",
"DOI": "10.1038/nsmb830",
"article-title": "The nisin-lipid II complex reveals a pyrophosphate cage that provides a blueprint for novel antibiotics",
"volume": "11",
"author": "ST Hsu",
"year": "2004",
"journal-title": "Nat Struct Mol Biol"
},
{
"issue": "5793",
"key": "pone.0233284.ref021",
"doi-asserted-by": "crossref",
"first-page": "1636",
"DOI": "10.1126/science.1129818",
"article-title": "An alternative bactericidal mechanism of action for lantibiotic peptides that target lipid II",
"volume": "313",
"author": "HE Hasper",
"year": "2006",
"journal-title": "Science"
},
{
"issue": "3",
"key": "pone.0233284.ref022",
"doi-asserted-by": "crossref",
"first-page": "1772",
"DOI": "10.1074/jbc.M006770200",
"article-title": "Specific binding of nisin to the peptidoglycan precursor lipid II combines pore formation and inhibition of cell wall biosynthesis for potent antibiotic activity",
"volume": "276",
"author": "I Wiedemann",
"year": "2001",
"journal-title": "J Biol Chem"
},
{
"key": "pone.0233284.ref023",
"doi-asserted-by": "crossref",
"first-page": "104539",
"DOI": "10.1016/j.idairyj.2019.104539",
"article-title": "Bovine mastitis is a polymicrobial disease requiring a polydiagnostic approach",
"volume": "99",
"author": "A Angelopoulou",
"year": "2019",
"journal-title": "Int Dairy J"
},
{
"issue": "4",
"key": "pone.0233284.ref024",
"doi-asserted-by": "crossref",
"first-page": "493",
"DOI": "10.1093/ajcp/45.4_ts.493",
"article-title": "Antibiotic susceptibility testing by a standardized single disk method",
"volume": "45",
"author": "AW Bauer",
"year": "1966",
"journal-title": "Am Journal Clin Pathol"
},
{
"key": "pone.0233284.ref025",
"unstructured": "v_9.0_Breakpoint_Tables.pdf. Available at: http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_9.0_Breakpoint_Tables.pdf (Accessed: 28th July 2019)."
},
{
"issue": "12",
"key": "pone.0233284.ref026",
"doi-asserted-by": "crossref",
"first-page": "67",
"DOI": "10.1016/j.vetmic.2010.05.044",
"article-title": "(GTG)5-PCR fingerprinting for the classification and identification of coagulase-negative Staphylococcus species from bovine milk and teat apices: a comparison of type strains and field isolates",
"volume": "147",
"author": "G Braem",
"year": "2011",
"journal-title": "Vet Microbiol"
},
{
"key": "pone.0233284.ref027",
"doi-asserted-by": "crossref",
"first-page": "270",
"DOI": "10.1186/s12859-015-0703-0",
"article-title": "GelJ a tool for analyzing DNA fingerprint gel images",
"volume": "16",
"author": "J Heras",
"year": "2015",
"journal-title": "BMC bioinformatics"
},
{
"issue": "4",
"key": "pone.0233284.ref028",
"doi-asserted-by": "crossref",
"first-page": "473",
"DOI": "10.1111/j.1751-7915.2010.00184.x",
"article-title": "Studies with bioengineered nisin peptides highlight the broad-spectrum potency of nisin V",
"volume": "3",
"author": "D Field",
"year": "2010",
"journal-title": "Microb Biotechnol"
},
{
"issue": "11",
"key": "pone.0233284.ref029",
"doi-asserted-by": "crossref",
"first-page": "e79563",
"DOI": "10.1371/journal.pone.0079563",
"article-title": "Intensive mutagenesis of the nisin hinge leads to the rational design of enhanced derivatives",
"volume": "8",
"author": "B Healy",
"year": "2013",
"journal-title": "PLoS One"
},
{
"issue": "10",
"key": "pone.0233284.ref030",
"doi-asserted-by": "crossref",
"first-page": "e46884",
"DOI": "10.1371/journal.pone.0046884",
"article-title": "Bioengineered nisin A derivatives with enhanced activity against both Gram positive and Gram negative pathogens",
"volume": "7",
"author": "D Field",
"year": "2012",
"journal-title": "PLoS One"
},
{
"issue": "3",
"key": "pone.0233284.ref031",
"doi-asserted-by": "crossref",
"first-page": "e0119684",
"DOI": "10.1371/journal.pone.0119684",
"article-title": "A Bioengineered nisin derivative to control biofilms of Staphylococcus pseudintermedius",
"volume": "10",
"author": "D Field",
"year": "2015a",
"journal-title": "PLoS One"
},
{
"issue": "2",
"key": "pone.0233284.ref032",
"doi-asserted-by": "crossref",
"first-page": "207",
"DOI": "10.1007/s13765-012-3253-4",
"article-title": "Biofilm formation, attachment, and cell hydrophobicity of foodborne pathogens under varied environmental conditions",
"volume": "56",
"author": "NY Choi",
"year": "2013",
"journal-title": "J Korean Soc Appl Biol Chem"
},
{
"issue": "2",
"key": "pone.0233284.ref033",
"doi-asserted-by": "crossref",
"first-page": "175",
"DOI": "10.1016/S0167-7012(00)00122-6",
"article-title": "A modified microtiter-plate test for quantification of staphylococcal biofilm formation",
"volume": "40",
"author": "S Stepanović",
"year": "2000",
"journal-title": "J Microbiol Methods"
},
{
"issue": "2",
"key": "pone.0233284.ref034",
"doi-asserted-by": "crossref",
"first-page": "225",
"DOI": "10.1111/j.1574-695X.2011.00806.x",
"article-title": "Characterization of Staphylococcus aureus strains involved in human and bovine mastitis",
"volume": "62",
"author": "S Delgado",
"year": "2011",
"journal-title": "FEMS Immunol Med Microbiol"
},
{
"key": "pone.0233284.ref035",
"doi-asserted-by": "crossref",
"first-page": "53",
"DOI": "10.2174/1874285801711010053",
"article-title": "Understanding the mechanism of bacterial biofilms resistance to antimicrobial agents",
"volume": "11",
"author": "S Singh",
"year": "2017",
"journal-title": "Open Microbiol J"
},
{
"issue": "1",
"key": "pone.0233284.ref036",
"doi-asserted-by": "crossref",
"first-page": "61",
"DOI": "10.2174/1389203053027584",
"article-title": "Bacterial lantibiotics: strategies to improve therapeutic potential",
"volume": "6",
"author": "PD Cotter",
"year": "2005",
"journal-title": "Curr Protein Pept Sci"
},
{
"issue": "5",
"key": "pone.0233284.ref037",
"doi-asserted-by": "crossref",
"first-page": "494",
"DOI": "10.1016/j.ijantimicag.2015.07.011",
"article-title": "Bacteriocins and their position in the next wave of conventional antibiotics",
"volume": "46",
"author": "VL Cavera",
"year": "2015",
"journal-title": "Int J Antimicrob Agents"
},
{
"key": "pone.0233284.ref038",
"doi-asserted-by": "crossref",
"first-page": "1363",
"DOI": "10.3389/fmicb.2015.01363",
"article-title": "Bioengineering lantibiotics for therapeutic success",
"volume": "6",
"author": "D Field",
"year": "2015b",
"journal-title": "Front Microbiol"
},
{
"issue": "11",
"key": "pone.0233284.ref039",
"doi-asserted-by": "crossref",
"first-page": "5572",
"DOI": "10.1128/AAC.00888-13",
"article-title": "Effects of bacteriocins on methicillin-resistant Staphylococcus aureus biofilm",
"volume": "57",
"author": "K Okuda",
"year": "2013",
"journal-title": "Antimicrob Agents Chemother"
},
{
"issue": "6",
"key": "pone.0233284.ref040",
"doi-asserted-by": "crossref",
"first-page": "511",
"DOI": "10.1159/000335598",
"article-title": "In vitro activities of nisin alone or in combination with vancomycin and ciprofloxacin against methicillin-resistant and methicillin-susceptible Staphylococcus aureus strains",
"volume": "57",
"author": "S Dosler",
"year": "2011",
"journal-title": "Chemotherapy"
},
{
"issue": "18",
"key": "pone.0233284.ref041",
"doi-asserted-by": "crossref",
"first-page": "5809",
"DOI": "10.1128/AEM.01104-07",
"article-title": "Dissection and modulation of the four distinct activities of nisin by mutagenesis of rings A and B and by C-terminal truncation",
"volume": "73",
"author": "R Rink",
"year": "2007",
"journal-title": "Appl Environ Microbiol"
},
{
"issue": "6",
"key": "pone.0233284.ref042",
"doi-asserted-by": "crossref",
"first-page": "806",
"DOI": "10.1007/s00253-004-1599-1",
"article-title": "Site-directed mutagenesis of the hinge region of nisinZ and properties of nisinZ mutants",
"volume": "64",
"author": "J Yuan",
"year": "2004",
"journal-title": "Appl Microbiol Biotechnol"
},
{
"key": "pone.0233284.ref043",
"doi-asserted-by": "crossref",
"first-page": "508",
"DOI": "10.3389/fmicb.2016.00508",
"article-title": "In vitro activities of nisin and nisin derivatives alone and in combination with antibiotics against Staphylococcus biofilms",
"volume": "7",
"author": "D Field",
"year": "2016",
"journal-title": "Front Microbiol"
},
{
"issue": "18",
"key": "pone.0233284.ref044",
"doi-asserted-by": "crossref",
"first-page": "1573",
"DOI": "10.2217/fmb-2019-0153",
"article-title": "Nisin Z and lacticin 3147 improve efficacy of antibiotics against clinically significant bacteria",
"volume": "14",
"author": "JC Ellis",
"year": "2020",
"journal-title": "Future Microbiol"
},
{
"issue": "3",
"key": "pone.0233284.ref045",
"doi-asserted-by": "crossref",
"first-page": "311",
"DOI": "10.1177/0890334408317435",
"article-title": "The bacteriocin nisin, an effective agent for the treatment of staphylococcal mastitis during lactation",
"volume": "24",
"author": "L Fernández",
"year": "2008",
"journal-title": "J Hum Lact"
},
{
"issue": "1",
"key": "pone.0233284.ref046",
"doi-asserted-by": "crossref",
"first-page": "33",
"DOI": "10.1159/000272223",
"article-title": "Inflammatory breast diseases during lactation: milk stasis, puerperal mastitis, abscesses of the breast, and malignant tumorscurrent and evidence-based strategies for diagnosis and therapy",
"volume": "5",
"author": "M Abou-Dakn",
"year": "2010",
"journal-title": "Breast Care"
},
{
"issue": "6",
"key": "pone.0233284.ref047",
"doi-asserted-by": "crossref",
"first-page": "430",
"DOI": "10.1007/s12262-012-0776-1",
"article-title": "Management of lactational mastitis and breast abscesses: review of current knowledge and practice",
"volume": "75",
"author": "K Kataria",
"year": "2013",
"journal-title": "Indian J Surg"
},
{
"issue": "2",
"key": "pone.0233284.ref048",
"doi-asserted-by": "crossref",
"first-page": "77",
"DOI": "10.1016/j.micres.2012.09.004",
"article-title": "Genotypic diversity and virulent factors of Staphylococcus epidermidis isolated from human breast milk",
"volume": "168",
"author": "J Begović",
"year": "2013",
"journal-title": "Microbiol Res"
},
{
"issue": "Pt 8",
"key": "pone.0233284.ref049",
"doi-asserted-by": "crossref",
"first-page": "761",
"DOI": "10.1099/jmm.0.05453-0",
"article-title": "Antimicrobial-resistance and enterotoxin-encoding genes among staphylococci isolated from expressed human breast milk",
"volume": "53",
"author": "LA Carneiro",
"year": "2004",
"journal-title": "J Med Microbiol"
},
{
"issue": "2",
"key": "pone.0233284.ref050",
"doi-asserted-by": "crossref",
"first-page": "113",
"DOI": "10.1007/s00284-015-0925-4",
"article-title": "Antibiotic susceptibility of commensal bacteria from human milk",
"volume": "72",
"author": "PW Chen",
"year": "2016",
"journal-title": "Curr Microbiol"
},
{
"key": "pone.0233284.ref051",
"doi-asserted-by": "crossref",
"first-page": "2512",
"DOI": "10.3389/fmicb.2018.02512",
"article-title": "Microbial community dynamics in mother's milk and infant's mouth and gut in moderately preterm infants",
"volume": "9",
"author": "E Biagi",
"year": "2018",
"journal-title": "Front Microbiol"
},
{
"key": "pone.0233284.ref052",
"doi-asserted-by": "crossref",
"first-page": "4",
"DOI": "10.3410/M4-4",
"article-title": "Reduced vancomycin susceptibility among clinical Staphylococcus aureus isolates ('the MIC Creep'): implications for therapy",
"volume": "4",
"author": "A Dhand",
"year": "2012",
"journal-title": "F1000 Med Rep"
},
{
"issue": "12",
"key": "pone.0233284.ref053",
"doi-asserted-by": "crossref",
"first-page": "1112",
"DOI": "10.1136/jcp.2009.069021",
"article-title": "Low concentrations of vancomycin stimulate biofilm formation in some clinical isolates of Staphylococcus epidermidis",
"volume": "62",
"author": "JS Cargill",
"year": "2009",
"journal-title": "J Clin Pathol"
},
{
"issue": "2",
"key": "pone.0233284.ref054",
"doi-asserted-by": "crossref",
"first-page": "191",
"DOI": "10.1002/jobm.201000221",
"article-title": "Effect of sub-lethal doses of vancomycin and oxacillin on biofilm formation by vancomycin intermediate resistant Staphylococcus aureus",
"volume": "51",
"author": "ZA Mirani",
"year": "2011",
"journal-title": "J Basic Microbiol"
},
{
"key": "pone.0233284.ref055",
"doi-asserted-by": "crossref",
"first-page": "225",
"DOI": "10.1016/j.micpath.2017.07.004",
"article-title": "Vancomycin-induced biofilm formation by methicillin-resistant Staphylococcus aureus is associated with the secretion of membrane vesicles",
"volume": "110",
"author": "X He",
"year": "2017",
"journal-title": "Microb Pathog"
},
{
"issue": "9",
"key": "pone.0233284.ref056",
"doi-asserted-by": "crossref",
"first-page": "1627",
"DOI": "10.4315/0362-028X.JFP-12-001",
"article-title": "Effects of nisin and lysozyme on growth inhibition and biofilm formation capacity of Staphylococcus aureus strains isolated from raw milk and cheese samples",
"volume": "75",
"author": "M Sudagidan",
"year": "2012",
"journal-title": "J Food Prot"
},
{
"issue": "3",
"key": "pone.0233284.ref057",
"doi-asserted-by": "crossref",
"first-page": "253",
"DOI": "10.1016/j.ijfoodmicro.2008.01.011",
"article-title": "Nisin-bacteriophage cross-resistance in Staphylococcus aureus",
"volume": "122",
"author": "B Martinez",
"year": "2008",
"journal-title": "Int J Food Microbiol"
},
{
"issue": "1",
"key": "pone.0233284.ref058",
"doi-asserted-by": "crossref",
"first-page": "82",
"DOI": "10.2146/ajhp080434",
"article-title": "Therapeutic monitoring of vancomycin in adult patients: a consensus review of the american society of health-system pharmacists, the infectious diseases society of america, and the society of infectious diseases pharmacists",
"volume": "66",
"author": "M Rybak",
"year": "2009",
"journal-title": "Am J Health Syst Pharm"
},
{
"issue": "2",
"key": "pone.0233284.ref059",
"doi-asserted-by": "crossref",
"first-page": "277",
"DOI": "10.1111/j.1574-695X.2007.00300.x",
"article-title": "Increased tolerance of Staphylococcus aureus to vancomycin in viscous media",
"volume": "51",
"author": "V Kostenko",
"year": "2007",
"journal-title": "FEMS Immunol Med Microbiol"
},
{
"key": "pone.0233284.ref060",
"first-page": "107",
"article-title": "Multidrug tolerance of biofilms and persister cells",
"volume": "322",
"author": "K. Lewis",
"year": "2008",
"journal-title": "Curr Top Microbiol Immunol"
},
{
"issue": "6",
"key": "pone.0233284.ref061",
"doi-asserted-by": "crossref",
"first-page": "ftw056",
"DOI": "10.1093/femspd/ftw056",
"article-title": "Penetration barrier contributes to bacterial biofilm-associated resistance against only select antibiotics, and exhibits genus-, strain- and antibiotic-specific differences",
"volume": "74",
"author": "R Singh",
"year": "2016",
"journal-title": "Pathog Dis"
},
{
"issue": "12",
"key": "pone.0233284.ref062",
"doi-asserted-by": "crossref",
"first-page": "7273",
"DOI": "10.1128/AAC.03132-14",
"article-title": "Extracellular DNA impedes the transport of vancomycin in Staphylococcus epidermidis biofilms preexposed to subinhibitory concentrations of vancomycin",
"volume": "58",
"author": "N Doroshenko",
"year": "2014",
"journal-title": "Antimicrob Agents Chemotherapy"
},
{
"issue": "1",
"key": "pone.0233284.ref063",
"doi-asserted-by": "crossref",
"first-page": "46",
"DOI": "10.1007/s00776-005-0968-7",
"article-title": "Antimicrobial susceptibility of Staphylococcus aureus and Staphylococcus epidermidis biofilms isolated from infected total hip arthroplasty cases",
"volume": "11",
"author": "S Nishimura",
"year": "2006",
"journal-title": "J Orthop Sci"
}
],
"container-title": [
"PLOS ONE"
],
"original-title": [
],
"language": "en",
"link": [
{
"URL": "https://dx.plos.org/10.1371/journal.pone.0233284",
"content-type": "unspecified",
"content-version": "vor",
"intended-application": "similarity-checking"
}
],
"deposited": {
"date-parts": [
[
2020,
5,
29
]
],
"date-time": "2020-05-29T17:54:37Z",
"timestamp": 1590774877000
},
"score": 1,
"resource": {
"primary": {
"URL": "https://dx.plos.org/10.1371/journal.pone.0233284"
}
},
"subtitle": [
],
"editor": [
{
"given": "Rita G.",
"family": "Sobral",
"sequence": "first",
"affiliation": [
]
}
],
"short-title": [
],
"issued": {
"date-parts": [
[
2020,
5,
29
]
]
},
"references-count": 63,
"journal-issue": {
"issue": "5",
"published-online": {
"date-parts": [
[
2020,
5,
29
]
]
}
},
"URL": "http://dx.doi.org/10.1371/journal.pone.0233284",
"relation": {
},
"ISSN": [
"1932-6203"
],
"issn-type": [
{
"value": "1932-6203",
"type": "electronic"
}
],
"subject": [
"Multidisciplinary"
],
"published": {
"date-parts": [
[
2020,
5,
29
]
]
}
}

View File

@ -461,6 +461,86 @@ class CrossrefMappingTest {
}
@Test
def testConvertArticleFromCrossRef2OafSFI(): Unit = {
  // Converts the `sfi_funded_article.json` Crossref fixture and checks that the
  // mapping yields exactly one Publication with populated provenance,
  // collectedFrom and a "created" relevant date, plus at least one fully
  // populated Relation.
  // NOTE(review): despite the "SFI" name, nothing here checks the funder-specific
  // target of the generated relations — confirm whether that is intended.
  val fixtureStream =
    getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/sfi_funded_article.json")
  val json = Source.fromInputStream(fixtureStream).mkString

  assertNotNull(json)
  assertFalse(json.isEmpty)

  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // The record must map to one and only one Publication.
  val publications = resultList.filter(_.isInstanceOf[Publication])
  assert(publications.nonEmpty)
  assert(publications.size == 1)

  val result: Result = publications.head.asInstanceOf[Publication]
  assertNotNull(result)

  logger.info(mapper.writeValueAsString(result))

  // Provenance action: every qualifier field has to be filled in.
  assertNotNull(result.getDataInfo, "Datainfo test not null Failed")
  val provenance = result.getDataInfo.getProvenanceaction
  assertNotNull(provenance, "DataInfo/Provenance test not null Failed")
  assertFalse(provenance.getClassid.isEmpty, "DataInfo/Provenance/classId test not null Failed")
  assertFalse(provenance.getClassname.isEmpty, "DataInfo/Provenance/className test not null Failed")
  assertFalse(provenance.getSchemeid.isEmpty, "DataInfo/Provenance/SchemeId test not null Failed")
  assertFalse(provenance.getSchemename.isEmpty, "DataInfo/Provenance/SchemeName test not null Failed")

  // CollectedFrom has to reference the Crossref datasource by key and by name.
  assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed")
  assertFalse(result.getCollectedfrom.isEmpty)

  val sources = result.getCollectedfrom.asScala
  val hasCrossrefKey =
    sources.exists(kv => kv.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"))
  assert(hasCrossrefKey, "Wrong collected from assertion")
  val hasCrossrefName = sources.exists(kv => kv.getValue.equalsIgnoreCase("crossref"))
  assert(hasCrossrefName, "Wrong collected from assertion")

  // A relevant date qualified as "created" is expected.
  val hasCreatedDate =
    result.getRelevantdate.asScala.exists(rd => rd.getQualifier.getClassid.equalsIgnoreCase("created"))
  assert(hasCreatedDate, "Missing relevant date of type created")

  // At least one Relation must be emitted, each with all of its key fields set.
  val rels: List[Relation] = resultList.collect { case r: Relation => r }
  assertFalse(rels.isEmpty)

  for (relation <- rels) {
    assertNotNull(relation)
    assertFalse(relation.getSource.isEmpty)
    assertFalse(relation.getTarget.isEmpty)
    assertFalse(relation.getRelClass.isEmpty)
    assertFalse(relation.getRelType.isEmpty)
    assertFalse(relation.getSubRelType.isEmpty)
  }
}
@Test
def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = {

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@ -51,7 +51,7 @@
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-aggregation</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<scope>compile</scope>
</dependency>

View File

@ -8,6 +8,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
@ -56,7 +57,9 @@ public class PrepareResultOrcidAssociationStep1 {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));
final List<String> allowedsemrel = Arrays.stream(parser.get("allowedsemrels").split(";"))
.map(s -> s.toLowerCase()).collect(Collectors.toList());
log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
final List<String> allowedPids = Arrays.asList(parser.get("allowedpids").split(";"));
@ -122,28 +125,27 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz);
result
.joinWith(relation, result.col("id").equalTo(relation.col("source")))
.map((MapFunction<Tuple2<R, Relation>, ResultOrcidList>) t2 -> {
ResultOrcidList rol = new ResultOrcidList();
rol.setResultId(t2._2().getTarget());
List<AutoritativeAuthor> aal = new ArrayList<>();
t2._1().getAuthor().stream().forEach(a -> {
a.getPid().stream().forEach(p -> {
if (allowedPids.contains(p.getQualifier().getClassid().toLowerCase())) {
aal
.add(
AutoritativeAuthor
.newInstance(a.getName(), a.getSurname(), a.getFullname(), p.getValue()));
}
.joinWith(relation, result.col("id").equalTo(relation.col("source")))
.map((MapFunction<Tuple2<R, Relation>, ResultOrcidList>) t2 -> {
ResultOrcidList rol = new ResultOrcidList();
rol.setResultId(t2._2().getTarget());
List<AutoritativeAuthor> aal = new ArrayList<>();
t2._1().getAuthor().stream().forEach(a -> {
a.getPid().stream().forEach(p -> {
if (allowedPids.contains(p.getQualifier().getClassid().toLowerCase())) {
aal
.add(
AutoritativeAuthor
.newInstance(a.getName(), a.getSurname(), a.getFullname(), p.getValue()));
}
});
});
});
return rol;
}, Encoders.bean(ResultOrcidList.class))
.write()
return rol;
}, Encoders.bean(ResultOrcidList.class)).write()
.option("compression", "gzip")
.mode(SaveMode.Overwrite)
.json(outputPath + "/" + resultType);
;
}

View File

@ -10,6 +10,8 @@ import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -63,33 +65,30 @@ public class PrepareResultOrcidAssociationStep2 {
.union(readPath(spark, inputPath + "/software", ResultOrcidList.class));
resultOrcidAssoc
.toJavaRDD()
.mapToPair(r -> new Tuple2<>(r.getResultId(), r))
.reduceByKey(
(a, b) -> {
if (a == null) {
return b;
}
if (b == null) {
return a;
}
.groupByKey((MapFunction<ResultOrcidList, String>) rol -> rol.getResultId(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, ResultOrcidList, ResultOrcidList>) (k, it) ->{
ResultOrcidList resultOrcidList = it.next();
if(it.hasNext())
{
Set<String> orcid_set = new HashSet<>();
a.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid()));
b
.getAuthorList()
.stream()
.forEach(
aa -> {
if (!orcid_set.contains(aa.getOrcid())) {
a.getAuthorList().add(aa);
orcid_set.add(aa.getOrcid());
}
});
return a;
})
.map(Tuple2::_2)
.map(r -> OBJECT_MAPPER.writeValueAsString(r))
.saveAsTextFile(outputPath, GzipCodec.class);
resultOrcidList.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid()));
it.forEachRemaining(val -> val
.getAuthorList()
.stream()
.forEach(
aa -> {
if (!orcid_set.contains(aa.getOrcid())) {
resultOrcidList.getAuthorList().add(aa);
orcid_set.add(aa.getOrcid());
}
}));
}
return resultOrcidList;
},Encoders.bean(ResultOrcidList.class) )
.write()
.mode(SaveMode.Overwrite)
.option("compression","gzip")
.json(outputPath);
}
}

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.List;
import java.util.Optional;
@ -59,9 +60,9 @@ public class SparkOrcidToResultFromSemRelJob {
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {

View File

@ -11,10 +11,7 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -159,8 +156,14 @@ public class SparkResultToOrganizationFromIstRepoJob {
Dataset<R> result = readPath(spark, inputPath, resultClazz);
result.createOrReplaceTempView("result");
createCfHbforResult(spark);
Dataset<Row> cfhb = spark.sql("select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb "
+
"from result r " +
"lateral view explode(instance) i as inst " +
"where r.datainfo.deletedbyinference=false");
//createCfHbforResult(spark);
cfhb.createOrReplaceTempView("cfhb");
dsOrg.createOrReplaceTempView("rels");
return spark

View File

@ -228,8 +228,12 @@
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<!-- Merge conflict resolved in favour of the beta side: the conflict markers were committed and made this workflow malformed XML. The prepared info path follows the same layout as the software action of this workflow, and the saveGraph argument of the HEAD side is dropped. NOTE(review): confirm that the prepare step writes its output to this path. -->
<arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
</spark>
@ -258,8 +262,12 @@
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<!-- Merge conflict resolved in favour of the beta side: the conflict markers were committed and made this workflow malformed XML. The prepared info path follows the same layout as the software action of this workflow, and the saveGraph argument of the HEAD side is dropped. NOTE(review): confirm that the prepare step writes its output to this path. -->
<arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
</spark>
@ -288,8 +296,12 @@
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<!-- Merge conflict resolved in favour of the beta side: the conflict markers were committed and made this workflow malformed XML. The prepared info path follows the same layout as the software action of this workflow, and the saveGraph argument of the HEAD side is dropped. NOTE(review): confirm that the prepare step writes its output to this path. -->
<arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
</spark>
@ -319,7 +331,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
</spark>

View File

@ -5,9 +5,9 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import com.google.gson.Gson;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.commons.io.FileUtils;
import org.apache.neethi.Assertion;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@ -22,6 +22,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
@ -63,8 +64,7 @@ public class PrepareStep1Test {
}
@Test
void noUpdateTest() throws Exception {
//7 relationi fra issupplementedby e issupplementto
void noMatchTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparestep1")
@ -77,19 +77,25 @@ public class PrepareStep1Test {
"-sourcePath", sourcePath,
"-resultTableName", Dataset.class.getCanonicalName(),
"-outputPath", workingDir.toString() + "/preparedInfo",
"-allowedsemrels", "IsSupplementedBy;IsSupplementTo",
"-allowedpids","orcid;orcid_pending"
"-allowedsemrels", "IsSupplementedBy;IsSupplementTo",
"-allowedpids", "orcid;orcid_pending"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResultOrcidList> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo")
.textFile(workingDir.toString() + "/preparedInfo/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class));
System.out.println("***************** COUNT ********************* \n" + tmp.count());
tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));
Assertions.assertEquals(0, tmp.count());
Assertions.assertEquals(7, sc
.textFile(workingDir.toString() + "/preparedInfo/relationSubset")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class)).count());
Assertions.assertEquals(0, sc
.textFile(workingDir.toString() + "/preparedInfo/resultSubset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)).count());
}

View File

@ -11,3 +11,4 @@
{u'dataInfo': {u'invisible': False, u'provenanceaction': {u'classid': u'sysimport:crosswalk:entityregistry', u'classname': u'Harvested', u'schemeid': u'dnet:provenanceActions', u'schemename': u'dnet:provenanceActions'}, u'trust': u'0.91', u'inferred': False, u'deletedbyinference': False}, u'qualifier': {u'classid': u'orcid', u'classname': u'Open Researcher and Contributor ID', u'schemeid': u'dnet:pid_types', u'schemename': u'dnet:pid_types'}, u'value': u'0000-0002-5001-6911'}

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -4,8 +4,6 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnore;
public class BibJson implements Serializable {
private Editorial editorial;
private PidScheme pid_scheme;

View File

@ -42,10 +42,7 @@ import java.io.IOException;
import java.sql.Array;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
@ -311,7 +308,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
ds.setConsenttermsofusedate(rs.getDate("consenttermsofusedate").toString());
ds
.setConsenttermsofusedate(
Optional
.ofNullable(
rs.getDate("consenttermsofusedate"))
.map(c -> c.toString())
.orElse(null));
return Arrays.asList(ds);
} catch (final Exception e) {

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-promote</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-update</artifactId>

View File

@ -12,6 +12,8 @@ and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
on p.id= tmp.id;
compute stats indi_pub_green_oa;
create table indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from publication p
@ -22,6 +24,8 @@ join result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id;
compute stats indi_pub_grey_lit;
create table indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from publication p
@ -31,6 +35,7 @@ join datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
on tmp.id=p.id;
compute stats indi_pub_doi_from_crossref;
---- Sprint 2 ----
create table indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@ -40,6 +45,8 @@ join result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on r.id= tmp.id;
compute stats indi_result_has_cc_licence;
create table indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from result r
@ -49,16 +56,21 @@ join result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id;
compute stats indi_result_has_cc_licence_url;
-- Builds indi_pub_has_abstract: one (id, has_abstract) pair per publication,
-- where has_abstract is read from publication.abstract.
-- NOTE(review): coalesce(abstract, 1) maps a NULL abstract to 1, while the other
-- indicators in this script (e.g. grey_lit, doi_from_crossref, has_orcid) default a
-- missing value to 0. Confirm that 1 is the intended default here.
create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication;
-- Refresh table statistics right after the table is created.
compute stats indi_pub_has_abstract;
-- Builds indi_result_with_orcid: flags every result id with has_orcid = 1 when the id
-- appears in result_orcid, and 0 otherwise.
create table indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r
-- The left outer join keeps results without a match; coalesce turns their NULL into 0.
left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id;
-- Refresh table statistics right after the table is created.
compute stats indi_result_with_orcid;
---- Sprint 3 ----
create table indi_funded_result_with_fundref stored as parquet as
@ -68,6 +80,8 @@ left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp
on r.id= tmp.id;
compute stats indi_funded_result_with_fundref;
create table indi_result_org_country_collab stored as parquet as
with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o
@ -79,6 +93,8 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country;
compute stats indi_result_org_country_collab;
create table indi_result_org_collab stored as parquet as
with tmp as
(select o.id, ro.id as result,r.type from organization o
@ -90,6 +106,8 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id
group by o1.id, o2.id, o1.type;
compute stats indi_result_org_collab;
create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op
join organization o on o.id=op.id
@ -101,6 +119,8 @@ join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country
group by f1.funder, f2.country, f1.country;
compute stats indi_funder_country_collab;
create table indi_result_country_collab stored as parquet as
with tmp as
(select country, ro.id as result,r.type from organization o
@ -112,6 +132,8 @@ join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country
group by o1.country, o2.country, o1.type;
compute stats indi_result_country_collab;
---- Sprint 4 ----
create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
@ -123,6 +145,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id;
compute stats indi_pub_diamond;
create table indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
from publication_datasources pd
@ -133,6 +157,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
on pd.id=tmp.id;
compute stats indi_pub_hybrid;
create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from publication pd
@ -143,6 +169,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and ps.is_transformative_journal=true) tmp
on pd.id=tmp.id;
compute stats indi_pub_in_transformative;
create table indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri
left outer join
@ -153,11 +181,12 @@ where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id;
compute stats indi_pub_closed_other_open;
---- Sprint 5 ----
-- Builds indi_result_no_of_copies: for each id in result_instance, the number of
-- result_instance rows sharing that id, stored as number_of_copies.
create table indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from result_instance group by id;
-- Refresh table statistics right after the table is created.
compute stats indi_result_no_of_copies;
---- Sprint 6 ----
create table indi_pub_gold_oa stored as parquet as
WITH gold_oa AS (
@ -183,6 +212,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_pub_gold_oa;
create table indi_datasets_gold_oa stored as parquet as
WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@ -210,6 +241,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_datasets_gold_oa;
create table indi_software_gold_oa stored as parquet as
WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@ -237,6 +270,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_software_gold_oa;
create table indi_org_findable stored as parquet as
with result_with_pid as (
select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
@ -263,6 +298,8 @@ join result_with_pid_share on result_with_pid_share.organization=allresults.orga
left outer join (
select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization;
compute stats indi_org_findable;
create table indi_org_openess stored as parquet as
WITH datasets_oa as (
SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg
@ -313,6 +350,8 @@ left outer join (
left outer join (
select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization;
compute stats indi_org_openess;
create table indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@ -343,6 +382,8 @@ LEFT OUTER JOIN (
JOIN indi_result_has_cc_licence cc on pd.id=cc.id
where cc.has_cc_license=1) tmp on pd.id=tmp.id;
compute stats indi_pub_hybrid_oa_with_cc;
create table indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@ -350,6 +391,8 @@ where downloads>0
GROUP BY result_id
order by no_dowloads desc;
compute stats indi_pub_downloads;
create table indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id
@ -357,15 +400,21 @@ where downloads>0
GROUP BY result_id, repository_id
order by result_id;
compute stats indi_pub_downloads_datasource;
-- Builds indi_pub_downloads_year: total downloads per result_id and year.
-- The year is the first four characters of usage_stats.`date`; only usage rows with
-- downloads > 0 that join to publication (result_id = id) are counted.
-- NOTE(review): the output column is spelled no_dowloads; it is kept as is because
-- consumers of this table may reference that name.
create table indi_pub_downloads_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id where downloads>0
GROUP BY result_id, `year`
order by `year` asc;
-- Refresh table statistics right after the table is created.
compute stats indi_pub_downloads_year;
-- Builds indi_pub_downloads_datasource_year: total downloads per result_id,
-- repository_id and year. The year is the first four characters of usage_stats.`date`;
-- only usage rows with downloads > 0 that join to publication (result_id = id) are counted.
-- NOTE(review): the output column is spelled no_dowloads; it is kept as is because
-- consumers of this table may reference that name.
create table indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id
where downloads>0
GROUP BY result_id, repository_id, `year`
order by `year` asc, result_id;
-- Refresh table statistics right after the table is created.
compute stats indi_pub_downloads_datasource_year;

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-raw-data-update</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-build</artifactId>

View File

@ -3,7 +3,7 @@
<parent>
<artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version>
<version>1.2.5-SNAPSHOT</version>
<packaging>pom</packaging>
<licenses>
@ -551,6 +551,9 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.9.1</version>
<configuration>
<skip>${dhp.site.skip}</skip>
</configuration>
</plugin>
<plugin>
@ -791,6 +794,7 @@
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
<dhp.jackson.version>2.9.6</dhp.jackson.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.site.skip>true</dhp.site.skip>
<dhp.guava.version>11.0.2</dhp.guava.version>
<scala.version>2.11.12</scala.version>
<junit-jupiter.version>5.6.1</junit-jupiter.version>