[Enrichment Step] get rid of hive

This commit is contained in:
Miriam Baglioni 2022-04-12 11:26:48 +02:00
commit 1a8641227d
35 changed files with 1237 additions and 97 deletions

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId> <artifactId>dhp-build</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-build-assembly-resources</artifactId> <artifactId>dhp-build-assembly-resources</artifactId>

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId> <artifactId>dhp-build</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId> <artifactId>dhp-build-properties-maven-plugin</artifactId>

View File

@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-code-style</artifactId> <artifactId>dhp-code-style</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
<packaging>jar</packaging> <packaging>jar</packaging>
@ -47,12 +47,16 @@
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId> <artifactId>maven-site-plugin</artifactId>
<version>3.9.1</version> <version>3.9.1</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin> </plugin>
</plugins> </plugins>
</pluginManagement> </pluginManagement>
</build> </build>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path> <dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path>
</properties> </properties>

View File

@ -4,7 +4,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-build</artifactId> <artifactId>dhp-build</artifactId>
<packaging>pom</packaging> <packaging>pom</packaging>

View File

@ -5,7 +5,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@ -4,7 +4,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-actionmanager</artifactId> <artifactId>dhp-actionmanager</artifactId>

View File

@ -4,7 +4,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-aggregation</artifactId> <artifactId>dhp-aggregation</artifactId>
<build> <build>

View File

@ -72,7 +72,6 @@ class DataciteToOAFTest extends AbstractVocabularyTest {
spark.read.load(targetPath).printSchema(); spark.read.load(targetPath).printSchema();
val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf] val result: Dataset[Oaf] = spark.read.load(targetPath).as[Oaf]
result result

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -1,11 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-dedup-openaire</artifactId> <artifactId>dhp-dedup-openaire</artifactId>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -0,0 +1,982 @@
{
"indexed": {
"date-parts": [
[
2022,
4,
5
]
],
"date-time": "2022-04-05T11:07:00Z",
"timestamp": 1649156820730
},
"reference-count": 63,
"publisher": "Public Library of Science (PLoS)",
"issue": "5",
"license": [
{
"start": {
"date-parts": [
[
2020,
5,
29
]
],
"date-time": "2020-05-29T00:00:00Z",
"timestamp": 1590710400000
},
"content-version": "vor",
"delay-in-days": 0,
"URL": "http://creativecommons.org/licenses/by/4.0/"
}
],
"funder": [
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
},
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
},
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
},
{
"DOI": "10.13039/501100001602",
"name": "Science Foundation Ireland",
"doi-asserted-by": "crossref",
"award": [
"SFI/12/RC/2273"
]
}
],
"content-domain": {
"domain": [
"www.plosone.org"
],
"crossmark-restriction": false
},
"short-container-title": [
"PLoS ONE"
],
"DOI": "10.1371/journal.pone.0233284",
"type": "journal-article",
"created": {
"date-parts": [
[
2020,
5,
29
]
],
"date-time": "2020-05-29T17:53:47Z",
"timestamp": 1590774827000
},
"page": "e0233284",
"update-policy": "http://dx.doi.org/10.1371/journal.pone.corrections_policy",
"source": "Crossref",
"is-referenced-by-count": 13,
"title": [
"Vancomycin and nisin A are effective against biofilms of multi-drug resistant Staphylococcus aureus isolates from human milk"
],
"prefix": "10.1371",
"volume": "15",
"author": [
{
"ORCID": "http://orcid.org/0000-0003-4107-0278",
"authenticated-orcid": true,
"given": "Angeliki",
"family": "Angelopoulou",
"sequence": "first",
"affiliation": [
]
},
{
"given": "Des",
"family": "Field",
"sequence": "additional",
"affiliation": [
]
},
{
"given": "Mariana",
"family": "Pérez-Ibarreche",
"sequence": "additional",
"affiliation": [
]
},
{
"ORCID": "http://orcid.org/0000-0001-8317-6455",
"authenticated-orcid": true,
"given": "Alicja K.",
"family": "Warda",
"sequence": "additional",
"affiliation": [
]
},
{
"given": "Colin",
"family": "Hill",
"sequence": "additional",
"affiliation": [
]
},
{
"given": "R. Paul",
"family": "Ross",
"sequence": "additional",
"affiliation": [
]
}
],
"member": "340",
"published-online": {
"date-parts": [
[
2020,
5,
29
]
]
},
"reference": [
{
"issue": "78",
"key": "pone.0233284.ref001",
"doi-asserted-by": "crossref",
"first-page": "509",
"DOI": "10.1016/S0899-9007(00)00363-4",
"article-title": "Breast milk: a truly functional food",
"volume": "16",
"author": "B. Lönnerdal",
"year": "2000",
"journal-title": "Nutrition"
},
{
"key": "pone.0233284.ref002",
"first-page": "1",
"volume-title": "Mastitis: causes and management",
"author": "WHO",
"year": "2000"
},
{
"issue": "12",
"key": "pone.0233284.ref003",
"doi-asserted-by": "crossref",
"first-page": "115",
"DOI": "10.1038/pr.2014.178",
"article-title": "Human milk and infant intestinal mucosal glycans guide succession of the neonatal intestinal microbiota",
"volume": "77",
"author": "DS Newburg",
"year": "2015",
"journal-title": "Pediatr Res"
},
{
"issue": "7",
"key": "pone.0233284.ref004",
"doi-asserted-by": "crossref",
"first-page": "647",
"DOI": "10.1001/jamapediatrics.2017.0378",
"article-title": "Association between breast milk bacterial communities and establishment and development of the infant gut microbiome",
"volume": "171",
"author": "PS Pannaraj",
"year": "2017",
"journal-title": "JAMA Pediatr"
},
{
"issue": "Suppl 2:",
"key": "pone.0233284.ref005",
"doi-asserted-by": "crossref",
"first-page": "S69",
"DOI": "10.1542/peds.2008-1315i",
"article-title": "Why mothers stop breastfeeding: mothers' self-reported reasons for stopping during the first year",
"volume": "122",
"author": "R Li",
"year": "2008",
"journal-title": "Pediatrics"
},
{
"issue": "2",
"key": "pone.0233284.ref006",
"doi-asserted-by": "crossref",
"first-page": "83",
"DOI": "10.1007/s00430-017-0532-z",
"article-title": "The microbiology and treatment of human mastitis",
"volume": "207",
"author": "A Angelopoulou",
"year": "2018",
"journal-title": "Med Microbiol Immunol"
},
{
"issue": "2",
"key": "pone.0233284.ref007",
"doi-asserted-by": "crossref",
"first-page": "169",
"DOI": "10.3920/BM2013.0036",
"article-title": "Probiotics for human lactational mastitis",
"volume": "5",
"author": "L Fernández",
"year": "2014",
"journal-title": "Benef Microbes"
},
{
"issue": "15",
"key": "pone.0233284.ref008",
"doi-asserted-by": "crossref",
"first-page": "4650",
"DOI": "10.1128/AEM.02599-07",
"article-title": "Oral administration of Lactobacillus strains isolated from breast milk as an alternative for the treatment of infectious mastitis during lactation",
"volume": "74",
"author": "E Jiménez",
"year": "2008",
"journal-title": "Appl Environ Microbiol"
},
{
"issue": "3",
"key": "pone.0233284.ref009",
"doi-asserted-by": "crossref",
"first-page": "406",
"DOI": "10.1177/0890334415585078",
"article-title": "Metagenomic analysis of milk of healthy and mastitis-suffering women",
"volume": "31",
"author": "E Jiménez",
"year": "2015",
"journal-title": "J Hum Lact"
},
{
"issue": "2",
"key": "pone.0233284.ref010",
"doi-asserted-by": "crossref",
"first-page": "176",
"DOI": "10.1086/589241",
"article-title": "Risk of infection and death due to methicillin-resistant Staphylococcus aureus in long-term carriers",
"volume": "47",
"author": "R Datta",
"year": "2008",
"journal-title": "Clin Infect Dis"
},
{
"issue": "4",
"key": "pone.0233284.ref011",
"doi-asserted-by": "crossref",
"DOI": "10.1128/microbiolspec.GPP3-0023-2018",
"article-title": "Staphylococcal biofilms",
"volume": "6",
"author": "M. Otto",
"year": "2018",
"journal-title": "Microbiol Spectr"
},
{
"issue": "1",
"key": "pone.0233284.ref012",
"doi-asserted-by": "crossref",
"first-page": "9",
"DOI": "10.1038/s41522-018-0053-6",
"article-title": "Fighting biofilms with lantibiotics and other groups of bacteriocins",
"volume": "4",
"author": "H Mathur",
"year": "2018",
"journal-title": "NPJ Biofilms Microbiomes"
},
{
"issue": "2",
"key": "pone.0233284.ref013",
"doi-asserted-by": "crossref",
"first-page": "310",
"DOI": "10.1128/MMBR.00041-08",
"article-title": "Signals, regulatory networks, and materials that build and break bacterial biofilms",
"volume": "73",
"author": "E Karatan",
"year": "2009",
"journal-title": "Microbiol Mol Biol Rev"
},
{
"issue": "3",
"key": "pone.0233284.ref014",
"doi-asserted-by": "crossref",
"first-page": "147",
"DOI": "10.1016/S1473-3099(01)00091-3",
"article-title": "Vancomycin-resistant Staphylococcus aureus: a new model of antibiotic resistance",
"volume": "1",
"author": "K Hiramatsu",
"year": "2001",
"journal-title": "Lancet Infect Dis"
},
{
"key": "pone.0233284.ref015",
"doi-asserted-by": "crossref",
"first-page": "339",
"DOI": "10.1146/annurev.mi.38.100184.002011",
"article-title": "The structure and mode of action of glycopeptide antibiotics of the vancomycin group",
"volume": "38",
"author": "JC Barna",
"year": "1984",
"journal-title": "Annu Rev Microbiol"
},
{
"key": "pone.0233284.ref016",
"unstructured": "Health Service Executive Mastitis Factsheet for Health Care Professionals. Available at: https://www.breastfeeding.ie/Uploads/Mastitis.pdf"
},
{
"issue": "3",
"key": "pone.0233284.ref017",
"first-page": "136",
"article-title": "Breast infection: a review of diagnosis and management practices",
"volume": "14",
"author": "E Boakes",
"year": "2018",
"journal-title": "Eur J Breast Health"
},
{
"key": "pone.0233284.ref018",
"doi-asserted-by": "crossref",
"first-page": "1205",
"DOI": "10.3389/fmicb.2017.01205",
"article-title": "Bacteriocin-antimicrobial synergy: A medical and food perspective",
"volume": "8",
"author": "H Mathur",
"year": "2017",
"journal-title": "Front Microbiol"
},
{
"issue": "1",
"key": "pone.0233284.ref019",
"doi-asserted-by": "crossref",
"first-page": "223",
"DOI": "10.1016/S0005-2736(99)00208-4",
"article-title": "The lantibiotic nisin, a special case or not?",
"volume": "1462",
"author": "E Breukink",
"year": "1999",
"journal-title": "Biochim Biophys Acta"
},
{
"issue": "10",
"key": "pone.0233284.ref020",
"doi-asserted-by": "crossref",
"first-page": "963",
"DOI": "10.1038/nsmb830",
"article-title": "The nisin-lipid II complex reveals a pyrophosphate cage that provides a blueprint for novel antibiotics",
"volume": "11",
"author": "ST Hsu",
"year": "2004",
"journal-title": "Nat Struct Mol Biol"
},
{
"issue": "5793",
"key": "pone.0233284.ref021",
"doi-asserted-by": "crossref",
"first-page": "1636",
"DOI": "10.1126/science.1129818",
"article-title": "An alternative bactericidal mechanism of action for lantibiotic peptides that target lipid II",
"volume": "313",
"author": "HE Hasper",
"year": "2006",
"journal-title": "Science"
},
{
"issue": "3",
"key": "pone.0233284.ref022",
"doi-asserted-by": "crossref",
"first-page": "1772",
"DOI": "10.1074/jbc.M006770200",
"article-title": "Specific binding of nisin to the peptidoglycan precursor lipid II combines pore formation and inhibition of cell wall biosynthesis for potent antibiotic activity",
"volume": "276",
"author": "I Wiedemann",
"year": "2001",
"journal-title": "J Biol Chem"
},
{
"key": "pone.0233284.ref023",
"doi-asserted-by": "crossref",
"first-page": "104539",
"DOI": "10.1016/j.idairyj.2019.104539",
"article-title": "Bovine mastitis is a polymicrobial disease requiring a polydiagnostic approach",
"volume": "99",
"author": "A Angelopoulou",
"year": "2019",
"journal-title": "Int Dairy J"
},
{
"issue": "4",
"key": "pone.0233284.ref024",
"doi-asserted-by": "crossref",
"first-page": "493",
"DOI": "10.1093/ajcp/45.4_ts.493",
"article-title": "Antibiotic susceptibility testing by a standardized single disk method",
"volume": "45",
"author": "AW Bauer",
"year": "1966",
"journal-title": "Am Journal Clin Pathol"
},
{
"key": "pone.0233284.ref025",
"unstructured": "v_9.0_Breakpoint_Tables.pdf. Available at: http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_9.0_Breakpoint_Tables.pdf (Accessed: 28th July 2019)."
},
{
"issue": "12",
"key": "pone.0233284.ref026",
"doi-asserted-by": "crossref",
"first-page": "67",
"DOI": "10.1016/j.vetmic.2010.05.044",
"article-title": "(GTG)5-PCR fingerprinting for the classification and identification of coagulase-negative Staphylococcus species from bovine milk and teat apices: a comparison of type strains and field isolates",
"volume": "147",
"author": "G Braem",
"year": "2011",
"journal-title": "Vet Microbiol"
},
{
"key": "pone.0233284.ref027",
"doi-asserted-by": "crossref",
"first-page": "270",
"DOI": "10.1186/s12859-015-0703-0",
"article-title": "GelJ a tool for analyzing DNA fingerprint gel images",
"volume": "16",
"author": "J Heras",
"year": "2015",
"journal-title": "BMC bioinformatics"
},
{
"issue": "4",
"key": "pone.0233284.ref028",
"doi-asserted-by": "crossref",
"first-page": "473",
"DOI": "10.1111/j.1751-7915.2010.00184.x",
"article-title": "Studies with bioengineered nisin peptides highlight the broad-spectrum potency of nisin V",
"volume": "3",
"author": "D Field",
"year": "2010",
"journal-title": "Microb Biotechnol"
},
{
"issue": "11",
"key": "pone.0233284.ref029",
"doi-asserted-by": "crossref",
"first-page": "e79563",
"DOI": "10.1371/journal.pone.0079563",
"article-title": "Intensive mutagenesis of the nisin hinge leads to the rational design of enhanced derivatives",
"volume": "8",
"author": "B Healy",
"year": "2013",
"journal-title": "PLoS One"
},
{
"issue": "10",
"key": "pone.0233284.ref030",
"doi-asserted-by": "crossref",
"first-page": "e46884",
"DOI": "10.1371/journal.pone.0046884",
"article-title": "Bioengineered nisin A derivatives with enhanced activity against both Gram positive and Gram negative pathogens",
"volume": "7",
"author": "D Field",
"year": "2012",
"journal-title": "PLoS One"
},
{
"issue": "3",
"key": "pone.0233284.ref031",
"doi-asserted-by": "crossref",
"first-page": "e0119684",
"DOI": "10.1371/journal.pone.0119684",
"article-title": "A Bioengineered nisin derivative to control biofilms of Staphylococcus pseudintermedius",
"volume": "10",
"author": "D Field",
"year": "2015a",
"journal-title": "PLoS One"
},
{
"issue": "2",
"key": "pone.0233284.ref032",
"doi-asserted-by": "crossref",
"first-page": "207",
"DOI": "10.1007/s13765-012-3253-4",
"article-title": "Biofilm formation, attachment, and cell hydrophobicity of foodborne pathogens under varied environmental conditions",
"volume": "56",
"author": "NY Choi",
"year": "2013",
"journal-title": "J Korean Soc Appl Biol Chem"
},
{
"issue": "2",
"key": "pone.0233284.ref033",
"doi-asserted-by": "crossref",
"first-page": "175",
"DOI": "10.1016/S0167-7012(00)00122-6",
"article-title": "A modified microtiter-plate test for quantification of staphylococcal biofilm formation",
"volume": "40",
"author": "S Stepanović",
"year": "2000",
"journal-title": "J Microbiol Methods"
},
{
"issue": "2",
"key": "pone.0233284.ref034",
"doi-asserted-by": "crossref",
"first-page": "225",
"DOI": "10.1111/j.1574-695X.2011.00806.x",
"article-title": "Characterization of Staphylococcus aureus strains involved in human and bovine mastitis",
"volume": "62",
"author": "S Delgado",
"year": "2011",
"journal-title": "FEMS Immunol Med Microbiol"
},
{
"key": "pone.0233284.ref035",
"doi-asserted-by": "crossref",
"first-page": "53",
"DOI": "10.2174/1874285801711010053",
"article-title": "Understanding the mechanism of bacterial biofilms resistance to antimicrobial agents",
"volume": "11",
"author": "S Singh",
"year": "2017",
"journal-title": "Open Microbiol J"
},
{
"issue": "1",
"key": "pone.0233284.ref036",
"doi-asserted-by": "crossref",
"first-page": "61",
"DOI": "10.2174/1389203053027584",
"article-title": "Bacterial lantibiotics: strategies to improve therapeutic potential",
"volume": "6",
"author": "PD Cotter",
"year": "2005",
"journal-title": "Curr Protein Pept Sci"
},
{
"issue": "5",
"key": "pone.0233284.ref037",
"doi-asserted-by": "crossref",
"first-page": "494",
"DOI": "10.1016/j.ijantimicag.2015.07.011",
"article-title": "Bacteriocins and their position in the next wave of conventional antibiotics",
"volume": "46",
"author": "VL Cavera",
"year": "2015",
"journal-title": "Int J Antimicrob Agents"
},
{
"key": "pone.0233284.ref038",
"doi-asserted-by": "crossref",
"first-page": "1363",
"DOI": "10.3389/fmicb.2015.01363",
"article-title": "Bioengineering lantibiotics for therapeutic success",
"volume": "6",
"author": "D Field",
"year": "2015b",
"journal-title": "Front Microbiol"
},
{
"issue": "11",
"key": "pone.0233284.ref039",
"doi-asserted-by": "crossref",
"first-page": "5572",
"DOI": "10.1128/AAC.00888-13",
"article-title": "Effects of bacteriocins on methicillin-resistant Staphylococcus aureus biofilm",
"volume": "57",
"author": "K Okuda",
"year": "2013",
"journal-title": "Antimicrob Agents Chemother"
},
{
"issue": "6",
"key": "pone.0233284.ref040",
"doi-asserted-by": "crossref",
"first-page": "511",
"DOI": "10.1159/000335598",
"article-title": "In vitro activities of nisin alone or in combination with vancomycin and ciprofloxacin against methicillin-resistant and methicillin-susceptible Staphylococcus aureus strains",
"volume": "57",
"author": "S Dosler",
"year": "2011",
"journal-title": "Chemotherapy"
},
{
"issue": "18",
"key": "pone.0233284.ref041",
"doi-asserted-by": "crossref",
"first-page": "5809",
"DOI": "10.1128/AEM.01104-07",
"article-title": "Dissection and modulation of the four distinct activities of nisin by mutagenesis of rings A and B and by C-terminal truncation",
"volume": "73",
"author": "R Rink",
"year": "2007",
"journal-title": "Appl Environ Microbiol"
},
{
"issue": "6",
"key": "pone.0233284.ref042",
"doi-asserted-by": "crossref",
"first-page": "806",
"DOI": "10.1007/s00253-004-1599-1",
"article-title": "Site-directed mutagenesis of the hinge region of nisinZ and properties of nisinZ mutants",
"volume": "64",
"author": "J Yuan",
"year": "2004",
"journal-title": "Appl Microbiol Biotechnol"
},
{
"key": "pone.0233284.ref043",
"doi-asserted-by": "crossref",
"first-page": "508",
"DOI": "10.3389/fmicb.2016.00508",
"article-title": "In vitro activities of nisin and nisin derivatives alone and in combination with antibiotics against Staphylococcus biofilms",
"volume": "7",
"author": "D Field",
"year": "2016",
"journal-title": "Front Microbiol"
},
{
"issue": "18",
"key": "pone.0233284.ref044",
"doi-asserted-by": "crossref",
"first-page": "1573",
"DOI": "10.2217/fmb-2019-0153",
"article-title": "Nisin Z and lacticin 3147 improve efficacy of antibiotics against clinically significant bacteria",
"volume": "14",
"author": "JC Ellis",
"year": "2020",
"journal-title": "Future Microbiol"
},
{
"issue": "3",
"key": "pone.0233284.ref045",
"doi-asserted-by": "crossref",
"first-page": "311",
"DOI": "10.1177/0890334408317435",
"article-title": "The bacteriocin nisin, an effective agent for the treatment of staphylococcal mastitis during lactation",
"volume": "24",
"author": "L Fernández",
"year": "2008",
"journal-title": "J Hum Lact"
},
{
"issue": "1",
"key": "pone.0233284.ref046",
"doi-asserted-by": "crossref",
"first-page": "33",
"DOI": "10.1159/000272223",
"article-title": "Inflammatory breast diseases during lactation: milk stasis, puerperal mastitis, abscesses of the breast, and malignant tumors – current and evidence-based strategies for diagnosis and therapy",
"volume": "5",
"author": "M Abou-Dakn",
"year": "2010",
"journal-title": "Breast Care"
},
{
"issue": "6",
"key": "pone.0233284.ref047",
"doi-asserted-by": "crossref",
"first-page": "430",
"DOI": "10.1007/s12262-012-0776-1",
"article-title": "Management of lactational mastitis and breast abscesses: review of current knowledge and practice",
"volume": "75",
"author": "K Kataria",
"year": "2013",
"journal-title": "Indian J Surg"
},
{
"issue": "2",
"key": "pone.0233284.ref048",
"doi-asserted-by": "crossref",
"first-page": "77",
"DOI": "10.1016/j.micres.2012.09.004",
"article-title": "Genotypic diversity and virulent factors of Staphylococcus epidermidis isolated from human breast milk",
"volume": "168",
"author": "J Begović",
"year": "2013",
"journal-title": "Microbiol Res"
},
{
"issue": "Pt 8",
"key": "pone.0233284.ref049",
"doi-asserted-by": "crossref",
"first-page": "761",
"DOI": "10.1099/jmm.0.05453-0",
"article-title": "Antimicrobial-resistance and enterotoxin-encoding genes among staphylococci isolated from expressed human breast milk",
"volume": "53",
"author": "LA Carneiro",
"year": "2004",
"journal-title": "J Med Microbiol"
},
{
"issue": "2",
"key": "pone.0233284.ref050",
"doi-asserted-by": "crossref",
"first-page": "113",
"DOI": "10.1007/s00284-015-0925-4",
"article-title": "Antibiotic susceptibility of commensal bacteria from human milk",
"volume": "72",
"author": "PW Chen",
"year": "2016",
"journal-title": "Curr Microbiol"
},
{
"key": "pone.0233284.ref051",
"doi-asserted-by": "crossref",
"first-page": "2512",
"DOI": "10.3389/fmicb.2018.02512",
"article-title": "Microbial community dynamics in mother's milk and infant's mouth and gut in moderately preterm infants",
"volume": "9",
"author": "E Biagi",
"year": "2018",
"journal-title": "Front Microbiol"
},
{
"key": "pone.0233284.ref052",
"doi-asserted-by": "crossref",
"first-page": "4",
"DOI": "10.3410/M4-4",
"article-title": "Reduced vancomycin susceptibility among clinical Staphylococcus aureus isolates ('the MIC Creep'): implications for therapy",
"volume": "4",
"author": "A Dhand",
"year": "2012",
"journal-title": "F1000 Med Rep"
},
{
"issue": "12",
"key": "pone.0233284.ref053",
"doi-asserted-by": "crossref",
"first-page": "1112",
"DOI": "10.1136/jcp.2009.069021",
"article-title": "Low concentrations of vancomycin stimulate biofilm formation in some clinical isolates of Staphylococcus epidermidis",
"volume": "62",
"author": "JS Cargill",
"year": "2009",
"journal-title": "J Clin Pathol"
},
{
"issue": "2",
"key": "pone.0233284.ref054",
"doi-asserted-by": "crossref",
"first-page": "191",
"DOI": "10.1002/jobm.201000221",
"article-title": "Effect of sub-lethal doses of vancomycin and oxacillin on biofilm formation by vancomycin intermediate resistant Staphylococcus aureus",
"volume": "51",
"author": "ZA Mirani",
"year": "2011",
"journal-title": "J Basic Microbiol"
},
{
"key": "pone.0233284.ref055",
"doi-asserted-by": "crossref",
"first-page": "225",
"DOI": "10.1016/j.micpath.2017.07.004",
"article-title": "Vancomycin-induced biofilm formation by methicillin-resistant Staphylococcus aureus is associated with the secretion of membrane vesicles",
"volume": "110",
"author": "X He",
"year": "2017",
"journal-title": "Microb Pathog"
},
{
"issue": "9",
"key": "pone.0233284.ref056",
"doi-asserted-by": "crossref",
"first-page": "1627",
"DOI": "10.4315/0362-028X.JFP-12-001",
"article-title": "Effects of nisin and lysozyme on growth inhibition and biofilm formation capacity of Staphylococcus aureus strains isolated from raw milk and cheese samples",
"volume": "75",
"author": "M Sudagidan",
"year": "2012",
"journal-title": "J Food Prot"
},
{
"issue": "3",
"key": "pone.0233284.ref057",
"doi-asserted-by": "crossref",
"first-page": "253",
"DOI": "10.1016/j.ijfoodmicro.2008.01.011",
"article-title": "Nisin-bacteriophage cross-resistance in Staphylococcus aureus",
"volume": "122",
"author": "B Martinez",
"year": "2008",
"journal-title": "Int J Food Microbiol"
},
{
"issue": "1",
"key": "pone.0233284.ref058",
"doi-asserted-by": "crossref",
"first-page": "82",
"DOI": "10.2146/ajhp080434",
"article-title": "Therapeutic monitoring of vancomycin in adult patients: a consensus review of the american society of health-system pharmacists, the infectious diseases society of america, and the society of infectious diseases pharmacists",
"volume": "66",
"author": "M Rybak",
"year": "2009",
"journal-title": "Am J Health Syst Pharm"
},
{
"issue": "2",
"key": "pone.0233284.ref059",
"doi-asserted-by": "crossref",
"first-page": "277",
"DOI": "10.1111/j.1574-695X.2007.00300.x",
"article-title": "Increased tolerance of Staphylococcus aureus to vancomycin in viscous media",
"volume": "51",
"author": "V Kostenko",
"year": "2007",
"journal-title": "FEMS Immunol Med Microbiol"
},
{
"key": "pone.0233284.ref060",
"first-page": "107",
"article-title": "Multidrug tolerance of biofilms and persister cells",
"volume": "322",
"author": "K. Lewis",
"year": "2008",
"journal-title": "Curr Top Microbiol Immunol"
},
{
"issue": "6",
"key": "pone.0233284.ref061",
"doi-asserted-by": "crossref",
"first-page": "ftw056",
"DOI": "10.1093/femspd/ftw056",
"article-title": "Penetration barrier contributes to bacterial biofilm-associated resistance against only select antibiotics, and exhibits genus-, strain- and antibiotic-specific differences",
"volume": "74",
"author": "R Singh",
"year": "2016",
"journal-title": "Pathog Dis"
},
{
"issue": "12",
"key": "pone.0233284.ref062",
"doi-asserted-by": "crossref",
"first-page": "7273",
"DOI": "10.1128/AAC.03132-14",
"article-title": "Extracellular DNA impedes the transport of vancomycin in Staphylococcus epidermidis biofilms preexposed to subinhibitory concentrations of vancomycin",
"volume": "58",
"author": "N Doroshenko",
"year": "2014",
"journal-title": "Antimicrob Agents Chemotherapy"
},
{
"issue": "1",
"key": "pone.0233284.ref063",
"doi-asserted-by": "crossref",
"first-page": "46",
"DOI": "10.1007/s00776-005-0968-7",
"article-title": "Antimicrobial susceptibility of Staphylococcus aureus and Staphylococcus epidermidis biofilms isolated from infected total hip arthroplasty cases",
"volume": "11",
"author": "S Nishimura",
"year": "2006",
"journal-title": "J Orthop Sci"
}
],
"container-title": [
"PLOS ONE"
],
"original-title": [
],
"language": "en",
"link": [
{
"URL": "https://dx.plos.org/10.1371/journal.pone.0233284",
"content-type": "unspecified",
"content-version": "vor",
"intended-application": "similarity-checking"
}
],
"deposited": {
"date-parts": [
[
2020,
5,
29
]
],
"date-time": "2020-05-29T17:54:37Z",
"timestamp": 1590774877000
},
"score": 1,
"resource": {
"primary": {
"URL": "https://dx.plos.org/10.1371/journal.pone.0233284"
}
},
"subtitle": [
],
"editor": [
{
"given": "Rita G.",
"family": "Sobral",
"sequence": "first",
"affiliation": [
]
}
],
"short-title": [
],
"issued": {
"date-parts": [
[
2020,
5,
29
]
]
},
"references-count": 63,
"journal-issue": {
"issue": "5",
"published-online": {
"date-parts": [
[
2020,
5,
29
]
]
}
},
"URL": "http://dx.doi.org/10.1371/journal.pone.0233284",
"relation": {
},
"ISSN": [
"1932-6203"
],
"issn-type": [
{
"value": "1932-6203",
"type": "electronic"
}
],
"subject": [
"Multidisciplinary"
],
"published": {
"date-parts": [
[
2020,
5,
29
]
]
}
}

View File

@ -461,6 +461,86 @@ class CrossrefMappingTest {
} }
@Test
def testConvertArticleFromCrossRef2OafSFI(): Unit = {
  // Read the Crossref JSON fixture from the test classpath.
  val fixture = getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/sfi_funded_article.json")
  val json = Source.fromInputStream(fixture).mkString
  assertNotNull(json)
  assertFalse(json.isEmpty)

  // Convert the record; the output is expected to mix publications and relations.
  val resultList: List[Oaf] = Crossref2Oaf.convert(json)
  assertTrue(resultList.nonEmpty)

  // Exactly one Publication must come out of the conversion.
  val publications = resultList.collect { case p: Publication => p }
  assert(publications.nonEmpty)
  assert(publications.size == 1)
  val result: Result = publications.head
  assertNotNull(result)

  logger.info(mapper.writeValueAsString(result))

  // Provenance: every qualifier field has to be populated.
  val dataInfo = result.getDataInfo
  assertNotNull(dataInfo, "Datainfo test not null Failed")
  val provenance = dataInfo.getProvenanceaction
  assertNotNull(provenance, "DataInfo/Provenance test not null Failed")
  assertFalse(provenance.getClassid.isEmpty, "DataInfo/Provenance/classId test not null Failed")
  assertFalse(provenance.getClassname.isEmpty, "DataInfo/Provenance/className test not null Failed")
  assertFalse(provenance.getSchemeid.isEmpty, "DataInfo/Provenance/SchemeId test not null Failed")
  assertFalse(provenance.getSchemename.isEmpty, "DataInfo/Provenance/SchemeName test not null Failed")

  // CollectedFrom: must contain the expected key and the "crossref" value.
  assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed")
  assertFalse(result.getCollectedfrom.isEmpty)
  val sources = result.getCollectedfrom.asScala
  assert(
    sources.exists(_.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
    "Wrong collected from assertion"
  )
  assert(
    sources.exists(_.getValue.equalsIgnoreCase("crossref")),
    "Wrong collected from assertion"
  )

  // Relevant dates: a date qualified as "created" is required.
  val dates = result.getRelevantdate.asScala
  assert(
    dates.exists(_.getQualifier.getClassid.equalsIgnoreCase("created")),
    "Missing relevant date of type created"
  )

  // Relations: at least one, and each must be fully specified.
  val rels = resultList.collect { case r: Relation => r }
  assertFalse(rels.isEmpty)
  for (relation <- rels) {
    assertNotNull(relation)
    assertFalse(relation.getSource.isEmpty)
    assertFalse(relation.getTarget.isEmpty)
    assertFalse(relation.getRelClass.isEmpty)
    assertFalse(relation.getRelType.isEmpty)
    assertFalse(relation.getSubRelType.isEmpty)
  }
}
@Test @Test
def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = { def testSetDateOfAcceptanceCrossRef2Oaf(): Unit = {

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
@ -51,7 +51,7 @@
<dependency> <dependency>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-aggregation</artifactId> <artifactId>dhp-aggregation</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>

View File

@ -8,6 +8,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -56,7 +57,9 @@ public class PrepareResultOrcidAssociationStep1 {
final String resultClassName = parser.get("resultTableName"); final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName); log.info("resultTableName: {}", resultClassName);
final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";")); final List<String> allowedsemrel = Arrays.stream(parser.get("allowedsemrels").split(";"))
.map(s -> s.toLowerCase()).collect(Collectors.toList());
log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel)); log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
final List<String> allowedPids = Arrays.asList(parser.get("allowedpids").split(";")); final List<String> allowedPids = Arrays.asList(parser.get("allowedpids").split(";"));
@ -122,28 +125,27 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz); Dataset<R> result = readPath(spark, outputPath + "/resultSubset", resultClazz);
result result
.joinWith(relation, result.col("id").equalTo(relation.col("source"))) .joinWith(relation, result.col("id").equalTo(relation.col("source")))
.map((MapFunction<Tuple2<R, Relation>, ResultOrcidList>) t2 -> { .map((MapFunction<Tuple2<R, Relation>, ResultOrcidList>) t2 -> {
ResultOrcidList rol = new ResultOrcidList(); ResultOrcidList rol = new ResultOrcidList();
rol.setResultId(t2._2().getTarget()); rol.setResultId(t2._2().getTarget());
List<AutoritativeAuthor> aal = new ArrayList<>(); List<AutoritativeAuthor> aal = new ArrayList<>();
t2._1().getAuthor().stream().forEach(a -> { t2._1().getAuthor().stream().forEach(a -> {
a.getPid().stream().forEach(p -> { a.getPid().stream().forEach(p -> {
if (allowedPids.contains(p.getQualifier().getClassid().toLowerCase())) { if (allowedPids.contains(p.getQualifier().getClassid().toLowerCase())) {
aal aal
.add( .add(
AutoritativeAuthor AutoritativeAuthor
.newInstance(a.getName(), a.getSurname(), a.getFullname(), p.getValue())); .newInstance(a.getName(), a.getSurname(), a.getFullname(), p.getValue()));
} }
});
}); });
}); return rol;
return rol; }, Encoders.bean(ResultOrcidList.class)).write()
}, Encoders.bean(ResultOrcidList.class))
.write()
.option("compression", "gzip") .option("compression", "gzip")
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.json(outputPath + "/" + resultType); .json(outputPath + "/" + resultType);
;
} }

View File

@ -10,6 +10,8 @@ import java.util.Set;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*; import org.apache.spark.sql.*;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -63,33 +65,30 @@ public class PrepareResultOrcidAssociationStep2 {
.union(readPath(spark, inputPath + "/software", ResultOrcidList.class)); .union(readPath(spark, inputPath + "/software", ResultOrcidList.class));
resultOrcidAssoc resultOrcidAssoc
.toJavaRDD() .groupByKey((MapFunction<ResultOrcidList, String>) rol -> rol.getResultId(), Encoders.STRING())
.mapToPair(r -> new Tuple2<>(r.getResultId(), r)) .mapGroups((MapGroupsFunction<String, ResultOrcidList, ResultOrcidList>) (k, it) ->{
.reduceByKey( ResultOrcidList resultOrcidList = it.next();
(a, b) -> { if(it.hasNext())
if (a == null) { {
return b;
}
if (b == null) {
return a;
}
Set<String> orcid_set = new HashSet<>(); Set<String> orcid_set = new HashSet<>();
a.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid())); resultOrcidList.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid()));
b it.forEachRemaining(val -> val
.getAuthorList() .getAuthorList()
.stream() .stream()
.forEach( .forEach(
aa -> { aa -> {
if (!orcid_set.contains(aa.getOrcid())) { if (!orcid_set.contains(aa.getOrcid())) {
a.getAuthorList().add(aa); resultOrcidList.getAuthorList().add(aa);
orcid_set.add(aa.getOrcid()); orcid_set.add(aa.getOrcid());
} }
}); }));
return a; }
}) return resultOrcidList;
.map(Tuple2::_2) },Encoders.bean(ResultOrcidList.class) )
.map(r -> OBJECT_MAPPER.writeValueAsString(r)) .write()
.saveAsTextFile(outputPath, GzipCodec.class); .mode(SaveMode.Overwrite)
.option("compression","gzip")
.json(outputPath);
} }
} }

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel;
import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
@ -59,9 +60,9 @@ public class SparkOrcidToResultFromSemRelJob {
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName); Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
runWithSparkHiveSession(
runWithSparkSession(
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {

View File

@ -11,10 +11,7 @@ import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.*;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -159,8 +156,14 @@ public class SparkResultToOrganizationFromIstRepoJob {
Dataset<R> result = readPath(spark, inputPath, resultClazz); Dataset<R> result = readPath(spark, inputPath, resultClazz);
result.createOrReplaceTempView("result"); result.createOrReplaceTempView("result");
createCfHbforResult(spark);
Dataset<Row> cfhb = spark.sql("select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb "
+
"from result r " +
"lateral view explode(instance) i as inst " +
"where r.datainfo.deletedbyinference=false");
//createCfHbforResult(spark);
cfhb.createOrReplaceTempView("cfhb");
dsOrg.createOrReplaceTempView("rels"); dsOrg.createOrReplaceTempView("rels");
return spark return spark

View File

@ -228,8 +228,12 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg> <arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<<<<<<< HEAD:dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml
<arg>--preparedInfoPath</arg><arg>${workingDir}/country/publication</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/country/publication</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
=======
<arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
>>>>>>> beta:dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/publication</arg> <arg>--outputPath</arg><arg>${outputPath}/publication</arg>
</spark> </spark>
@ -258,8 +262,12 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg> <arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<<<<<<< HEAD:dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml
<arg>--preparedInfoPath</arg><arg>${workingDir}/country/dataset</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/country/dataset</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
=======
<arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
>>>>>>> beta:dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg> <arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
</spark> </spark>
@ -288,8 +296,12 @@
--conf spark.sql.shuffle.partitions=3840 --conf spark.sql.shuffle.partitions=3840
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg> <arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<<<<<<< HEAD:dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml
<arg>--preparedInfoPath</arg><arg>${workingDir}/country/otherresearchproduct</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/country/otherresearchproduct</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg> <arg>--saveGraph</arg><arg>${saveGraph}</arg>
=======
<arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
>>>>>>> beta:dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg> <arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
</spark> </spark>
@ -319,7 +331,6 @@
</spark-opts> </spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg> <arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg> <arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/software</arg> <arg>--outputPath</arg><arg>${outputPath}/software</arg>
</spark> </spark>

View File

@ -5,9 +5,9 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import eu.dnetlib.dhp.schema.oaf.Relation;
import com.google.gson.Gson;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.neethi.Assertion;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
@ -22,6 +22,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
@ -63,8 +64,7 @@ public class PrepareStep1Test {
} }
@Test @Test
void noUpdateTest() throws Exception { void noMatchTest() throws Exception {
//7 relationi fra issupplementedby e issupplementto
final String sourcePath = getClass() final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparestep1") .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparestep1")
@ -77,19 +77,25 @@ public class PrepareStep1Test {
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-resultTableName", Dataset.class.getCanonicalName(), "-resultTableName", Dataset.class.getCanonicalName(),
"-outputPath", workingDir.toString() + "/preparedInfo", "-outputPath", workingDir.toString() + "/preparedInfo",
"-allowedsemrels", "IsSupplementedBy;IsSupplementTo", "-allowedsemrels", "IsSupplementedBy;IsSupplementTo",
"-allowedpids","orcid;orcid_pending" "-allowedpids", "orcid;orcid_pending"
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<ResultOrcidList> tmp = sc JavaRDD<ResultOrcidList> tmp = sc
.textFile(workingDir.toString() + "/preparedInfo") .textFile(workingDir.toString() + "/preparedInfo/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class)); .map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class));
System.out.println("***************** COUNT ********************* \n" + tmp.count()); Assertions.assertEquals(0, tmp.count());
tmp.map(s -> new Gson().toJson(s)).foreach(s -> System.out.println(s));
Assertions.assertEquals(7, sc
.textFile(workingDir.toString() + "/preparedInfo/relationSubset")
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class)).count());
Assertions.assertEquals(0, sc
.textFile(workingDir.toString() + "/preparedInfo/resultSubset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)).count());
} }

View File

@ -11,3 +11,4 @@
{u'dataInfo': {u'invisible': False, u'provenanceaction': {u'classid': u'sysimport:crosswalk:entityregistry', u'classname': u'Harvested', u'schemeid': u'dnet:provenanceActions', u'schemename': u'dnet:provenanceActions'}, u'trust': u'0.91', u'inferred': False, u'deletedbyinference': False}, u'qualifier': {u'classid': u'orcid', u'classname': u'Open Researcher and Contributor ID', u'schemeid': u'dnet:pid_types', u'schemename': u'dnet:pid_types'}, u'value': u'0000-0002-5001-6911'}

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -4,8 +4,6 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap.model.doaj;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnore;
public class BibJson implements Serializable { public class BibJson implements Serializable {
private Editorial editorial; private Editorial editorial;
private PidScheme pid_scheme; private PidScheme pid_scheme;

View File

@ -42,10 +42,7 @@ import java.io.IOException;
import java.sql.Array; import java.sql.Array;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.function.Function; import java.util.function.Function;
import java.util.function.Predicate; import java.util.function.Predicate;
@ -311,7 +308,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse")); ds.setConsenttermsofuse(rs.getBoolean("consenttermsofuse"));
ds.setFulltextdownload(rs.getBoolean("fulltextdownload")); ds.setFulltextdownload(rs.getBoolean("fulltextdownload"));
ds.setConsenttermsofusedate(rs.getDate("consenttermsofusedate").toString()); ds
.setConsenttermsofusedate(
Optional
.ofNullable(
rs.getDate("consenttermsofusedate"))
.map(c -> c.toString())
.orElse(null));
return Arrays.asList(ds); return Arrays.asList(ds);
} catch (final Exception e) { } catch (final Exception e) {

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-promote</artifactId> <artifactId>dhp-stats-promote</artifactId>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-stats-update</artifactId> <artifactId>dhp-stats-update</artifactId>

View File

@ -12,6 +12,8 @@ and (ri.accessright = 'Open Access'
or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp or ri.accessright = 'Embargo' or ri.accessright = 'Open Source')) tmp
on p.id= tmp.id; on p.id= tmp.id;
compute stats indi_pub_green_oa;
create table indi_pub_grey_lit stored as parquet as create table indi_pub_grey_lit stored as parquet as
select distinct p.id, coalesce(grey_lit, 0) as grey_lit select distinct p.id, coalesce(grey_lit, 0) as grey_lit
from publication p from publication p
@ -22,6 +24,8 @@ join result_classifications rt on rt.id = p.id
where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and
not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id; not exists (select 1 from result_classifications rc where type ='Other literature type' and rc.id=p.id)) tmp on p.id=tmp.id;
compute stats indi_pub_grey_lit;
create table indi_pub_doi_from_crossref stored as parquet as create table indi_pub_doi_from_crossref stored as parquet as
select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref
from publication p from publication p
@ -31,6 +35,7 @@ join datasource d on d.id = ri.collectedfrom
where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp
on tmp.id=p.id; on tmp.id=p.id;
compute stats indi_pub_doi_from_crossref;
---- Sprint 2 ---- ---- Sprint 2 ----
create table indi_result_has_cc_licence stored as parquet as create table indi_result_has_cc_licence stored as parquet as
select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license
@ -40,6 +45,8 @@ join result_licenses as license on license.id = r.id
where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp
on r.id= tmp.id; on r.id= tmp.id;
compute stats indi_result_has_cc_licence;
create table indi_result_has_cc_licence_url stored as parquet as create table indi_result_has_cc_licence_url stored as parquet as
select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url
from result r from result r
@ -49,16 +56,21 @@ join result_licenses as license on license.id = r.id
WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp
on r.id= tmp.id; on r.id= tmp.id;
compute stats indi_result_has_cc_licence_url;
create table indi_pub_has_abstract stored as parquet as create table indi_pub_has_abstract stored as parquet as
select distinct publication.id, coalesce(abstract, 1) has_abstract select distinct publication.id, coalesce(abstract, 1) has_abstract
from publication; from publication;
compute stats indi_pub_has_abstract;
create table indi_result_with_orcid stored as parquet as create table indi_result_with_orcid stored as parquet as
select distinct r.id, coalesce(has_orcid, 0) as has_orcid select distinct r.id, coalesce(has_orcid, 0) as has_orcid
from result r from result r
left outer join (select id, 1 as has_orcid from result_orcid) tmp left outer join (select id, 1 as has_orcid from result_orcid) tmp
on r.id= tmp.id; on r.id= tmp.id;
compute stats indi_result_with_orcid;
---- Sprint 3 ---- ---- Sprint 3 ----
create table indi_funded_result_with_fundref stored as parquet as create table indi_funded_result_with_fundref stored as parquet as
@ -68,6 +80,8 @@ left outer join (select distinct id, 1 as fundref from project_results
where provenance='Harvested') tmp where provenance='Harvested') tmp
on r.id= tmp.id; on r.id= tmp.id;
compute stats indi_funded_result_with_fundref;
create table indi_result_org_country_collab stored as parquet as create table indi_result_org_country_collab stored as parquet as
with tmp as with tmp as
(select o.id as id, o.country , ro.id as result,r.type from organization o (select o.id as id, o.country , ro.id as result,r.type from organization o
@ -79,6 +93,8 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id and o1.country<>o2.country where o1.id<>o2.id and o1.country<>o2.country
group by o1.id, o1.type,o2.country; group by o1.id, o1.type,o2.country;
compute stats indi_result_org_country_collab;
create table indi_result_org_collab stored as parquet as create table indi_result_org_collab stored as parquet as
with tmp as with tmp as
(select o.id, ro.id as result,r.type from organization o (select o.id, ro.id as result,r.type from organization o
@ -90,6 +106,8 @@ join tmp as o2 on o1.result=o2.result
where o1.id<>o2.id where o1.id<>o2.id
group by o1.id, o2.id, o1.type; group by o1.id, o2.id, o1.type;
compute stats indi_result_org_collab;
create table indi_funder_country_collab stored as parquet as create table indi_funder_country_collab stored as parquet as
with tmp as (select funder, project, country from organization_projects op with tmp as (select funder, project, country from organization_projects op
join organization o on o.id=op.id join organization o on o.id=op.id
@ -101,6 +119,8 @@ join tmp as f2 on f1.project=f2.project
where f1.country<>f2.country where f1.country<>f2.country
group by f1.funder, f2.country, f1.country; group by f1.funder, f2.country, f1.country;
compute stats indi_funder_country_collab;
create table indi_result_country_collab stored as parquet as create table indi_result_country_collab stored as parquet as
with tmp as with tmp as
(select country, ro.id as result,r.type from organization o (select country, ro.id as result,r.type from organization o
@ -112,6 +132,8 @@ join tmp as o2 on o1.result=o2.result
where o1.country<>o2.country where o1.country<>o2.country
group by o1.country, o2.country, o1.type; group by o1.country, o2.country, o1.type;
compute stats indi_result_country_collab;
---- Sprint 4 ---- ---- Sprint 4 ----
create table indi_pub_diamond stored as parquet as create table indi_pub_diamond stored as parquet as
select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal
@ -123,6 +145,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp
on pd.id=tmp.id; on pd.id=tmp.id;
compute stats indi_pub_diamond;
create table indi_pub_hybrid stored as parquet as create table indi_pub_hybrid stored as parquet as
select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid select distinct pd.id, coalesce(is_hybrid, 0) as is_hybrid
from publication_datasources pd from publication_datasources pd
@ -133,6 +157,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp and (ps.journal_is_in_doaj=false and ps.journal_is_oa=false)) tmp
on pd.id=tmp.id; on pd.id=tmp.id;
compute stats indi_pub_hybrid;
create table indi_pub_in_transformative stored as parquet as create table indi_pub_in_transformative stored as parquet as
select distinct pd.id, coalesce(is_transformative, 0) as is_transformative select distinct pd.id, coalesce(is_transformative, 0) as is_transformative
from publication pd from publication pd
@ -143,6 +169,8 @@ join stats_ext.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_onli
and ps.is_transformative_journal=true) tmp and ps.is_transformative_journal=true) tmp
on pd.id=tmp.id; on pd.id=tmp.id;
compute stats indi_pub_in_transformative;
create table indi_pub_closed_other_open stored as parquet as create table indi_pub_closed_other_open stored as parquet as
select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from result_instance ri
left outer join left outer join
@ -153,11 +181,12 @@ where d.type like '%Journal%' and ri.accessright='Closed Access' and
(p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp
on tmp.id=ri.id; on tmp.id=ri.id;
compute stats indi_pub_closed_other_open;
---- Sprint 5 ---- ---- Sprint 5 ----
create table indi_result_no_of_copies stored as parquet as create table indi_result_no_of_copies stored as parquet as
select id, count(id) as number_of_copies from result_instance group by id; select id, count(id) as number_of_copies from result_instance group by id;
compute stats indi_result_no_of_copies;
---- Sprint 6 ---- ---- Sprint 6 ----
create table indi_pub_gold_oa stored as parquet as create table indi_pub_gold_oa stored as parquet as
WITH gold_oa AS ( WITH gold_oa AS (
@ -183,6 +212,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_pub_gold_oa;
create table indi_datasets_gold_oa stored as parquet as create table indi_datasets_gold_oa stored as parquet as
WITH gold_oa AS ( WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@ -210,6 +241,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_datasets_gold_oa;
create table indi_software_gold_oa stored as parquet as create table indi_software_gold_oa stored as parquet as
WITH gold_oa AS ( WITH gold_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_1 as issn
@ -237,6 +270,8 @@ LEFT OUTER JOIN (
JOIN issn on issn.id=pd.datasource JOIN issn on issn.id=pd.datasource
JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id; JOIN gold_oa on issn.issn = gold_oa.issn) tmp ON pd.id=tmp.id;
compute stats indi_software_gold_oa;
create table indi_org_findable stored as parquet as create table indi_org_findable stored as parquet as
with result_with_pid as ( with result_with_pid as (
select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro select ro.organization organization, count(distinct rp.id) no_result_with_pid from result_organization ro
@ -263,6 +298,8 @@ join result_with_pid_share on result_with_pid_share.organization=allresults.orga
left outer join ( left outer join (
select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization; select organization, abstract_share from result_with_abstract_share) tmp on tmp.organization=allresults.organization;
compute stats indi_org_findable;
create table indi_org_openess stored as parquet as create table indi_org_openess stored as parquet as
WITH datasets_oa as ( WITH datasets_oa as (
SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg SELECT ro.organization, count(dg.id) no_oadatasets FROM indi_datasets_gold_oa dg
@ -313,6 +350,8 @@ left outer join (
left outer join ( left outer join (
select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization; select organization,s from allsoftwaresshare) tmp1 on tmp1.organization=allpubsshare.organization;
compute stats indi_org_openess;
create table indi_pub_hybrid_oa_with_cc stored as parquet as create table indi_pub_hybrid_oa_with_cc stored as parquet as
WITH hybrid_oa AS ( WITH hybrid_oa AS (
SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn SELECT issn_l, journal_is_in_doaj, journal_is_oa, issn_print as issn
@ -343,6 +382,8 @@ LEFT OUTER JOIN (
JOIN indi_result_has_cc_licence cc on pd.id=cc.id JOIN indi_result_has_cc_licence cc on pd.id=cc.id
where cc.has_cc_license=1) tmp on pd.id=tmp.id; where cc.has_cc_license=1) tmp on pd.id=tmp.id;
compute stats indi_pub_hybrid_oa_with_cc;
create table indi_pub_downloads stored as parquet as create table indi_pub_downloads stored as parquet as
SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats SELECT result_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id join publication on result_id=id
@ -350,6 +391,8 @@ where downloads>0
GROUP BY result_id GROUP BY result_id
order by no_dowloads desc; order by no_dowloads desc;
compute stats indi_pub_downloads;
create table indi_pub_downloads_datasource stored as parquet as create table indi_pub_downloads_datasource stored as parquet as
SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats SELECT result_id, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats
join publication on result_id=id join publication on result_id=id
@ -357,15 +400,21 @@ where downloads>0
GROUP BY result_id, repository_id GROUP BY result_id, repository_id
order by result_id; order by result_id;
compute stats indi_pub_downloads_datasource;
create table indi_pub_downloads_year stored as parquet as create table indi_pub_downloads_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us SELECT result_id, substring(us.`date`, 1,4) as `year`, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id where downloads>0 join publication on result_id=id where downloads>0
GROUP BY result_id, `year` GROUP BY result_id, `year`
order by `year` asc; order by `year` asc;
compute stats indi_pub_downloads_year;
create table indi_pub_downloads_datasource_year stored as parquet as create table indi_pub_downloads_datasource_year stored as parquet as
SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us SELECT result_id, substring(us.`date`, 1,4) as `year`, repository_id, sum(downloads) no_dowloads from openaire_prod_usage_stats.usage_stats us
join publication on result_id=id join publication on result_id=id
where downloads>0 where downloads>0
GROUP BY result_id, repository_id, `year` GROUP BY result_id, repository_id, `year`
order by `year` asc, result_id; order by `year` asc, result_id;
compute stats indi_pub_downloads_datasource_year;

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-raw-data-update</artifactId> <artifactId>dhp-usage-raw-data-update</artifactId>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>dhp-usage-stats-build</artifactId> <artifactId>dhp-usage-stats-build</artifactId>

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.2.4-SNAPSHOT</version> <version>1.2.5-SNAPSHOT</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<licenses> <licenses>
@ -551,6 +551,9 @@
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId> <artifactId>maven-site-plugin</artifactId>
<version>3.9.1</version> <version>3.9.1</version>
<configuration>
<skip>${dhp.site.skip}</skip>
</configuration>
</plugin> </plugin>
<plugin> <plugin>
@ -791,6 +794,7 @@
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version> <dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
<dhp.jackson.version>2.9.6</dhp.jackson.version> <dhp.jackson.version>2.9.6</dhp.jackson.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version> <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.site.skip>true</dhp.site.skip>
<dhp.guava.version>11.0.2</dhp.guava.version> <dhp.guava.version>11.0.2</dhp.guava.version>
<scala.version>2.11.12</scala.version> <scala.version>2.11.12</scala.version>
<junit-jupiter.version>5.6.1</junit-jupiter.version> <junit-jupiter.version>5.6.1</junit-jupiter.version>