Compare commits
17 Commits
main...enrichment
Author | SHA1 | Date |
---|---|---|
Miriam Baglioni | 481c4e28d1 | |
Miriam Baglioni | 2e6af7c655 | |
Miriam Baglioni | de9d0ace38 | |
Miriam Baglioni | b0969461f8 | |
Miriam Baglioni | 1e233bedf6 | |
Miriam Baglioni | 30e0f60ac8 | |
Miriam Baglioni | 7501e823ed | |
Miriam Baglioni | d205bf78d8 | |
Miriam Baglioni | 550e1a4e33 | |
Miriam Baglioni | 8a39a85a5f | |
Miriam Baglioni | d1519fa28f | |
Miriam Baglioni | aecea5a095 | |
Miriam Baglioni | 1a8641227d | |
Miriam Baglioni | 157e6bf5e1 | |
Miriam Baglioni | a6c26a9e0e | |
Miriam Baglioni | 9bd5310112 | |
Miriam Baglioni | 7406c88276 | |
@ -0,0 +1,982 @@
{
  "indexed": { "date-parts": [[2022, 4, 5]], "date-time": "2022-04-05T11:07:00Z", "timestamp": 1649156820730 },
  "reference-count": 63,
  "publisher": "Public Library of Science (PLoS)",
  "issue": "5",
  "license": [
    { "start": { "date-parts": [[2020, 5, 29]], "date-time": "2020-05-29T00:00:00Z", "timestamp": 1590710400000 }, "content-version": "vor", "delay-in-days": 0, "URL": "http://creativecommons.org/licenses/by/4.0/" }
  ],
  "funder": [
    { "DOI": "10.13039/501100001602", "name": "Science Foundation Ireland", "doi-asserted-by": "crossref", "award": ["SFI/12/RC/2273"] },
    { "DOI": "10.13039/501100001602", "name": "Science Foundation Ireland", "doi-asserted-by": "crossref", "award": ["SFI/12/RC/2273"] },
    { "DOI": "10.13039/501100001602", "name": "Science Foundation Ireland", "doi-asserted-by": "crossref", "award": ["SFI/12/RC/2273"] },
    { "DOI": "10.13039/501100001602", "name": "Science Foundation Ireland", "doi-asserted-by": "crossref", "award": ["SFI/12/RC/2273"] }
  ],
  "content-domain": { "domain": ["www.plosone.org"], "crossmark-restriction": false },
  "short-container-title": ["PLoS ONE"],
  "DOI": "10.1371/journal.pone.0233284",
  "type": "journal-article",
  "created": { "date-parts": [[2020, 5, 29]], "date-time": "2020-05-29T17:53:47Z", "timestamp": 1590774827000 },
  "page": "e0233284",
  "update-policy": "http://dx.doi.org/10.1371/journal.pone.corrections_policy",
  "source": "Crossref",
  "is-referenced-by-count": 13,
  "title": ["Vancomycin and nisin A are effective against biofilms of multi-drug resistant Staphylococcus aureus isolates from human milk"],
  "prefix": "10.1371",
  "volume": "15",
  "author": [
    { "ORCID": "http://orcid.org/0000-0003-4107-0278", "authenticated-orcid": true, "given": "Angeliki", "family": "Angelopoulou", "sequence": "first", "affiliation": [] },
    { "given": "Des", "family": "Field", "sequence": "additional", "affiliation": [] },
    { "given": "Mariana", "family": "Pérez-Ibarreche", "sequence": "additional", "affiliation": [] },
    { "ORCID": "http://orcid.org/0000-0001-8317-6455", "authenticated-orcid": true, "given": "Alicja K.", "family": "Warda", "sequence": "additional", "affiliation": [] },
    { "given": "Colin", "family": "Hill", "sequence": "additional", "affiliation": [] },
    { "given": "R. Paul", "family": "Ross", "sequence": "additional", "affiliation": [] }
  ],
  "member": "340",
  "published-online": { "date-parts": [[2020, 5, 29]] },
  "reference": [
    { "issue": "7–8", "key": "pone.0233284.ref001", "doi-asserted-by": "crossref", "first-page": "509", "DOI": "10.1016/S0899-9007(00)00363-4", "article-title": "Breast milk: a truly functional food", "volume": "16", "author": "B. Lönnerdal", "year": "2000", "journal-title": "Nutrition" },
    { "key": "pone.0233284.ref002", "first-page": "1", "volume-title": "Mastitis: causes and management", "author": "WHO", "year": "2000" },
    { "issue": "1–2", "key": "pone.0233284.ref003", "doi-asserted-by": "crossref", "first-page": "115", "DOI": "10.1038/pr.2014.178", "article-title": "Human milk and infant intestinal mucosal glycans guide succession of the neonatal intestinal microbiota", "volume": "77", "author": "DS Newburg", "year": "2015", "journal-title": "Pediatr Res" },
    { "issue": "7", "key": "pone.0233284.ref004", "doi-asserted-by": "crossref", "first-page": "647", "DOI": "10.1001/jamapediatrics.2017.0378", "article-title": "Association between breast milk bacterial communities and establishment and development of the infant gut microbiome", "volume": "171", "author": "PS Pannaraj", "year": "2017", "journal-title": "JAMA Pediatr" },
    { "issue": "Suppl 2:", "key": "pone.0233284.ref005", "doi-asserted-by": "crossref", "first-page": "S69", "DOI": "10.1542/peds.2008-1315i", "article-title": "Why mothers stop breastfeeding: mothers' self-reported reasons for stopping during the first year", "volume": "122", "author": "R Li", "year": "2008", "journal-title": "Pediatrics" },
    { "issue": "2", "key": "pone.0233284.ref006", "doi-asserted-by": "crossref", "first-page": "83", "DOI": "10.1007/s00430-017-0532-z", "article-title": "The microbiology and treatment of human mastitis", "volume": "207", "author": "A Angelopoulou", "year": "2018", "journal-title": "Med Microbiol Immunol" },
    { "issue": "2", "key": "pone.0233284.ref007", "doi-asserted-by": "crossref", "first-page": "169", "DOI": "10.3920/BM2013.0036", "article-title": "Probiotics for human lactational mastitis", "volume": "5", "author": "L Fernández", "year": "2014", "journal-title": "Benef Microbes" },
    { "issue": "15", "key": "pone.0233284.ref008", "doi-asserted-by": "crossref", "first-page": "4650", "DOI": "10.1128/AEM.02599-07", "article-title": "Oral administration of Lactobacillus strains isolated from breast milk as an alternative for the treatment of infectious mastitis during lactation", "volume": "74", "author": "E Jiménez", "year": "2008", "journal-title": "Appl Environ Microbiol" },
    { "issue": "3", "key": "pone.0233284.ref009", "doi-asserted-by": "crossref", "first-page": "406", "DOI": "10.1177/0890334415585078", "article-title": "Metagenomic analysis of milk of healthy and mastitis-suffering women", "volume": "31", "author": "E Jiménez", "year": "2015", "journal-title": "J Hum Lact" },
    { "issue": "2", "key": "pone.0233284.ref010", "doi-asserted-by": "crossref", "first-page": "176", "DOI": "10.1086/589241", "article-title": "Risk of infection and death due to methicillin-resistant Staphylococcus aureus in long-term carriers", "volume": "47", "author": "R Datta", "year": "2008", "journal-title": "Clin Infect Dis" },
    { "issue": "4", "key": "pone.0233284.ref011", "doi-asserted-by": "crossref", "DOI": "10.1128/microbiolspec.GPP3-0023-2018", "article-title": "Staphylococcal biofilms", "volume": "6", "author": "M. Otto", "year": "2018", "journal-title": "Microbiol Spectr" },
    { "issue": "1", "key": "pone.0233284.ref012", "doi-asserted-by": "crossref", "first-page": "9", "DOI": "10.1038/s41522-018-0053-6", "article-title": "Fighting biofilms with lantibiotics and other groups of bacteriocins", "volume": "4", "author": "H Mathur", "year": "2018", "journal-title": "NPJ Biofilms Microbiomes" },
    { "issue": "2", "key": "pone.0233284.ref013", "doi-asserted-by": "crossref", "first-page": "310", "DOI": "10.1128/MMBR.00041-08", "article-title": "Signals, regulatory networks, and materials that build and break bacterial biofilms", "volume": "73", "author": "E Karatan", "year": "2009", "journal-title": "Microbiol Mol Biol Rev" },
    { "issue": "3", "key": "pone.0233284.ref014", "doi-asserted-by": "crossref", "first-page": "147", "DOI": "10.1016/S1473-3099(01)00091-3", "article-title": "Vancomycin-resistant Staphylococcus aureus: a new model of antibiotic resistance", "volume": "1", "author": "K Hiramatsu", "year": "2001", "journal-title": "Lancet Infect Dis" },
    { "key": "pone.0233284.ref015", "doi-asserted-by": "crossref", "first-page": "339", "DOI": "10.1146/annurev.mi.38.100184.002011", "article-title": "The structure and mode of action of glycopeptide antibiotics of the vancomycin group", "volume": "38", "author": "JC Barna", "year": "1984", "journal-title": "Annu Rev Microbiol" },
    { "key": "pone.0233284.ref016", "unstructured": "Health Service Executive Mastitis Factsheet for Health Care Professionals. Available at: https://www.breastfeeding.ie/Uploads/Mastitis.pdf" },
    { "issue": "3", "key": "pone.0233284.ref017", "first-page": "136", "article-title": "Breast infection: a review of diagnosis and management practices", "volume": "14", "author": "E Boakes", "year": "2018", "journal-title": "Eur J Breast Health" },
    { "key": "pone.0233284.ref018", "doi-asserted-by": "crossref", "first-page": "1205", "DOI": "10.3389/fmicb.2017.01205", "article-title": "Bacteriocin-antimicrobial synergy: A medical and food perspective", "volume": "8", "author": "H Mathur", "year": "2017", "journal-title": "Front Microbiol" },
    { "issue": "1", "key": "pone.0233284.ref019", "doi-asserted-by": "crossref", "first-page": "223", "DOI": "10.1016/S0005-2736(99)00208-4", "article-title": "The lantibiotic nisin, a special case or not?", "volume": "1462", "author": "E Breukink", "year": "1999", "journal-title": "Biochim Biophys Acta" },
    { "issue": "10", "key": "pone.0233284.ref020", "doi-asserted-by": "crossref", "first-page": "963", "DOI": "10.1038/nsmb830", "article-title": "The nisin-lipid II complex reveals a pyrophosphate cage that provides a blueprint for novel antibiotics", "volume": "11", "author": "ST Hsu", "year": "2004", "journal-title": "Nat Struct Mol Biol" },
    { "issue": "5793", "key": "pone.0233284.ref021", "doi-asserted-by": "crossref", "first-page": "1636", "DOI": "10.1126/science.1129818", "article-title": "An alternative bactericidal mechanism of action for lantibiotic peptides that target lipid II", "volume": "313", "author": "HE Hasper", "year": "2006", "journal-title": "Science" },
    { "issue": "3", "key": "pone.0233284.ref022", "doi-asserted-by": "crossref", "first-page": "1772", "DOI": "10.1074/jbc.M006770200", "article-title": "Specific binding of nisin to the peptidoglycan precursor lipid II combines pore formation and inhibition of cell wall biosynthesis for potent antibiotic activity", "volume": "276", "author": "I Wiedemann", "year": "2001", "journal-title": "J Biol Chem" },
    { "key": "pone.0233284.ref023", "doi-asserted-by": "crossref", "first-page": "104539", "DOI": "10.1016/j.idairyj.2019.104539", "article-title": "Bovine mastitis is a polymicrobial disease requiring a polydiagnostic approach", "volume": "99", "author": "A Angelopoulou", "year": "2019", "journal-title": "Int Dairy J" },
    { "issue": "4", "key": "pone.0233284.ref024", "doi-asserted-by": "crossref", "first-page": "493", "DOI": "10.1093/ajcp/45.4_ts.493", "article-title": "Antibiotic susceptibility testing by a standardized single disk method", "volume": "45", "author": "AW Bauer", "year": "1966", "journal-title": "Am Journal Clin Pathol" },
    { "key": "pone.0233284.ref025", "unstructured": "v_9.0_Breakpoint_Tables.pdf. Available at: http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/v_9.0_Breakpoint_Tables.pdf (Accessed: 28th July 2019)." },
    { "issue": "1–2", "key": "pone.0233284.ref026", "doi-asserted-by": "crossref", "first-page": "67", "DOI": "10.1016/j.vetmic.2010.05.044", "article-title": "(GTG)5-PCR fingerprinting for the classification and identification of coagulase-negative Staphylococcus species from bovine milk and teat apices: a comparison of type strains and field isolates", "volume": "147", "author": "G Braem", "year": "2011", "journal-title": "Vet Microbiol" },
    { "key": "pone.0233284.ref027", "doi-asserted-by": "crossref", "first-page": "270", "DOI": "10.1186/s12859-015-0703-0", "article-title": "GelJ a tool for analyzing DNA fingerprint gel images", "volume": "16", "author": "J Heras", "year": "2015", "journal-title": "BMC bioinformatics" },
    { "issue": "4", "key": "pone.0233284.ref028", "doi-asserted-by": "crossref", "first-page": "473", "DOI": "10.1111/j.1751-7915.2010.00184.x", "article-title": "Studies with bioengineered nisin peptides highlight the broad-spectrum potency of nisin V", "volume": "3", "author": "D Field", "year": "2010", "journal-title": "Microb Biotechnol" },
    { "issue": "11", "key": "pone.0233284.ref029", "doi-asserted-by": "crossref", "first-page": "e79563", "DOI": "10.1371/journal.pone.0079563", "article-title": "Intensive mutagenesis of the nisin hinge leads to the rational design of enhanced derivatives", "volume": "8", "author": "B Healy", "year": "2013", "journal-title": "PLoS One" },
    { "issue": "10", "key": "pone.0233284.ref030", "doi-asserted-by": "crossref", "first-page": "e46884", "DOI": "10.1371/journal.pone.0046884", "article-title": "Bioengineered nisin A derivatives with enhanced activity against both Gram positive and Gram negative pathogens", "volume": "7", "author": "D Field", "year": "2012", "journal-title": "PLoS One" },
    { "issue": "3", "key": "pone.0233284.ref031", "doi-asserted-by": "crossref", "first-page": "e0119684", "DOI": "10.1371/journal.pone.0119684", "article-title": "A Bioengineered nisin derivative to control biofilms of Staphylococcus pseudintermedius", "volume": "10", "author": "D Field", "year": "2015a", "journal-title": "PLoS One" },
    { "issue": "2", "key": "pone.0233284.ref032", "doi-asserted-by": "crossref", "first-page": "207", "DOI": "10.1007/s13765-012-3253-4", "article-title": "Biofilm formation, attachment, and cell hydrophobicity of foodborne pathogens under varied environmental conditions", "volume": "56", "author": "NY Choi", "year": "2013", "journal-title": "J Korean Soc Appl Biol Chem" },
    { "issue": "2", "key": "pone.0233284.ref033", "doi-asserted-by": "crossref", "first-page": "175", "DOI": "10.1016/S0167-7012(00)00122-6", "article-title": "A modified microtiter-plate test for quantification of staphylococcal biofilm formation", "volume": "40", "author": "S Stepanović", "year": "2000", "journal-title": "J Microbiol Methods" },
    { "issue": "2", "key": "pone.0233284.ref034", "doi-asserted-by": "crossref", "first-page": "225", "DOI": "10.1111/j.1574-695X.2011.00806.x", "article-title": "Characterization of Staphylococcus aureus strains involved in human and bovine mastitis", "volume": "62", "author": "S Delgado", "year": "2011", "journal-title": "FEMS Immunol Med Microbiol" },
    { "key": "pone.0233284.ref035", "doi-asserted-by": "crossref", "first-page": "53", "DOI": "10.2174/1874285801711010053", "article-title": "Understanding the mechanism of bacterial biofilms resistance to antimicrobial agents", "volume": "11", "author": "S Singh", "year": "2017", "journal-title": "Open Microbiol J" },
    { "issue": "1", "key": "pone.0233284.ref036", "doi-asserted-by": "crossref", "first-page": "61", "DOI": "10.2174/1389203053027584", "article-title": "Bacterial lantibiotics: strategies to improve therapeutic potential", "volume": "6", "author": "PD Cotter", "year": "2005", "journal-title": "Curr Protein Pept Sci" },
    { "issue": "5", "key": "pone.0233284.ref037", "doi-asserted-by": "crossref", "first-page": "494", "DOI": "10.1016/j.ijantimicag.2015.07.011", "article-title": "Bacteriocins and their position in the next wave of conventional antibiotics", "volume": "46", "author": "VL Cavera", "year": "2015", "journal-title": "Int J Antimicrob Agents" },
    { "key": "pone.0233284.ref038", "doi-asserted-by": "crossref", "first-page": "1363", "DOI": "10.3389/fmicb.2015.01363", "article-title": "Bioengineering lantibiotics for therapeutic success", "volume": "6", "author": "D Field", "year": "2015b", "journal-title": "Front Microbiol" },
    { "issue": "11", "key": "pone.0233284.ref039", "doi-asserted-by": "crossref", "first-page": "5572", "DOI": "10.1128/AAC.00888-13", "article-title": "Effects of bacteriocins on methicillin-resistant Staphylococcus aureus biofilm", "volume": "57", "author": "K Okuda", "year": "2013", "journal-title": "Antimicrob Agents Chemother" },
    { "issue": "6", "key": "pone.0233284.ref040", "doi-asserted-by": "crossref", "first-page": "511", "DOI": "10.1159/000335598", "article-title": "In vitro activities of nisin alone or in combination with vancomycin and ciprofloxacin against methicillin-resistant and methicillin-susceptible Staphylococcus aureus strains", "volume": "57", "author": "S Dosler", "year": "2011", "journal-title": "Chemotherapy" },
    { "issue": "18", "key": "pone.0233284.ref041", "doi-asserted-by": "crossref", "first-page": "5809", "DOI": "10.1128/AEM.01104-07", "article-title": "Dissection and modulation of the four distinct activities of nisin by mutagenesis of rings A and B and by C-terminal truncation", "volume": "73", "author": "R Rink", "year": "2007", "journal-title": "Appl Environ Microbiol" },
    { "issue": "6", "key": "pone.0233284.ref042", "doi-asserted-by": "crossref", "first-page": "806", "DOI": "10.1007/s00253-004-1599-1", "article-title": "Site-directed mutagenesis of the hinge region of nisinZ and properties of nisinZ mutants", "volume": "64", "author": "J Yuan", "year": "2004", "journal-title": "Appl Microbiol Biotechnol" },
    { "key": "pone.0233284.ref043", "doi-asserted-by": "crossref", "first-page": "508", "DOI": "10.3389/fmicb.2016.00508", "article-title": "In vitro activities of nisin and nisin derivatives alone and in combination with antibiotics against Staphylococcus biofilms", "volume": "7", "author": "D Field", "year": "2016", "journal-title": "Front Microbiol" },
    { "issue": "18", "key": "pone.0233284.ref044", "doi-asserted-by": "crossref", "first-page": "1573", "DOI": "10.2217/fmb-2019-0153", "article-title": "Nisin Z and lacticin 3147 improve efficacy of antibiotics against clinically significant bacteria", "volume": "14", "author": "JC Ellis", "year": "2020", "journal-title": "Future Microbiol" },
    { "issue": "3", "key": "pone.0233284.ref045", "doi-asserted-by": "crossref", "first-page": "311", "DOI": "10.1177/0890334408317435", "article-title": "The bacteriocin nisin, an effective agent for the treatment of staphylococcal mastitis during lactation", "volume": "24", "author": "L Fernández", "year": "2008", "journal-title": "J Hum Lact" },
    { "issue": "1", "key": "pone.0233284.ref046", "doi-asserted-by": "crossref", "first-page": "33", "DOI": "10.1159/000272223", "article-title": "Inflammatory breast diseases during lactation: milk stasis, puerperal mastitis, abscesses of the breast, and malignant tumors–current and evidence-based strategies for diagnosis and therapy", "volume": "5", "author": "M Abou-Dakn", "year": "2010", "journal-title": "Breast Care" },
    { "issue": "6", "key": "pone.0233284.ref047", "doi-asserted-by": "crossref", "first-page": "430", "DOI": "10.1007/s12262-012-0776-1", "article-title": "Management of lactational mastitis and breast abscesses: review of current knowledge and practice", "volume": "75", "author": "K Kataria", "year": "2013", "journal-title": "Indian J Surg" },
    { "issue": "2", "key": "pone.0233284.ref048", "doi-asserted-by": "crossref", "first-page": "77", "DOI": "10.1016/j.micres.2012.09.004", "article-title": "Genotypic diversity and virulent factors of Staphylococcus epidermidis isolated from human breast milk", "volume": "168", "author": "J Begović", "year": "2013", "journal-title": "Microbiol Res" },
    { "issue": "Pt 8", "key": "pone.0233284.ref049", "doi-asserted-by": "crossref", "first-page": "761", "DOI": "10.1099/jmm.0.05453-0", "article-title": "Antimicrobial-resistance and enterotoxin-encoding genes among staphylococci isolated from expressed human breast milk", "volume": "53", "author": "LA Carneiro", "year": "2004", "journal-title": "J Med Microbiol" },
    { "issue": "2", "key": "pone.0233284.ref050", "doi-asserted-by": "crossref", "first-page": "113", "DOI": "10.1007/s00284-015-0925-4", "article-title": "Antibiotic susceptibility of commensal bacteria from human milk", "volume": "72", "author": "PW Chen", "year": "2016", "journal-title": "Curr Microbiol" },
    { "key": "pone.0233284.ref051", "doi-asserted-by": "crossref", "first-page": "2512", "DOI": "10.3389/fmicb.2018.02512", "article-title": "Microbial community dynamics in mother's milk and infant's mouth and gut in moderately preterm infants", "volume": "9", "author": "E Biagi", "year": "2018", "journal-title": "Front Microbiol" },
    { "key": "pone.0233284.ref052", "doi-asserted-by": "crossref", "first-page": "4", "DOI": "10.3410/M4-4", "article-title": "Reduced vancomycin susceptibility among clinical Staphylococcus aureus isolates ('the MIC Creep'): implications for therapy", "volume": "4", "author": "A Dhand", "year": "2012", "journal-title": "F1000 Med Rep" },
    { "issue": "12", "key": "pone.0233284.ref053", "doi-asserted-by": "crossref", "first-page": "1112", "DOI": "10.1136/jcp.2009.069021", "article-title": "Low concentrations of vancomycin stimulate biofilm formation in some clinical isolates of Staphylococcus epidermidis", "volume": "62", "author": "JS Cargill", "year": "2009", "journal-title": "J Clin Pathol" },
    { "issue": "2", "key": "pone.0233284.ref054", "doi-asserted-by": "crossref", "first-page": "191", "DOI": "10.1002/jobm.201000221", "article-title": "Effect of sub-lethal doses of vancomycin and oxacillin on biofilm formation by vancomycin intermediate resistant Staphylococcus aureus", "volume": "51", "author": "ZA Mirani", "year": "2011", "journal-title": "J Basic Microbiol" },
    { "key": "pone.0233284.ref055", "doi-asserted-by": "crossref", "first-page": "225", "DOI": "10.1016/j.micpath.2017.07.004", "article-title": "Vancomycin-induced biofilm formation by methicillin-resistant Staphylococcus aureus is associated with the secretion of membrane vesicles", "volume": "110", "author": "X He", "year": "2017", "journal-title": "Microb Pathog" },
    { "issue": "9", "key": "pone.0233284.ref056", "doi-asserted-by": "crossref", "first-page": "1627", "DOI": "10.4315/0362-028X.JFP-12-001", "article-title": "Effects of nisin and lysozyme on growth inhibition and biofilm formation capacity of Staphylococcus aureus strains isolated from raw milk and cheese samples", "volume": "75", "author": "M Sudagidan", "year": "2012", "journal-title": "J Food Prot" },
    { "issue": "3", "key": "pone.0233284.ref057", "doi-asserted-by": "crossref", "first-page": "253", "DOI": "10.1016/j.ijfoodmicro.2008.01.011", "article-title": "Nisin-bacteriophage cross-resistance in Staphylococcus aureus", "volume": "122", "author": "B Martinez", "year": "2008", "journal-title": "Int J Food Microbiol" },
    { "issue": "1", "key": "pone.0233284.ref058", "doi-asserted-by": "crossref", "first-page": "82", "DOI": "10.2146/ajhp080434", "article-title": "Therapeutic monitoring of vancomycin in adult patients: a consensus review of the american society of health-system pharmacists, the infectious diseases society of america, and the society of infectious diseases pharmacists", "volume": "66", "author": "M Rybak", "year": "2009", "journal-title": "Am J Health Syst Pharm" },
    { "issue": "2", "key": "pone.0233284.ref059", "doi-asserted-by": "crossref", "first-page": "277", "DOI": "10.1111/j.1574-695X.2007.00300.x", "article-title": "Increased tolerance of Staphylococcus aureus to vancomycin in viscous media", "volume": "51", "author": "V Kostenko", "year": "2007", "journal-title": "FEMS Immunol Med Microbiol" },
    { "key": "pone.0233284.ref060", "first-page": "107", "article-title": "Multidrug tolerance of biofilms and persister cells", "volume": "322", "author": "K. Lewis", "year": "2008", "journal-title": "Curr Top Microbiol Immunol" },
    { "issue": "6", "key": "pone.0233284.ref061", "doi-asserted-by": "crossref", "first-page": "ftw056", "DOI": "10.1093/femspd/ftw056", "article-title": "Penetration barrier contributes to bacterial biofilm-associated resistance against only select antibiotics, and exhibits genus-, strain- and antibiotic-specific differences", "volume": "74", "author": "R Singh", "year": "2016", "journal-title": "Pathog Dis" },
    { "issue": "12", "key": "pone.0233284.ref062", "doi-asserted-by": "crossref", "first-page": "7273", "DOI": "10.1128/AAC.03132-14", "article-title": "Extracellular DNA impedes the transport of vancomycin in Staphylococcus epidermidis biofilms preexposed to subinhibitory concentrations of vancomycin", "volume": "58", "author": "N Doroshenko", "year": "2014", "journal-title": "Antimicrob Agents Chemotherapy" },
    { "issue": "1", "key": "pone.0233284.ref063", "doi-asserted-by": "crossref", "first-page": "46", "DOI": "10.1007/s00776-005-0968-7", "article-title": "Antimicrobial susceptibility of Staphylococcus aureus and Staphylococcus epidermidis biofilms isolated from infected total hip arthroplasty cases", "volume": "11", "author": "S Nishimura", "year": "2006", "journal-title": "J Orthop Sci" }
  ],
  "container-title": ["PLOS ONE"],
  "original-title": [],
  "language": "en",
  "link": [
    { "URL": "https://dx.plos.org/10.1371/journal.pone.0233284", "content-type": "unspecified", "content-version": "vor", "intended-application": "similarity-checking" }
  ],
  "deposited": { "date-parts": [[2020, 5, 29]], "date-time": "2020-05-29T17:54:37Z", "timestamp": 1590774877000 },
  "score": 1,
  "resource": { "primary": { "URL": "https://dx.plos.org/10.1371/journal.pone.0233284" } },
  "subtitle": [],
  "editor": [
    { "given": "Rita G.", "family": "Sobral", "sequence": "first", "affiliation": [] }
  ],
  "short-title": [],
  "issued": { "date-parts": [[2020, 5, 29]] },
  "references-count": 63,
  "journal-issue": { "issue": "5", "published-online": { "date-parts": [[2020, 5, 29]] } },
  "URL": "http://dx.doi.org/10.1371/journal.pone.0233284",
  "relation": {},
  "ISSN": ["1932-6203"],
  "issn-type": [
    { "value": "1932-6203", "type": "electronic" }
  ],
  "subject": ["Multidisciplinary"],
  "published": { "date-parts": [[2020, 5, 29]] }
}
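The record above is the Crossref metadata for DOI 10.1371/journal.pone.0233284; its funder array repeats the Science Foundation Ireland entry (award SFI/12/RC/2273) four times, which appears to be what the new sfi_funded_article.json fixture and the test below exercise. As an illustrative, hypothetical sanity check outside the test suite (the file location is an assumption), the funder entries can be listed with Jackson:

import java.nio.file.Files;
import java.nio.file.Path;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class FunderFixtureCheck {
	public static void main(String[] args) throws Exception {
		// Assumed location of the fixture; adjust to the actual test-resources path.
		String json = Files
			.readString(Path.of("src/test/resources/eu/dnetlib/doiboost/crossref/sfi_funded_article.json"));
		JsonNode root = new ObjectMapper().readTree(json);
		// Print funder DOI, name and award codes for each entry of the "funder" array.
		for (JsonNode funder : root.path("funder")) {
			System.out.printf("%s %s %s%n",
				funder.path("DOI").asText(),
				funder.path("name").asText(),
				funder.path("award").toString());
		}
	}
}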
@ -475,6 +475,86 @@ class CrossrefMappingTest {
  }

  @Test
  def testConvertArticleFromCrossRef2OafSFI(): Unit = {
    val json = Source
      .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/sfi_funded_article.json"))
      .mkString
    assertNotNull(json)
    assertFalse(json.isEmpty);

    val resultList: List[Oaf] = Crossref2Oaf.convert(json)

    assertTrue(resultList.nonEmpty)

    val items = resultList.filter(p => p.isInstanceOf[Publication])

    assert(items.nonEmpty)
    assert(items.size == 1)
    val result: Result = items.head.asInstanceOf[Publication]
    assertNotNull(result)

    logger.info(mapper.writeValueAsString(result));

    assertNotNull(result.getDataInfo, "Datainfo test not null Failed");
    assertNotNull(
      result.getDataInfo.getProvenanceaction,
      "DataInfo/Provenance test not null Failed"
    );
    assertFalse(
      result.getDataInfo.getProvenanceaction.getClassid.isEmpty,
      "DataInfo/Provenance/classId test not null Failed"
    );
    assertFalse(
      result.getDataInfo.getProvenanceaction.getClassname.isEmpty,
      "DataInfo/Provenance/className test not null Failed"
    );
    assertFalse(
      result.getDataInfo.getProvenanceaction.getSchemeid.isEmpty,
      "DataInfo/Provenance/SchemeId test not null Failed"
    );
    assertFalse(
      result.getDataInfo.getProvenanceaction.getSchemename.isEmpty,
      "DataInfo/Provenance/SchemeName test not null Failed"
    );

    assertNotNull(result.getCollectedfrom, "CollectedFrom test not null Failed");
    assertFalse(result.getCollectedfrom.isEmpty);

    val collectedFromList = result.getCollectedfrom.asScala
    assert(
      collectedFromList.exists(c => c.getKey.equalsIgnoreCase("10|openaire____::081b82f96300b6a6e3d282bad31cb6e2")),
      "Wrong collected from assertion"
    )

    assert(
      collectedFromList.exists(c => c.getValue.equalsIgnoreCase("crossref")),
      "Wrong collected from assertion"
    )

    val relevantDates = result.getRelevantdate.asScala

    assert(
      relevantDates.exists(d => d.getQualifier.getClassid.equalsIgnoreCase("created")),
      "Missing relevant date of type created"
    )

    val rels = resultList.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]]
    assertFalse(rels.isEmpty)
    rels.foreach(relation => {
      assertNotNull(relation)
      assertFalse(relation.getSource.isEmpty)
      assertFalse(relation.getTarget.isEmpty)
      assertFalse(relation.getRelClass.isEmpty)
      assertFalse(relation.getRelType.isEmpty)
      assertFalse(relation.getSubRelType.isEmpty)
    })

  }

  @Test
  def testConvertFromCrossRef2OafIssue(): Unit = {
    val json = Source
@ -1,5 +1,5 @@

package eu.dnetlib.dhp.countrypropagation;
package eu.dnetlib.dhp;

import java.io.Serializable;

@ -23,4 +23,5 @@ public class KeyValueSet implements Serializable {
	public void setValueSet(ArrayList<String> valueSet) {
		this.valueSet = valueSet;
	}

}
@ -4,22 +4,21 @@ package eu.dnetlib.dhp;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Country;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.*;

public class PropagationConstant {

@ -221,9 +220,28 @@ public class PropagationConstant {
			.orElse(Boolean.FALSE);
	}

	public static void createCfHbforResult(SparkSession spark) {
		org.apache.spark.sql.Dataset<Row> cfhb = spark.sql(cfHbforResultQuery);
		cfhb.createOrReplaceTempView("cfhb");
	// of the results collects the distinct keys for collected from (at the level of the result) and hosted by
	// and produces pairs resultId, key for each distinct key associated to the result
	public static <R extends Result> void createCfHbforResult(SparkSession spark, String inputPath, String outputPath,
		Class<R> resultClazz) {
		readPath(spark, inputPath, resultClazz)
			.filter(
				(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
					!r.getDataInfo().getInvisible())
			.flatMap((FlatMapFunction<R, EntityEntityRel>) r -> {
				Set<String> cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet());
				cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet()));
				return cfhb
					.stream()
					.map(value -> EntityEntityRel.newInstance(r.getId(), value))
					.collect(Collectors.toList())
					.iterator();
			}, Encoders.bean(EntityEntityRel.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);

	}

	public static <R> Dataset<R> readPath(
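For orientation, a minimal usage sketch of the reworked createCfHbforResult overload; the paths and the Publication result type are placeholders, and the call shape mirrors the one PrepareResultCountrySet uses further down in this diff.

import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.PropagationConstant;
import eu.dnetlib.dhp.schema.oaf.Publication;

public class CfHbExample {
	public static void main(String[] args) {
		SparkSession spark = SparkSession.builder().appName("cfhb-example").master("local[*]").getOrCreate();
		// Hypothetical paths: one result table as input, a working-dir subfolder as output.
		String inputPath = "/tmp/graph/publication";
		String outputPath = "/tmp/working/resultCfHb/publication";
		// Writes one EntityEntityRel row per distinct collectedfrom/hostedby key of each non-deleted, visible result.
		PropagationConstant.createCfHbforResult(spark, inputPath, outputPath, Publication.class);
		spark.stop();
	}
}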
@ -64,12 +64,6 @@ public class SparkBulkTagJob {
		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final Boolean saveGraph = Optional
			.ofNullable(parser.get("saveGraph"))
			.map(Boolean::valueOf)
			.orElse(Boolean.TRUE);
		log.info("saveGraph: {}", saveGraph);

		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

		SparkConf conf = new SparkConf();

@ -86,10 +80,9 @@ public class SparkBulkTagJob {
		runWithSparkSession(
			conf,
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, outputPath);
				execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc);
			});
			spark ->
				execBulkTag(spark, inputPath, outputPath, protoMappingParams, resultClazz, cc)
			);
	}

	private static <R extends Result> void execBulkTag(

@ -113,6 +106,13 @@ public class SparkBulkTagJob {
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(outputPath);

		readPath(spark, outputPath, resultClazz)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath);

	}

	public static <R> Dataset<R> readPath(
@ -16,6 +16,7 @@ import javax.print.attribute.DocAttributeSet;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;

@ -34,6 +35,7 @@ import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.Tuple2;

/**
 * @author miriam.baglioni

@ -44,6 +46,11 @@ public class SparkEoscBulkTag implements Serializable {
	private static final Logger log = LoggerFactory.getLogger(SparkEoscBulkTag.class);
	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static String OPENAIRE_3 = "openaire3.0";
	private static String OPENAIRE_4 = "openaire-pub_4.0";
	private static String OPENAIRE_CRIS = "openaire-cris_1.1";
	private static String OPENAIRE_DATA = "openaire2.0_data";

	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(

@ -72,6 +79,9 @@ public class SparkEoscBulkTag implements Serializable {
		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String resultType = parser.get("resultType");
		log.info("resultType: {}", resultType);

		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

		SparkConf conf = new SparkConf();

@ -82,41 +92,71 @@ public class SparkEoscBulkTag implements Serializable {
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, workingPath);
				execBulkTag(spark, inputPath, workingPath, datasourceMapPath, resultClazz);
				selectCompliantDatasources(spark, inputPath, workingPath, datasourceMapPath);
				execBulkTag(spark, inputPath, workingPath, resultType, resultClazz);
			});
	}

	private static void selectCompliantDatasources(SparkSession spark, String inputPath, String workingPath,
		String datasourceMapPath) {
		Dataset<Datasource> datasources = readPath(spark, inputPath + "datasource", Datasource.class)
			.filter((FilterFunction<Datasource>) ds -> {
				final String compatibility = ds.getOpenairecompatibility().getClassid();
				return compatibility.equalsIgnoreCase(OPENAIRE_3) ||
					compatibility.equalsIgnoreCase(OPENAIRE_4) ||
					compatibility.equalsIgnoreCase(OPENAIRE_CRIS) ||
					compatibility.equalsIgnoreCase(OPENAIRE_DATA);
			});

		Dataset<DatasourceMaster> datasourceMaster = readPath(spark, datasourceMapPath, DatasourceMaster.class);

		datasources
			.joinWith(datasourceMaster, datasources.col("id").equalTo(datasourceMaster.col("master")), "left")
			.map(
				(MapFunction<Tuple2<Datasource, DatasourceMaster>, DatasourceMaster>) t2 -> t2._2(),
				Encoders.bean(DatasourceMaster.class))
			.filter(Objects::nonNull)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath + "datasource");
	}

	private static <R extends Result> void execBulkTag(
		SparkSession spark,
		String inputPath,
		String workingPath,
		String datasourceMapPath,
		String resultType,
		Class<R> resultClazz) {

		List<String> hostedByList = readPath(spark, datasourceMapPath, DatasourceMaster.class)
		List<String> hostedByList = readPath(spark, workingPath + "datasource", DatasourceMaster.class)
			.map((MapFunction<DatasourceMaster, String>) dm -> dm.getMaster(), Encoders.STRING())
			.collectAsList();

		readPath(spark, inputPath, resultClazz)
			.map(patchResult(), Encoders.bean(resultClazz))
			.filter(Objects::nonNull)
		readPath(spark, inputPath + resultType, resultClazz)
			.map(
				(MapFunction<R, R>) value -> enrich(value, hostedByList),
				Encoders.bean(resultClazz))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath);
			.json(workingPath + resultType);

		readPath(spark, workingPath, resultClazz)
		readPath(spark, workingPath + resultType, resultClazz)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(inputPath);
			.json(inputPath + resultType);

	}

	private static <R extends Result> R enrich(R value, List<String> hostedByList) {
		if (value.getDataInfo().getDeletedbyinference() == null) {
			value.getDataInfo().setDeletedbyinference(false);
		}
		if (value.getContext() == null) {
			value.setContext(new ArrayList<>());
		}
		if (value
			.getInstance()
			.stream()
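The datasource selection added above boils down to a single predicate on the openairecompatibility class id. A small stand-alone sketch of that rule (the class and method names here are hypothetical; the accepted values are the constants from the hunk above):

import java.util.Set;

public class EoscCompatibilityCheck {
	// Compatibility class ids accepted by selectCompliantDatasources, copied from the constants above.
	private static final Set<String> ACCEPTED = Set
		.of("openaire3.0", "openaire-pub_4.0", "openaire-cris_1.1", "openaire2.0_data");

	// Hypothetical helper: true when the datasource compatibility class id is one of the accepted values.
	public static boolean isCompliant(String compatibilityClassId) {
		return compatibilityClassId != null && ACCEPTED.contains(compatibilityClassId.toLowerCase());
	}

	public static void main(String[] args) {
		System.out.println(isCompliant("openaire-cris_1.1")); // true
		System.out.println(isCompliant("openaire2.0"));       // false
	}
}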
@ -8,10 +8,11 @@ import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs;
import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

@ -20,8 +21,7 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.EntityEntityRel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Datasource;

@ -57,8 +57,8 @@ public class PrepareDatasourceCountryAssociation {
		String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath {}: ", outputPath);
		final String workingPath = parser.get("workingPath");
		log.info("workingPath {}: ", workingPath);

		SparkConf conf = new SparkConf();

@ -66,13 +66,13 @@ public class PrepareDatasourceCountryAssociation {
			conf,
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, outputPath);
				removeOutputDir(spark, workingPath + "/datasourceCountry");
				prepareDatasourceCountryAssociation(
					spark,
					Arrays.asList(parser.get("whitelist").split(";")),
					Arrays.asList(parser.get("allowedtypes").split(";")),
					inputPath,
					outputPath);
					workingPath + "/datasourceCountry");
			});
	}
@ -2,20 +2,19 @@

package eu.dnetlib.dhp.countrypropagation;

import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs;
import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry;
import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;

@ -23,6 +22,8 @@ import org.apache.spark.sql.Dataset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.EntityEntityRel;
import eu.dnetlib.dhp.PropagationConstant;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;

@ -45,19 +46,18 @@ public class PrepareResultCountrySet {
		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

		String workingPath = parser.get("workingPath");
		log.info("workingPath: {}", workingPath);

		String inputPath = parser.get("sourcePath");
		log.info("inputPath: {}", inputPath);

		String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);

		final String datasourcecountrypath = parser.get("preparedInfoPath");
		log.info("preparedInfoPath: {}", datasourcecountrypath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
		log.info("resultType: {}", resultType);

		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

		SparkConf conf = new SparkConf();

@ -66,50 +66,31 @@ public class PrepareResultCountrySet {
			conf,
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, outputPath);
				removeOutputDir(spark, workingPath + "/preparedInfo/" + resultType);
				getPotentialResultToUpdate(
					spark,
					inputPath,
					outputPath,
					datasourcecountrypath,
					workingPath,
					resultClazz);
					resultType,
					resultClazz);
			});
	}

	private static <R extends Result> void getPotentialResultToUpdate(
		SparkSession spark,
		String inputPath,
		String outputPath,
		String datasourcecountrypath,
		String workingPath,
		String resultType,
		Class<R> resultClazz) {

		// selects all the results non deleted by inference and non invisible
		Dataset<R> result = readPath(spark, inputPath, resultClazz)
			.filter(
				(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() &&
					!r.getDataInfo().getInvisible());
		final String datasourcecountrypath = workingPath + "/datasourceCountry";
		final String cfhbpath = workingPath + "/resultCfHb/" + resultType;
		final String outputPath = workingPath + "/preparedInfo/" + resultType;

		// of the results collects the distinct keys for collected from (at the level of the result) and hosted by
		// and produces pairs resultId, key for each distinct key associated to the result
		result.flatMap((FlatMapFunction<R, EntityEntityRel>) r -> {
			Set<String> cfhb = r.getCollectedfrom().stream().map(cf -> cf.getKey()).collect(Collectors.toSet());
			cfhb.addAll(r.getInstance().stream().map(i -> i.getHostedby().getKey()).collect(Collectors.toSet()));
			return cfhb
				.stream()
				.map(value -> EntityEntityRel.newInstance(r.getId(), value))
				.collect(Collectors.toList())
				.iterator();
		}, Encoders.bean(EntityEntityRel.class))
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(workingPath + "/resultCfHb");
		PropagationConstant.createCfHbforResult(spark, inputPath, cfhbpath, resultClazz);

		Dataset<DatasourceCountry> datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class);

		Dataset<EntityEntityRel> cfhb = readPath(spark, workingPath + "/resultCfHb", EntityEntityRel.class);
		Dataset<EntityEntityRel> cfhb = readPath(spark, cfhbpath, EntityEntityRel.class);

		datasource_country
			.joinWith(
@ -9,6 +9,8 @@ import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs;
import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;

@ -47,15 +49,17 @@ public class SparkCountryPropagationJob {
		String sourcePath = parser.get("sourcePath");
		log.info("sourcePath: {}", sourcePath);

		String preparedInfoPath = parser.get("preparedInfoPath");
		log.info("preparedInfoPath: {}", preparedInfoPath);

		final String outputPath = parser.get("outputPath");
		log.info("outputPath: {}", outputPath);
		String workingPath = parser.get("workingPath");
		log.info("workingPath: {}", workingPath);

		final String resultClassName = parser.get("resultTableName");
		log.info("resultTableName: {}", resultClassName);

		final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
		log.info("resultType: {}", resultType);

		Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);

		SparkConf conf = new SparkConf();

@ -63,12 +67,12 @@ public class SparkCountryPropagationJob {
			conf,
			isSparkSessionManaged,
			spark -> {
				removeOutputDir(spark, outputPath);
				removeOutputDir(spark, workingPath + "/" + resultType);
				execPropagation(
					spark,
					sourcePath,
					preparedInfoPath,
					outputPath,
					workingPath,
					resultType,
					resultClazz);
			});
	}

@ -76,18 +80,15 @@ public class SparkCountryPropagationJob {
	private static <R extends Result> void execPropagation(
		SparkSession spark,
		String sourcePath,
		String preparedInfoPath,
		String outputPath,
		String workingPath,
		String resultType,
		Class<R> resultClazz) {

		log.info("Reading Graph table from: {}", sourcePath);
		Dataset<R> res = readPath(spark, sourcePath, resultClazz);

		log.info("Reading prepared info: {}", preparedInfoPath);
		Dataset<ResultCountrySet> prepared = spark
			.read()
			.json(preparedInfoPath)
			.as(Encoders.bean(ResultCountrySet.class));
		log.info("Reading prepared info: {}", workingPath + "/preparedInfo/" + resultType);
		Dataset<ResultCountrySet> prepared = readPath(spark, workingPath + "/preparedInfo/" + resultType, ResultCountrySet.class);

		res
			.joinWith(prepared, res.col("id").equalTo(prepared.col("resultId")), "left_outer")

@ -95,7 +96,13 @@ public class SparkCountryPropagationJob {
			.write()
			.option("compression", "gzip")
			.mode(SaveMode.Overwrite)
			.json(outputPath);
			.json(workingPath + "/" + resultType);

		readPath(spark, workingPath + "/" + resultType, resultClazz)
			.write()
			.mode(SaveMode.Overwrite)
			.option("compression", "gzip")
			.json(sourcePath);

	}
@ -1,5 +1,5 @@

package eu.dnetlib.dhp.countrypropagation;
package eu.dnetlib.dhp.countrypropagation.pojo;

import java.io.Serializable;

@ -1,5 +1,5 @@

package eu.dnetlib.dhp.countrypropagation;
package eu.dnetlib.dhp.countrypropagation.pojo;

import java.io.Serializable;

@ -1,5 +1,7 @@

package eu.dnetlib.dhp.countrypropagation;
package eu.dnetlib.dhp.countrypropagation.pojo;

import eu.dnetlib.dhp.countrypropagation.pojo.CountrySbs;

import java.io.Serializable;
import java.util.ArrayList;
@ -1,7 +1,9 @@

package eu.dnetlib.dhp.orcidtoresultfromsemrel;

public class AutoritativeAuthor {
import java.io.Serializable;

public class AutoritativeAuthor implements Serializable {

	private String name;
	private String surname;

@ -40,4 +42,13 @@ public class AutoritativeAuthor {
		this.orcid = orcid;
	}

	public static AutoritativeAuthor newInstance(String name, String surname, String fullname, String orcid) {
		AutoritativeAuthor aa = new AutoritativeAuthor();
		aa.name = name;
		aa.surname = surname;
		aa.fullname = fullname;
		aa.orcid = orcid;
		return aa;
	}

}
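A brief, hypothetical usage sketch of the new newInstance factory; the author values are invented for illustration.

import eu.dnetlib.dhp.orcidtoresultfromsemrel.AutoritativeAuthor;

public class AutoritativeAuthorExample {
	public static void main(String[] args) {
		// Invented values, in the order expected by the factory: name, surname, fullname, orcid.
		AutoritativeAuthor aa = AutoritativeAuthor.newInstance("Jane", "Doe", "Jane Doe", "0000-0002-1825-0097");
		System.out.println(aa != null ? "author created" : "author missing");
	}
}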
@ -0,0 +1,90 @@
|
|||
|
||||
package eu.dnetlib.dhp.orcidtoresultfromsemrel;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.PropagationConstant.readPath;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class PrepareResultOrcidAssociationStep0 implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareResultOrcidAssociationStep0.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConf = IOUtils
|
||||
.toString(
|
||||
PrepareResultOrcidAssociationStep0.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult0_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConf);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final List<String> allowedsemrel = Arrays
|
||||
.stream(parser.get("allowedsemrels").split(";"))
|
||||
.map(s -> s.toLowerCase())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
|
||||
selectRelations(
|
||||
spark, inputPath, outputPath, allowedsemrel);
|
||||
});
|
||||
}
|
||||
|
||||
private static void selectRelations(SparkSession spark, String inputPath, String outputPath,
|
||||
List<String> allowedsemrel) {
|
||||
|
||||
readPath(spark, inputPath, Relation.class)
|
||||
.filter(
|
||||
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference()
|
||||
&& allowedsemrel.contains(r.getRelClass().toLowerCase()))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
}
|
|
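The new Step0 above only materialises the subset of non-deleted relations whose relClass is in the allowedsemrels list, so the later steps no longer scan the full relation table. A hedged, self-contained sketch of that filter follows; the flattened Rel bean, the paths and the semantic-relation values are simplified stand-ins for the project's Relation schema class and configuration.

```java
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SelectRelationSubsetSketch {

    // simplified stand-in for the project's Relation class (dataInfo flattened)
    public static class Rel implements Serializable {
        private String source;
        private String target;
        private String relClass;
        private Boolean deletedbyinference;
        public String getSource() { return source; }
        public void setSource(String source) { this.source = source; }
        public String getTarget() { return target; }
        public void setTarget(String target) { this.target = target; }
        public String getRelClass() { return relClass; }
        public void setRelClass(String relClass) { this.relClass = relClass; }
        public Boolean getDeletedbyinference() { return deletedbyinference; }
        public void setDeletedbyinference(Boolean d) { this.deletedbyinference = d; }
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("relation-subset")
            .master("local[*]")
            .getOrCreate();

        // illustrative semantic relations, lower-cased as in the job
        List<String> allowedSemRel = Arrays.asList("isSupplementedBy", "isSupplementTo")
            .stream()
            .map(s -> s.toLowerCase(Locale.ROOT))
            .collect(Collectors.toList());

        Dataset<Rel> relations = spark.read()
            .json("/tmp/graph/relation")            // hypothetical input path
            .as(Encoders.bean(Rel.class));

        relations
            .filter((FilterFunction<Rel>) r -> !Boolean.TRUE.equals(r.getDeletedbyinference())
                && r.getRelClass() != null
                && allowedSemRel.contains(r.getRelClass().toLowerCase(Locale.ROOT)))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json("/tmp/working/relationSubset");   // hypothetical output path

        spark.stop();
    }
}
```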
@ -2,26 +2,37 @@
|
|||
package eu.dnetlib.dhp.orcidtoresultfromsemrel;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.ForeachFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.sql.sources.v2.reader.InputPartition;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class PrepareResultOrcidAssociationStep1 {
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareResultOrcidAssociationStep1.class);
|
||||
|
@ -42,83 +53,112 @@ public class PrepareResultOrcidAssociationStep1 {
|
|||
String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
final String outputPath = parser.get("workingPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final List<String> allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";"));
|
||||
final List<String> allowedsemrel = Arrays
|
||||
.stream(parser.get("allowedsemrels").split(";"))
|
||||
.map(s -> s.toLowerCase())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel));
|
||||
|
||||
final List<String> allowedPids = Arrays.asList(parser.get("allowedpids").split(";"));
|
||||
log.info("allowedPids: {}", new Gson().toJson(allowedPids));
|
||||
|
||||
final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase();
|
||||
log.info("resultType: {}", resultType);
|
||||
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
||||
|
||||
String inputRelationPath = inputPath + "/relation";
|
||||
log.info("inputRelationPath: {}", inputRelationPath);
|
||||
|
||||
String inputResultPath = inputPath + "/" + resultType;
|
||||
log.info("inputResultPath: {}", inputResultPath);
|
||||
|
||||
String outputResultPath = outputPath + "/" + resultType;
|
||||
log.info("outputResultPath: {}", outputResultPath);
|
||||
|
||||
runWithSparkHiveSession(
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
// removeOutputDir(spark, outputPath);
|
||||
prepareInfo(
|
||||
spark, inputRelationPath, inputResultPath, outputResultPath, resultClazz, allowedsemrel);
|
||||
spark, inputPath, outputPath, resultType, resultClazz, allowedsemrel, allowedPids);
|
||||
});
|
||||
}
|
||||
|
||||
private static <R extends Result> void prepareInfo(
|
||||
SparkSession spark,
|
||||
String inputRelationPath,
|
||||
String inputResultPath,
|
||||
String outputResultPath,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
String resultType,
|
||||
Class<R> resultClazz,
|
||||
List<String> allowedsemrel) {
|
||||
List<String> allowedsemrel,
|
||||
List<String> allowedPids) {
|
||||
|
||||
Dataset<Relation> relation = readPath(spark, inputRelationPath, Relation.class);
|
||||
relation.createOrReplaceTempView("relation");
|
||||
final String inputResultPath = inputPath + "/" + resultType;
|
||||
|
||||
Dataset<Relation> relation = readPath(spark, outputPath + "/relationSubset", Relation.class);
|
||||
|
||||
log.info("Reading Graph table from: {}", inputResultPath);
|
||||
Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
|
||||
result.createOrReplaceTempView("result");
|
||||
|
||||
String query = "SELECT target resultId, author authorList"
|
||||
+ " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
|
||||
+ " FROM ( "
|
||||
+ " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
|
||||
+ " FROM result "
|
||||
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
||||
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
||||
+ " WHERE lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID + "' or "
|
||||
+ " lower(MyP.qualifier.classid) = '" + ModelConstants.ORCID_PENDING + "') tmp "
|
||||
+ " GROUP BY id) r_t "
|
||||
+ " JOIN ("
|
||||
+ " SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ getConstraintList(" lower(relclass) = '", allowedsemrel)
|
||||
+ " ) rel_rel "
|
||||
+ " ON source = id";
|
||||
final String resultOutputPath = outputPath + "/resultSubset/" + resultType;
|
||||
|
||||
log.info("executedQuery: {}", query);
|
||||
spark
|
||||
.sql(query)
|
||||
.as(Encoders.bean(ResultOrcidList.class))
|
||||
readPath(spark, inputResultPath, resultClazz)
|
||||
.filter(
|
||||
(FilterFunction<R>) r -> !r.getDataInfo().getDeletedbyinference() && !r.getDataInfo().getInvisible())
|
||||
.filter(
|
||||
(FilterFunction<R>) r -> Optional
|
||||
.ofNullable(r.getAuthor())
|
||||
.map(
|
||||
al -> al
|
||||
.stream()
|
||||
.anyMatch(
|
||||
a -> hasAllowedPid(a, allowedPids)))
|
||||
.orElse(false)
|
||||
|
||||
)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(resultOutputPath);
|
||||
|
||||
Dataset<R> result = readPath(spark, resultOutputPath, resultClazz);
|
||||
|
||||
// result.foreach((ForeachFunction<R>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
|
||||
|
||||
result
|
||||
.joinWith(relation, result.col("id").equalTo(relation.col("source")))
|
||||
.map((MapFunction<Tuple2<R, Relation>, ResultOrcidList>) t2 -> {
|
||||
ResultOrcidList rol = new ResultOrcidList();
|
||||
rol.setResultId(t2._2().getTarget());
|
||||
List<AutoritativeAuthor> aal = new ArrayList<>();
|
||||
t2._1().getAuthor().stream().forEach(a -> {
|
||||
a.getPid().stream().forEach(p -> {
|
||||
if (allowedPids.contains(p.getQualifier().getClassid().toLowerCase())) {
|
||||
aal
|
||||
.add(
|
||||
AutoritativeAuthor
|
||||
.newInstance(a.getName(), a.getSurname(), a.getFullname(), p.getValue()));
|
||||
}
|
||||
});
|
||||
});
|
||||
rol.setAuthorList(aal);
|
||||
return rol;
|
||||
}, Encoders.bean(ResultOrcidList.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputResultPath);
|
||||
.json(outputPath + "/" + resultType);
|
||||
|
||||
}
|
||||
|
||||
private static boolean hasAllowedPid(Author a, List<String> allowedPids) {
|
||||
Optional<List<StructuredProperty>> oPid = Optional.ofNullable(a.getPid());
|
||||
if (!oPid.isPresent()) {
|
||||
return false;
|
||||
}
|
||||
return oPid.get().stream().anyMatch(p -> allowedPids.contains(p.getQualifier().getClassid().toLowerCase()));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
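Step1 now pre-filters the result table so that only records with at least one author carrying an allowed pid survive, using the null-safe hasAllowedPid helper. The sketch below shows that predicate on plain collections, outside Spark; the Pid/Author classes and the pid type values are simplified, illustrative stand-ins for the schema's StructuredProperty and Author.

```java
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;

public class AllowedPidFilterSketch {

    // simplified stand-ins for the schema's StructuredProperty and Author classes
    static class Pid {
        final String classid;
        final String value;
        Pid(String classid, String value) { this.classid = classid; this.value = value; }
    }

    static class Author {
        final String fullname;
        final List<Pid> pid;
        Author(String fullname, List<Pid> pid) { this.fullname = fullname; this.pid = pid; }
    }

    // null-safe check: does this author carry at least one pid of an allowed type?
    static boolean hasAllowedPid(Author a, List<String> allowedPids) {
        return Optional.ofNullable(a.pid)
            .map(pids -> pids.stream()
                .anyMatch(p -> allowedPids.contains(p.classid.toLowerCase(Locale.ROOT))))
            .orElse(false);
    }

    // a result is kept when any of its authors passes the pid check
    static boolean keepResult(List<Author> authors, List<String> allowedPids) {
        return Optional.ofNullable(authors)
            .map(al -> al.stream().anyMatch(a -> hasAllowedPid(a, allowedPids)))
            .orElse(false);
    }

    public static void main(String[] args) {
        List<String> allowed = Arrays.asList("orcid", "orcid_pending"); // illustrative values
        List<Author> authors = Arrays.asList(
            new Author("Jane Doe", Arrays.asList(new Pid("ORCID", "0000-0000-0000-0001"))),
            new Author("John Roe", null));
        System.out.println(keepResult(authors, allowed)); // prints: true
    }
}
```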
@ -10,6 +10,8 @@ import java.util.Set;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -50,7 +52,7 @@ public class PrepareResultOrcidAssociationStep2 {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
// removeOutputDir(spark, outputPath);
|
||||
mergeInfo(spark, inputPath, outputPath);
|
||||
});
|
||||
}
|
||||
|
@ -63,33 +65,31 @@ public class PrepareResultOrcidAssociationStep2 {
|
|||
.union(readPath(spark, inputPath + "/software", ResultOrcidList.class));
|
||||
|
||||
resultOrcidAssoc
|
||||
.toJavaRDD()
|
||||
.mapToPair(r -> new Tuple2<>(r.getResultId(), r))
|
||||
.reduceByKey(
|
||||
(a, b) -> {
|
||||
if (a == null) {
|
||||
return b;
|
||||
}
|
||||
if (b == null) {
|
||||
return a;
|
||||
}
|
||||
.groupByKey((MapFunction<ResultOrcidList, String>) rol -> rol.getResultId(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, ResultOrcidList, ResultOrcidList>) (k, it) -> {
|
||||
ResultOrcidList resultOrcidList = it.next();
|
||||
if (it.hasNext()) {
|
||||
Set<String> orcid_set = new HashSet<>();
|
||||
a.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid()));
|
||||
b
|
||||
.getAuthorList()
|
||||
.stream()
|
||||
.forEach(
|
||||
aa -> {
|
||||
if (!orcid_set.contains(aa.getOrcid())) {
|
||||
a.getAuthorList().add(aa);
|
||||
orcid_set.add(aa.getOrcid());
|
||||
}
|
||||
});
|
||||
return a;
|
||||
})
|
||||
.map(Tuple2::_2)
|
||||
.map(r -> OBJECT_MAPPER.writeValueAsString(r))
|
||||
.saveAsTextFile(outputPath, GzipCodec.class);
|
||||
resultOrcidList.getAuthorList().stream().forEach(aa -> orcid_set.add(aa.getOrcid()));
|
||||
it
|
||||
.forEachRemaining(
|
||||
val -> val
|
||||
.getAuthorList()
|
||||
.stream()
|
||||
.forEach(
|
||||
aa -> {
|
||||
if (!orcid_set.contains(aa.getOrcid())) {
|
||||
resultOrcidList.getAuthorList().add(aa);
|
||||
orcid_set.add(aa.getOrcid());
|
||||
}
|
||||
}));
|
||||
}
|
||||
return resultOrcidList;
|
||||
}, Encoders.bean(ResultOrcidList.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
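The merge step above now stays inside the Dataset API: partial records are grouped by resultId and the author lists of each group are folded into one, deduplicating on the orcid value, instead of dropping to an RDD and reduceByKey. A compact sketch of that groupByKey/mapGroups pattern on a simplified bean (not the project's ResultOrcidList) could look like this:

```java
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class MergeOrcidListsSketch {

    // simplified stand-in for ResultOrcidList: one result id plus its orcid values
    public static class ResultOrcids implements Serializable {
        private String resultId;
        private List<String> orcids = new ArrayList<>();
        public String getResultId() { return resultId; }
        public void setResultId(String resultId) { this.resultId = resultId; }
        public List<String> getOrcids() { return orcids; }
        public void setOrcids(List<String> orcids) { this.orcids = orcids; }
    }

    public static Dataset<ResultOrcids> merge(Dataset<ResultOrcids> input) {
        return input
            // group all partial lists that refer to the same result
            .groupByKey((MapFunction<ResultOrcids, String>) ResultOrcids::getResultId, Encoders.STRING())
            // fold each group into a single record, skipping orcids already seen
            .mapGroups((MapGroupsFunction<String, ResultOrcids, ResultOrcids>) (key, it) -> {
                ResultOrcids merged = it.next();
                Set<String> seen = new HashSet<>(merged.getOrcids());
                it.forEachRemaining(other -> other.getOrcids().forEach(o -> {
                    if (seen.add(o)) {
                        merged.getOrcids().add(o);
                    }
                }));
                return merged;
            }, Encoders.bean(ResultOrcids.class));
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("merge-sketch").master("local[*]").getOrCreate();
        // tiny in-memory dataset just to exercise the merge
        ResultOrcids a = new ResultOrcids();
        a.setResultId("r1");
        a.getOrcids().add("0000-0000-0000-0001");
        ResultOrcids b = new ResultOrcids();
        b.setResultId("r1");
        b.getOrcids().add("0000-0000-0000-0002");
        Dataset<ResultOrcids> ds = spark.createDataset(Arrays.asList(a, b), Encoders.bean(ResultOrcids.class));
        merge(ds).show(false);
        spark.stop();
    }
}
```

Keeping the merge as a typed Dataset operation avoids the RDD round-trip and lets the result be written directly with the usual gzip/Overwrite JSON writer.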
@ -2,7 +2,7 @@
|
|||
package eu.dnetlib.dhp.orcidtoresultfromsemrel;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
@ -56,25 +56,16 @@ public class SparkOrcidToResultFromSemRelJob {
|
|||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final Boolean saveGraph = Optional
|
||||
.ofNullable(parser.get("saveGraph"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("saveGraph: {}", saveGraph);
|
||||
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
||||
|
||||
runWithSparkHiveSession(
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
if (saveGraph) {
|
||||
execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz);
|
||||
}
|
||||
execPropagation(spark, possibleUpdates, inputPath, outputPath, resultClazz);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -51,8 +51,7 @@ public class SparkResultToProjectThroughSemRelJob {
|
|||
final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
|
||||
log.info("alreadyLinkedPath {}: ", alreadyLinkedPath);
|
||||
|
||||
final Boolean saveGraph = Boolean.valueOf(parser.get("saveGraph"));
|
||||
log.info("saveGraph: {}", saveGraph);
|
||||
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
|
@ -60,11 +59,9 @@ public class SparkResultToProjectThroughSemRelJob {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
if (isTest(parser)) {
|
||||
removeOutputDir(spark, outputPath);
|
||||
}
|
||||
|
||||
execPropagation(
|
||||
spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph);
|
||||
spark, outputPath, alreadyLinkedPath, potentialUpdatePath);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -72,13 +69,12 @@ public class SparkResultToProjectThroughSemRelJob {
|
|||
SparkSession spark,
|
||||
String outputPath,
|
||||
String alreadyLinkedPath,
|
||||
String potentialUpdatePath,
|
||||
Boolean saveGraph) {
|
||||
String potentialUpdatePath) {
|
||||
|
||||
Dataset<ResultProjectSet> toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class);
|
||||
Dataset<ResultProjectSet> alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class);
|
||||
|
||||
if (saveGraph) {
|
||||
|
||||
toaddrelations
|
||||
.joinWith(
|
||||
alreadyLinked,
|
||||
|
@ -89,7 +85,7 @@ public class SparkResultToProjectThroughSemRelJob {
|
|||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {
|
||||
|
|
|
@ -56,11 +56,7 @@ public class SparkResultToCommunityFromOrganizationJob {
|
|||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final Boolean saveGraph = Optional
|
||||
.ofNullable(parser.get("saveGraph"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("saveGraph: {}", saveGraph);
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
@ -72,10 +68,9 @@ public class SparkResultToCommunityFromOrganizationJob {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
if (saveGraph) {
|
||||
|
||||
execPropagation(spark, inputPath, outputPath, resultClazz, possibleupdatespath);
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -99,6 +94,12 @@ public class SparkResultToCommunityFromOrganizationJob {
|
|||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
|
||||
readPath(spark, outputPath, resultClazz)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(inputPath);
|
||||
}
|
||||
|
||||
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> resultCommunityFn() {
|
||||
|
|
|
@ -70,13 +70,10 @@ public class SparkResultToCommunityThroughSemRelJob {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
if (isTest(parser)) {
|
||||
removeOutputDir(spark, outputPath);
|
||||
}
|
||||
if (saveGraph) {
|
||||
|
||||
execPropagation(
|
||||
spark, inputPath, outputPath, preparedInfoPath, resultClazz);
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -100,6 +97,12 @@ public class SparkResultToCommunityThroughSemRelJob {
|
|||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
|
||||
readPath(spark, outputPath, resultClazz)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(inputPath);
|
||||
}
|
||||
|
||||
private static <R extends Result> MapFunction<Tuple2<R, ResultCommunityList>, R> contextUpdaterFn() {
|
||||
|
|
|
@ -23,4 +23,11 @@ public class DatasourceOrganization implements Serializable {
|
|||
public void setOrganizationId(String organizationId) {
|
||||
this.organizationId = organizationId;
|
||||
}
|
||||
|
||||
public static DatasourceOrganization newInstance(String datasourceId, String organizationId) {
|
||||
DatasourceOrganization dso = new DatasourceOrganization();
|
||||
dso.datasourceId = datasourceId;
|
||||
dso.organizationId = organizationId;
|
||||
return dso;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,17 +2,17 @@
|
|||
package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
|
||||
|
||||
import static eu.dnetlib.dhp.PropagationConstant.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.MapGroupsFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
|
@ -28,6 +28,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class PrepareResultInstRepoAssociation {
|
||||
|
||||
|
@ -49,14 +50,11 @@ public class PrepareResultInstRepoAssociation {
|
|||
Boolean isSparkSessionManaged = isSparkSessionManaged(parser);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputPath = parser.get("sourcePath");
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String datasourceOrganizationPath = parser.get("datasourceOrganizationPath");
|
||||
log.info("datasourceOrganizationPath {}: ", datasourceOrganizationPath);
|
||||
|
||||
final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
|
||||
log.info("alreadyLinkedPath {}: ", alreadyLinkedPath);
|
||||
final String workingPath = parser.get("workingPath");
|
||||
log.info("workingPath: {}", workingPath);
|
||||
|
||||
List<String> blacklist = Optional
|
||||
.ofNullable(parser.get("blacklist"))
|
||||
|
@ -64,82 +62,92 @@ public class PrepareResultInstRepoAssociation {
|
|||
.orElse(new ArrayList<>());
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.set("hive.metastore.uris", parser.get("hive_metastore_uris"));
|
||||
|
||||
runWithSparkHiveSession(
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
readNeededResources(spark, inputPath);
|
||||
readNeededResources(spark, inputPath, workingPath, blacklist);
|
||||
|
||||
removeOutputDir(spark, datasourceOrganizationPath);
|
||||
prepareDatasourceOrganization(spark, datasourceOrganizationPath, blacklist);
|
||||
prepareDatasourceOrganization(spark, workingPath);
|
||||
|
||||
removeOutputDir(spark, alreadyLinkedPath);
|
||||
prepareAlreadyLinkedAssociation(spark, alreadyLinkedPath);
|
||||
prepareAlreadyLinkedAssociation(spark, workingPath);
|
||||
});
|
||||
}
|
||||
|
||||
private static void readNeededResources(SparkSession spark, String inputPath) {
|
||||
Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
|
||||
datasource.createOrReplaceTempView("datasource");
|
||||
|
||||
Dataset<Relation> relation = readPath(spark, inputPath + "/relation", Relation.class);
|
||||
relation.createOrReplaceTempView("relation");
|
||||
|
||||
Dataset<Organization> organization = readPath(spark, inputPath + "/organization", Organization.class);
|
||||
organization.createOrReplaceTempView("organization");
|
||||
}
|
||||
|
||||
private static void prepareDatasourceOrganization(
|
||||
SparkSession spark, String datasourceOrganizationPath, List<String> blacklist) {
|
||||
|
||||
final String blacklisted = blacklist
|
||||
.stream()
|
||||
.map(s -> " AND id != '" + s + "'")
|
||||
.collect(Collectors.joining());
|
||||
|
||||
String query = "SELECT source datasourceId, target organizationId "
|
||||
+ "FROM ( SELECT id "
|
||||
+ "FROM datasource "
|
||||
+ "WHERE datasourcetype.classid = '"
|
||||
+ INSTITUTIONAL_REPO_TYPE
|
||||
+ "' "
|
||||
+ "AND datainfo.deletedbyinference = false " + blacklisted + " ) d "
|
||||
+ "JOIN ( SELECT source, target "
|
||||
+ "FROM relation "
|
||||
+ "WHERE lower(relclass) = '"
|
||||
+ ModelConstants.IS_PROVIDED_BY.toLowerCase()
|
||||
+ "' "
|
||||
+ "AND datainfo.deletedbyinference = false ) rel "
|
||||
+ "ON d.id = rel.source ";
|
||||
|
||||
spark
|
||||
.sql(query)
|
||||
.as(Encoders.bean(DatasourceOrganization.class))
|
||||
private static void readNeededResources(SparkSession spark, String inputPath, String workingPath,
|
||||
List<String> blacklist) {
|
||||
readPath(spark, inputPath + "/datasource", Datasource.class)
|
||||
.filter(
|
||||
(FilterFunction<Datasource>) ds -> !blacklist.contains(ds.getId()) &&
|
||||
!ds.getDataInfo().getDeletedbyinference() &&
|
||||
ds.getDatasourcetype().getClassid().equals(INSTITUTIONAL_REPO_TYPE))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(datasourceOrganizationPath);
|
||||
.json(workingPath + "/datasource");
|
||||
|
||||
readPath(spark, inputPath + "/relation", Relation.class)
|
||||
.filter(
|
||||
(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
|
||||
(r.getRelClass().toLowerCase().equals(ModelConstants.IS_PROVIDED_BY.toLowerCase()) ||
|
||||
r.getRelClass().toLowerCase().equals(ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase())))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingPath + "/relation");
|
||||
}
|
||||
|
||||
private static void prepareDatasourceOrganization(
|
||||
SparkSession spark, String workingPath) {
|
||||
|
||||
Dataset<Datasource> datasource = readPath(spark, workingPath + "/datasource", Datasource.class);
|
||||
|
||||
Dataset<Relation> relation = readPath(spark, workingPath + "/relation", Relation.class)
|
||||
.filter(
|
||||
(FilterFunction<Relation>) r -> r
|
||||
.getRelClass()
|
||||
.toLowerCase()
|
||||
.equals(ModelConstants.IS_PROVIDED_BY.toLowerCase()));
|
||||
|
||||
datasource
|
||||
.joinWith(relation, datasource.col("id").equalTo(relation.col("source")))
|
||||
.map(
|
||||
(MapFunction<Tuple2<Datasource, Relation>, DatasourceOrganization>) t2 -> DatasourceOrganization
|
||||
.newInstance(t2._2().getSource(), t2._2().getTarget()),
|
||||
Encoders.bean(DatasourceOrganization.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingPath + "/ datasourceOrganization");
|
||||
;
|
||||
|
||||
}
|
||||
|
||||
private static void prepareAlreadyLinkedAssociation(
|
||||
SparkSession spark, String alreadyLinkedPath) {
|
||||
String query = "Select source key, collect_set(target) valueSet "
|
||||
+ "from relation "
|
||||
+ "where datainfo.deletedbyinference = false "
|
||||
+ "and lower(relClass) = '"
|
||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||
+ "' "
|
||||
+ "group by source";
|
||||
SparkSession spark, String workingPath) {
|
||||
|
||||
readPath(spark, workingPath + "/relation", Relation.class)
|
||||
.filter(
|
||||
(FilterFunction<Relation>) r -> r
|
||||
.getRelClass()
|
||||
.toLowerCase()
|
||||
.equals(ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()))
|
||||
.groupByKey((MapFunction<Relation, String>) r -> r.getSource(), Encoders.STRING())
|
||||
.mapGroups((MapGroupsFunction<String, Relation, KeyValueSet>) (k, it) -> {
|
||||
Set<String> values = new HashSet<>();
|
||||
KeyValueSet kvs = new KeyValueSet();
|
||||
kvs.setKey(k);
|
||||
values.add(it.next().getTarget());
|
||||
it.forEachRemaining(r -> values.add(r.getTarget()));
|
||||
kvs.setValueSet(new ArrayList<>(values));
|
||||
return kvs;
|
||||
}, Encoders.bean(KeyValueSet.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingPath + "/alreadyLinked");
|
||||
|
||||
spark
|
||||
.sql(query)
|
||||
.as(Encoders.bean(KeyValueSet.class))
|
||||
// TODO retry to stick with datasets
|
||||
.toJavaRDD()
|
||||
.map(r -> OBJECT_MAPPER.writeValueAsString(r))
|
||||
.saveAsTextFile(alreadyLinkedPath, GzipCodec.class);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
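In the diff above, PrepareResultInstRepoAssociation derives the datasource-organization pairs with a typed joinWith instead of a Hive SQL query: the pre-filtered institutional-repository datasources are joined to the isProvidedBy relations on the datasource id, and each matching pair is mapped to a small association bean. A rough sketch on simplified beans, with hypothetical paths, is shown below; it is an illustration of the join mechanics, not the project's exact classes.

```java
import java.io.Serializable;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class DatasourceOrganizationJoinSketch {

    // simplified stand-ins for the Datasource / Relation / DatasourceOrganization beans
    public static class Ds implements Serializable {
        private String id;
        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
    }

    public static class Rel implements Serializable {
        private String source;
        private String target;
        public String getSource() { return source; }
        public void setSource(String source) { this.source = source; }
        public String getTarget() { return target; }
        public void setTarget(String target) { this.target = target; }
    }

    public static class DsOrg implements Serializable {
        private String datasourceId;
        private String organizationId;
        public String getDatasourceId() { return datasourceId; }
        public void setDatasourceId(String datasourceId) { this.datasourceId = datasourceId; }
        public String getOrganizationId() { return organizationId; }
        public void setOrganizationId(String organizationId) { this.organizationId = organizationId; }
    }

    // inner join: keep only datasources that have an isProvidedBy relation,
    // and turn each (datasource, relation) pair into a datasource -> organization link
    public static Dataset<DsOrg> associate(Dataset<Ds> datasources, Dataset<Rel> providedBy) {
        return datasources
            .joinWith(providedBy, datasources.col("id").equalTo(providedBy.col("source")))
            .map((MapFunction<Tuple2<Ds, Rel>, DsOrg>) t2 -> {
                DsOrg dsOrg = new DsOrg();
                dsOrg.setDatasourceId(t2._2().getSource());
                dsOrg.setOrganizationId(t2._2().getTarget());
                return dsOrg;
            }, Encoders.bean(DsOrg.class));
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("ds-org-join").master("local[*]").getOrCreate();
        Dataset<Ds> datasources = spark.read().json("/tmp/working/datasource").as(Encoders.bean(Ds.class));
        Dataset<Rel> providedBy = spark.read().json("/tmp/working/relation").as(Encoders.bean(Rel.class));
        associate(datasources, providedBy).show(false);
        spark.stop();
    }
}
```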
@ -11,10 +11,7 @@ import java.util.Optional;
|
|||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.apache.spark.sql.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -71,12 +68,6 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
|||
final String resultClassName = parser.get("resultTableName");
|
||||
log.info("resultTableName: {}", resultClassName);
|
||||
|
||||
final Boolean saveGraph = Optional
|
||||
.ofNullable(parser.get("saveGraph"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("saveGraph: {}", saveGraph);
|
||||
|
||||
Class<? extends Result> resultClazz = (Class<? extends Result>) Class.forName(resultClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
@ -86,15 +77,15 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
|||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
if (saveGraph) {
|
||||
execPropagation(
|
||||
spark,
|
||||
datasourceorganization,
|
||||
alreadylinked,
|
||||
inputPath,
|
||||
outputPath,
|
||||
resultClazz);
|
||||
}
|
||||
|
||||
execPropagation(
|
||||
spark,
|
||||
datasourceorganization,
|
||||
alreadylinked,
|
||||
inputPath,
|
||||
outputPath,
|
||||
resultClazz);
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -119,9 +110,15 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
|||
"left_outer")
|
||||
.flatMap(createRelationFn(), Encoders.bean(Relation.class))
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
|
||||
readPath(spark, outputPath, Relation.class)
|
||||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(inputPath.substring(0, inputPath.lastIndexOf("/") + 1) + "relation");
|
||||
}
|
||||
|
||||
private static FlatMapFunction<Tuple2<KeyValueSet, KeyValueSet>, Relation> createRelationFn() {
|
||||
|
@ -159,8 +156,16 @@ public class SparkResultToOrganizationFromIstRepoJob {
|
|||
|
||||
Dataset<R> result = readPath(spark, inputPath, resultClazz);
|
||||
result.createOrReplaceTempView("result");
|
||||
createCfHbforResult(spark);
|
||||
|
||||
Dataset<Row> cfhb = spark
|
||||
.sql(
|
||||
"select distinct r.id, inst.collectedfrom.key cf, inst.hostedby.key hb "
|
||||
+
|
||||
"from result r " +
|
||||
"lateral view explode(instance) i as inst " +
|
||||
"where r.datainfo.deletedbyinference=false");
|
||||
// createCfHbforResult(spark);
|
||||
cfhb.createOrReplaceTempView("cfhb");
|
||||
dsOrg.createOrReplaceTempView("rels");
|
||||
|
||||
return spark
|
||||
|
|
|
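The hunk above inlines the collectedfrom/hostedby extraction rather than calling createCfHbforResult: each result row is exploded on its instance array and the two datasource keys are projected out per instance. Below is a hedged sketch of the same LATERAL VIEW EXPLODE query on a toy, hand-built schema; the field names mirror the query in the diff, everything else (paths, sample records) is invented for the example.

```java
import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ExplodeInstanceSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("explode-instance").master("local[*]").getOrCreate();

        // toy records shaped like the relevant slice of a result: an id plus an instance
        // array whose elements carry collectedfrom.key and hostedby.key
        Dataset<String> json = spark.createDataset(Arrays.asList(
            "{\"id\":\"r1\",\"instance\":[{\"collectedfrom\":{\"key\":\"ds1\"},\"hostedby\":{\"key\":\"ds2\"}}]}",
            "{\"id\":\"r2\",\"instance\":[{\"collectedfrom\":{\"key\":\"ds3\"},\"hostedby\":{\"key\":\"ds3\"}}]}"),
            Encoders.STRING());

        spark.read().json(json).createOrReplaceTempView("result");

        // one row per (result, instance) pair, with the two datasource keys projected out
        Dataset<Row> cfhb = spark.sql(
            "select distinct r.id, inst.collectedfrom.key as cf, inst.hostedby.key as hb "
                + "from result r "
                + "lateral view explode(instance) i as inst");

        cfhb.show(false);
        spark.stop();
    }
}
```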
@ -98,13 +98,13 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
|
|||
String leavesPath,
|
||||
String childParentPath,
|
||||
String resultOrganizationPath,
|
||||
String graphPath,
|
||||
String relationPath,
|
||||
String workingPath,
|
||||
String outputPath,
|
||||
int iterations) {
|
||||
if (iterations == 1) {
|
||||
doPropagateOnce(
|
||||
spark, leavesPath, childParentPath, resultOrganizationPath, graphPath,
|
||||
spark, leavesPath, childParentPath, resultOrganizationPath, relationPath,
|
||||
workingPath, outputPath);
|
||||
} else {
|
||||
|
||||
|
@ -123,26 +123,26 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
|
|||
notReachedFirstParent);
|
||||
|
||||
doPropagate(
|
||||
spark, leavesPath, childParentPath, resultOrganizationPath, graphPath,
|
||||
spark, leavesPath, childParentPath, resultOrganizationPath, relationPath,
|
||||
workingPath, outputPath, propagationCounter);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static void doPropagateOnce(SparkSession spark, String leavesPath, String childParentPath,
|
||||
String resultOrganizationPath, String graphPath, String workingPath,
|
||||
String resultOrganizationPath, String relationPath, String workingPath,
|
||||
String outputPath) {
|
||||
|
||||
StepActions
|
||||
.execStep(
|
||||
spark, graphPath, workingPath + NEW_RELATION_PATH,
|
||||
spark, relationPath, workingPath + NEW_RELATION_PATH,
|
||||
leavesPath, childParentPath, resultOrganizationPath);
|
||||
|
||||
addNewRelations(spark, workingPath + NEW_RELATION_PATH, outputPath);
|
||||
}
|
||||
|
||||
private static void doPropagate(SparkSession spark, String leavesPath, String childParentPath,
|
||||
String resultOrganizationPath, String graphPath, String workingPath, String outputPath,
|
||||
String resultOrganizationPath, String relationPath, String workingPath, String outputPath,
|
||||
PropagationCounter propagationCounter) {
|
||||
int iteration = 0;
|
||||
long leavesCount;
|
||||
|
@ -151,7 +151,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
|
|||
iteration++;
|
||||
StepActions
|
||||
.execStep(
|
||||
spark, graphPath, workingPath + NEW_RELATION_PATH,
|
||||
spark, relationPath, workingPath + NEW_RELATION_PATH,
|
||||
leavesPath, childParentPath, resultOrganizationPath);
|
||||
StepActions
|
||||
.prepareForNextStep(
|
||||
|
@ -225,7 +225,6 @@ public class SparkResultToOrganizationFromSemRel implements Serializable {
|
|||
|
||||
, Encoders.bean(Relation.class))
|
||||
.write()
|
||||
|
||||
.mode(SaveMode.Append)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
|
|
|
@ -27,10 +27,10 @@ import scala.Tuple2;
|
|||
public class StepActions implements Serializable {
|
||||
|
||||
public static void execStep(SparkSession spark,
|
||||
String graphPath, String newRelationPath,
|
||||
String relationPath, String newRelationPath,
|
||||
String leavesPath, String chldParentOrgPath, String resultOrgPath) {
|
||||
|
||||
Dataset<Relation> relationGraph = readPath(spark, graphPath, Relation.class);
|
||||
Dataset<Relation> relationGraph = readPath(spark, relationPath, Relation.class);
|
||||
// select only the relation source target among those proposed by propagation that are not already existent
|
||||
getNewRels(
|
||||
newRelationPath, relationGraph,
|
||||
|
|
|
@ -29,6 +29,13 @@
|
|||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
|
||||
"paramName": "rt",
|
||||
"paramLongName": "resultType",
|
||||
"paramDescription": "the result type",
|
||||
"paramRequired": true
|
||||
}
|
||||
|
||||
]
|
|
@ -11,17 +11,12 @@
|
|||
"paramDescription": "the name of the result table we are currently working on",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "preparedInfoPath",
|
||||
"paramName": "wp",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the path where prepared info have been stored",
|
||||
"paramRequired": false
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramName": "wp",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
|
|
@ -5,12 +5,6 @@
|
|||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"out",
|
||||
"paramLongName":"outputPath",
|
||||
"paramDescription": "the output path",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"w",
|
||||
"paramLongName":"workingPath",
|
||||
|
@ -23,12 +17,7 @@
|
|||
"paramDescription": "the name of the result table we are currently working on",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "preparedInfoPath",
|
||||
"paramDescription": "the path where prepared info have been stored",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
|
|
|
@ -5,12 +5,6 @@
|
|||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"sg",
|
||||
"paramLongName":"saveGraph",
|
||||
"paramDescription": "true if the new version of the graph must be saved",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"h",
|
||||
"paramLongName":"hive_metastore_uris",
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
[
|
||||
{
|
||||
"paramName":"s",
|
||||
"paramLongName":"sourcePath",
|
||||
"paramDescription": "the path of the sequencial file to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"as",
|
||||
"paramLongName":"allowedsemrels",
|
||||
"paramDescription": "the allowed sematinc relations for propagation",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ssm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
|
@ -12,14 +12,14 @@
|
|||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"h",
|
||||
"paramLongName":"hive_metastore_uris",
|
||||
"paramDescription": "the hive metastore uris",
|
||||
"paramName":"ap",
|
||||
"paramLongName":"allowedpids",
|
||||
"paramDescription": "the allowed pid type to be used for propagation",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramName": "wp",
|
||||
"paramLongName": "workingPath",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
},
|
||||
|
|
|
@ -1,184 +0,0 @@
|
|||
<workflow-app name="project_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrels</name>
|
||||
<description>the allowed semantics </description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_publication"/>
|
||||
<path start="copy_dataset"/>
|
||||
<path start="copy_orp"/>
|
||||
<path start="copy_software"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_publication">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_dataset">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_orp">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_software">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||
<arg>${nameNode}/${outputPath}/software</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait" to="prepare_project_results_association"/>
|
||||
|
||||
<action name="prepare_project_results_association">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>PrepareProjectResultsAssociation</name>
|
||||
<class>eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/preparedInfo/potentialUpdates</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="apply_propagation"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="apply_propagation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>ProjectToResultPropagation</name>
|
||||
<class>eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/preparedInfo/potentialUpdates</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -6,21 +6,9 @@
|
|||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"h",
|
||||
"paramLongName":"hive_metastore_uris",
|
||||
"paramDescription": "the hive metastore uris",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"dop",
|
||||
"paramLongName":"datasourceOrganizationPath",
|
||||
"paramDescription": "path where to store/find association from datasource and organization",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"alp",
|
||||
"paramLongName":"alreadyLinkedPath",
|
||||
"paramDescription": "path where to store/find already linked results and organizations",
|
||||
"paramName":"wp",
|
||||
"paramLongName":"workingPath",
|
||||
"paramDescription": "path where to store/find prepared/ filtered data",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
|
|
|
@ -1,195 +0,0 @@
|
|||
<workflow-app name="affiliation_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>sets the outputPath</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="resume_from"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<decision name="resume_from">
|
||||
<switch>
|
||||
<case to="prepare_info">${wf:conf('resumeFrom') eq 'PrepareInfo'}</case>
|
||||
<default to="reset_outputpath"/> <!-- first action to be done when downloadDump is to be performed -->
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_publication"/>
|
||||
<path start="copy_dataset"/>
|
||||
<path start="copy_orp"/>
|
||||
<path start="copy_software"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_publication">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_dataset">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_orp">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_software">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||
<arg>${nameNode}/${outputPath}/software</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait" to="prepare_info"/>
|
||||
|
||||
|
||||
<action name="prepare_info">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>PrepareResultOrganizationAssociation</name>
|
||||
<class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--leavesPath</arg><arg>${workingDir}/preparedInfo/leavesPath</arg>
|
||||
<arg>--childParentPath</arg><arg>${workingDir}/preparedInfo/childParentPath</arg>
|
||||
<arg>--resultOrgPath</arg><arg>${workingDir}/preparedInfo/resultOrgPath</arg>
|
||||
<arg>--relationPath</arg><arg>${workingDir}/preparedInfo/relation</arg>
|
||||
</spark>
|
||||
<ok to="apply_resulttoorganization_propagation"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="apply_resulttoorganization_propagation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>resultToOrganizationFromSemRel</name>
|
||||
<class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--relationPath</arg><arg>${workingDir}/preparedInfo/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
|
||||
<arg>--leavesPath</arg><arg>${workingDir}/preparedInfo/leavesPath</arg>
|
||||
<arg>--childParentPath</arg><arg>${workingDir}/preparedInfo/childParentPath</arg>
|
||||
<arg>--resultOrgPath</arg><arg>${workingDir}/preparedInfo/resultOrgPath</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}/working</arg>
|
||||
<arg>--iterations</arg><arg>${iterations}</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -0,0 +1,30 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<value>openaire</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,9 @@
|
|||
## This is a classpath-based import file (this header is required)
|
||||
orcid_propagation classpath eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app
|
||||
bulk_tagging classpath eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app
|
||||
affiliation_inst_repo classpath eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app
|
||||
affiliation_semantic_relation classpath eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfromsemrel/oozie_app
|
||||
community_organization classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app
|
||||
result_project classpath eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app
|
||||
community_sem_rel classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app
|
||||
country_propagation classpath eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app
|
|
@ -0,0 +1,312 @@
|
|||
<workflow-app name="enrichment_main" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrelsorcidprop</name>
|
||||
<description>the semantic relationships allowed for propagation</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrelsresultproject</name>
|
||||
<description>the allowed semantics </description>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrelscommunitysemrel</name>
|
||||
<description>the semantic relationships allowed for propagation</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>datasourceWhitelistForCountryPropagation</name>
|
||||
<description>the white list</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedtypes</name>
|
||||
<description>the allowed types</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>organizationtoresultcommunitymap</name>
|
||||
<description>organization community map</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<description>the isLookup service endpoint</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>pathMap</name>
|
||||
<description>the json path associated to each selection field</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>blacklist</name>
|
||||
<description>list of datasources in blacklist for the affiliation from instrepo propagation</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>hiveDbName</name>
|
||||
<description>the target hive database name</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveJdbcUrl</name>
|
||||
<description>hive server jdbc url</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hiveMetastoreUris</name>
|
||||
<description>hive server metastore URIs</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>${queueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.launcher.mapred.job.queue.name</name>
|
||||
<value>${oozieLauncherQueueName}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="resumeFrom"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<decision name="resumeFrom">
|
||||
<switch>
|
||||
<case to="bulk_tagging">${wf:conf('resumeFrom') eq 'BulkTagging'}</case>
|
||||
<case to="affiliation_inst_repo">${wf:conf('resumeFrom') eq 'AffiliationInstitutionalRepository'}</case>
|
||||
<case to="affiliation_semantic_relation">${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'}</case>
|
||||
<case to="community_organization">${wf:conf('resumeFrom') eq 'CommunityOrganization'}</case>
|
||||
<case to="result_project">${wf:conf('resumeFrom') eq 'ResultProject'}</case>
|
||||
<case to="community_sem_rel">${wf:conf('resumeFrom') eq 'CommunitySemanticRelation'}</case>
|
||||
<case to="country_propagation">${wf:conf('resumeFrom') eq 'CountryPropagation'}</case>
|
||||
<default to="orcid_propagation"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
|
||||
<action name="orcid_propagation">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/orcid_propagation
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${sourcePath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrels</name>
|
||||
<value>${allowedsemrelsorcidprop}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="bulk_tagging" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="bulk_tagging">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/bulk_tagging
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookUpUrl</name>
|
||||
<value>${isLookUpUrl}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>pathMap</name>
|
||||
<value>${pathMap}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="affiliation_inst_repo" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="affiliation_inst_repo">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/affiliation_inst_repo
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<value>${workingDir}/relations</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>blacklist</name>
|
||||
<value>${blacklist}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="affiliation_semantic_relation" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="affiliation_semantic_relation">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/affiliation_semantic_relation
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="community_organization" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="community_organization">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/community_organization
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>organizationtoresultcommunitymap</name>
|
||||
<value>${organizationtoresultcommunitymap}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="result_project" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="result_project">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/result_project
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrels</name>
|
||||
<value>${allowedsemrelsresultproject}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="community_sem_rel" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="community_sem_rel">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/community_sem_rel
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>allowedsemrels</name>
|
||||
<value>${allowedsemrelscommunitysemrel}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="country_propagation" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<action name="country_propagation">
|
||||
<sub-workflow>
|
||||
<app-path>${wf:appPath()}/country_propagation
|
||||
</app-path>
|
||||
<propagate-configuration/>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<value>${outputPath}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>whitelist</name>
|
||||
<value>${datasourceWhitelistForCountryPropagation}</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedtypes</name>
|
||||
<value>${allowedtupes}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</sub-workflow>
|
||||
<ok to="End" />
|
||||
<error to="Kill" />
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -44,65 +44,12 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
<start to="fork_exec_bulktag"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="fork_exec_bulktag"/>
|
||||
|
||||
<fork name="fork_exec_bulktag">
|
||||
<path start="join_bulktag_publication"/>
|
||||
|
@ -130,7 +77,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/bulktag/publication</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -157,7 +104,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/bulktag/dataset</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -184,7 +131,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/bulktag/otherresearchproduct</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -211,7 +158,7 @@
|
|||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/bulktag/software</arg>
|
||||
<arg>--pathMap</arg><arg>${pathMap}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -239,7 +186,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/bulktag</arg>
|
||||
</spark>
|
||||
<ok to="eosc_get_datasource_master"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -283,7 +230,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/publication</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/bulktag/publication</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
|
@ -309,7 +256,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/dataset</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/bulktag/dataset</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
|
@ -334,7 +281,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/software</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/bulktag/software</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
|
@ -359,14 +306,24 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/eoscContextTag/otherresearchproduct</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/bulktag/otherresearchproduct</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--datasourceMapPath</arg><arg>${workingDir}/datasourcemaster</arg>
|
||||
</spark>
|
||||
<ok to="wait_eosc_context_tag"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<join name="wait_eosc_context_tag" to="End"/>
|
||||
<join name="wait_eosc_context_tag" to="reset_workingDir"/>
|
||||
|
||||
<action name="reset_workingDir">
|
||||
<fs>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -12,11 +12,6 @@
|
|||
<name>allowedtypes</name>
|
||||
<description>the allowed types</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
|
@ -30,65 +25,13 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
<start to="prepare_datasource_country_association"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="prepare_datasource_country_association"/>
|
||||
|
||||
<action name="prepare_datasource_country_association">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
|
@ -110,20 +53,20 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--whitelist</arg><arg>${whitelist}</arg>
|
||||
<arg>--allowedtypes</arg><arg>${allowedtypes}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
</spark>
|
||||
<ok to="fork_join_prepare_result_country"/>
|
||||
<ok to="fork_prepare_result_country"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_join_prepare_result_country">
|
||||
<path start="join_prepareresult_publication"/>
|
||||
<path start="join_prepareresult_dataset"/>
|
||||
<path start="join_prepareresult_otherresearchproduct"/>
|
||||
<path start="join_prepareresult_software"/>
|
||||
<fork name="fork_prepare_result_country">
|
||||
<path start="prepareresult_publication"/>
|
||||
<path start="prepareresult_dataset"/>
|
||||
<path start="prepareresult_otherresearchproduct"/>
|
||||
<path start="prepareresult_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="join_prepareresult_publication">
|
||||
<action name="prepareresult_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -144,16 +87,14 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/workingP</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="wait_prepare"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_prepareresult_dataset">
|
||||
<action name="prepareresult_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -174,16 +115,14 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/workingD</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="wait_prepare"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_prepareresult_otherresearchproduct">
|
||||
<action name="prepareresult_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -204,16 +143,14 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/workingO</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="wait_prepare"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_prepareresult_software">
|
||||
<action name="prepareresult_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -234,10 +171,8 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/workingS</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
|
||||
</spark>
|
||||
<ok to="wait_prepare"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -245,14 +180,14 @@
|
|||
|
||||
<join name="wait_prepare" to="fork_join_apply_country_propagation"/>
|
||||
|
||||
<fork name="fork_join_apply_country_propagation">
|
||||
<path start="join_propagation_publication"/>
|
||||
<path start="join_propagation_dataset"/>
|
||||
<path start="join_propagation_otherresearchproduct"/>
|
||||
<path start="join_propagation_software"/>
|
||||
<fork name="fork_apply_country_propagation">
|
||||
<path start="propagation_publication"/>
|
||||
<path start="propagation_dataset"/>
|
||||
<path start="propagation_otherresearchproduct"/>
|
||||
<path start="propagation_software"/>
|
||||
</fork>
|
||||
|
||||
<action name="join_propagation_publication">
|
||||
<action name="propagation_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -273,15 +208,15 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/publication</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagation_dataset">
|
||||
<action name="propagation_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -302,15 +237,15 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/dataset</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagation_otherresearchproduct">
|
||||
<action name="propagation_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -331,15 +266,15 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="join_propagation_software">
|
||||
<action name="propagation_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
|
@ -360,16 +295,22 @@
|
|||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/software</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/country</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
</spark>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait" to="End"/>
|
||||
|
||||
<join name="wait" to="reset_workingDir"/>
|
||||
<action name="reset_workingDir">
|
||||
<fs>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -80,7 +80,37 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="fork_prepare_assoc_step1"/>
|
||||
<join name="copy_wait" to="prepare_relations"/>
|
||||
|
||||
<action name="prepare_relations">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>ORCIDPropagation-PreparePhase0-SelectRELATIONS</name>
|
||||
<class>eu.dnetlib.dhp.orcidtoresultfromsemrel.PrepareResultOrcidAssociationStep0</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.speculation=false
|
||||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcidprop/relationSubset</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
</spark>
|
||||
<ok to="fork_prepare_assoc_step1"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="fork_prepare_assoc_step1">
|
||||
<path start="join_prepare_publication"/>
|
||||
|
@ -112,10 +142,10 @@
|
|||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/orcidprop</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--allowedpids</arg><arg>${allowedpids}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -140,10 +170,10 @@
|
|||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/orcidprop</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--allowedpids</arg><arg>${allowedpids}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -168,10 +198,10 @@
|
|||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/orcidprop</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--allowedpids</arg><arg>${allowedpids}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -196,10 +226,10 @@
|
|||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--workingPath</arg><arg>${workingDir}/orcidprop</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--allowedpids</arg><arg>${allowedpids}</arg>
|
||||
</spark>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -225,8 +255,8 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/targetOrcidAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/orcidprop</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/orcidprop//mergedOrcidAssoc</arg>
|
||||
</spark>
|
||||
<ok to="fork-join-exec-propagation"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -261,9 +291,8 @@
|
|||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
</spark>
|
||||
|
@ -292,9 +321,8 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
</spark>
|
||||
|
@ -323,9 +351,8 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
</spark>
|
||||
|
@ -354,9 +381,8 @@
|
|||
--conf spark.hadoop.mapreduce.map.speculative=false
|
||||
--conf spark.hadoop.mapreduce.reduce.speculative=false
|
||||
</spark-opts>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/preparedInfo/mergedOrcidAssoc</arg>
|
||||
<arg>--possibleUpdatesPath</arg><arg>${workingDir}/orcidprop/mergedOrcidAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
</spark>
|
||||
|
@ -364,7 +390,16 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait2" to="End"/>
|
||||
<join name="wait2" to="reset_workingDir"/>
|
||||
|
||||
<action name="reset_workingDir">
|
||||
<fs>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<end name="End"/>
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
<workflow-app name="project_to_result_propagation" xmlns="uri:oozie:workflow:0.5">
|
||||
<parameters>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>allowedsemrels</name>
|
||||
<description>the allowed semantics </description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<configuration>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>${oozieActionShareLibForSpark2}</value>
|
||||
</property>
|
||||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="prepare_project_results_association"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
|
||||
<action name="prepare_project_results_association">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>PrepareProjectResultsAssociation</name>
|
||||
<class>eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="apply_propagation"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="apply_propagation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>ProjectToResultPropagation</name>
|
||||
<class>eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob</class>
|
||||
<jar>dhp-enrichment-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--outputPath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
|
||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
|
||||
</spark>
|
||||
<ok to="reset_workingDir"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<action name="reset_workingDir">
|
||||
<fs>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -8,10 +8,7 @@
|
|||
<name>organizationtoresultcommunitymap</name>
|
||||
<description>organization community map</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>outputPath</name>
|
||||
<description>the output path</description>
|
||||
</property>
|
||||
|
||||
</parameters>
|
||||
|
||||
<global>
|
||||
|
@ -25,66 +22,12 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
<start to="prepare_result_communitylist"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="prepare_result_communitylist"/>
|
||||
|
||||
<action name="prepare_result_communitylist">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
@ -104,7 +47,7 @@
|
|||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--organizationtoresultcommunitymap</arg><arg>${organizationtoresultcommunitymap}</arg>
|
||||
</spark>
|
||||
|
@ -137,12 +80,12 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityorganization/publication</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -166,12 +109,12 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityorganization/dataset</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -195,12 +138,12 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityorganization/otherresearchproduct</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -224,19 +167,27 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communityorganization/preparedInfo/resultCommunityList</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communityorganization/software</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait2" to="End"/>
|
||||
<join name="wait2" to="reset_workingDir"/>
|
||||
|
||||
<action name="reset_workingDir">
|
||||
<fs>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -18,73 +18,13 @@
|
|||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
<start to="fork_prepare_assoc_step1"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="copy_wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="copy_wait" to="fork_prepare_assoc_step1"/>
|
||||
|
||||
<fork name="fork_prepare_assoc_step1">
|
||||
<path start="join_prepare_publication"/>
|
||||
|
@ -114,7 +54,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -143,7 +83,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -172,7 +112,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -201,7 +141,7 @@
|
|||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--allowedsemrels</arg><arg>${allowedsemrels}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</spark>
|
||||
|
@ -229,8 +169,8 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
|
||||
</spark>
|
||||
<ok to="fork-join-exec-propagation"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -261,12 +201,12 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/publication</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -290,12 +230,12 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/dataset</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -319,12 +259,12 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/otherresearchproduct</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -348,19 +288,26 @@
|
|||
--conf spark.dynamicAllocation.enabled=true
|
||||
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
|
||||
</spark-opts>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--preparedInfoPath</arg><arg>${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc</arg>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
|
||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/communitysemrel/software</arg>
|
||||
|
||||
</spark>
|
||||
<ok to="wait2"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait2" to="End"/>
|
||||
|
||||
<join name="wait2" to="reset_workingDir"/>
|
||||
<action name="reset_workingDir">
|
||||
<fs>
|
||||
<delete path="${workingDir}"/>
|
||||
<mkdir path="${workingDir}"/>
|
||||
</fs>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
<end name="End"/>
|
||||
|
||||
</workflow-app>
|
|
@ -21,105 +21,12 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="reset_outputpath"/>
|
||||
<start to="prepare_result_organization_association"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<action name="reset_outputpath">
|
||||
<fs>
|
||||
<delete path="${outputPath}"/>
|
||||
<mkdir path="${outputPath}"/>
|
||||
</fs>
|
||||
<ok to="copy_entities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<fork name="copy_entities">
|
||||
<path start="copy_relation"/>
|
||||
<path start="copy_publication"/>
|
||||
<path start="copy_dataset"/>
|
||||
<path start="copy_orp"/>
|
||||
<path start="copy_software"/>
|
||||
<path start="copy_organization"/>
|
||||
<path start="copy_projects"/>
|
||||
<path start="copy_datasources"/>
|
||||
</fork>
|
||||
|
||||
<action name="copy_relation">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/relation</arg>
|
||||
<arg>${nameNode}/${outputPath}/relation</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_publication">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/publication</arg>
|
||||
<arg>${nameNode}/${outputPath}/publication</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_dataset">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/dataset</arg>
|
||||
<arg>${nameNode}/${outputPath}/dataset</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_orp">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/otherresearchproduct</arg>
|
||||
<arg>${nameNode}/${outputPath}/otherresearchproduct</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_software">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/software</arg>
|
||||
<arg>${nameNode}/${outputPath}/software</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_organization">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/organization</arg>
|
||||
<arg>${nameNode}/${outputPath}/organization</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_projects">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/project</arg>
|
||||
<arg>${nameNode}/${outputPath}/project</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="copy_datasources">
|
||||
<distcp xmlns="uri:oozie:distcp-action:0.2">
|
||||
<arg>${nameNode}/${sourcePath}/datasource</arg>
|
||||
<arg>${nameNode}/${outputPath}/datasource</arg>
|
||||
</distcp>
|
||||
<ok to="wait"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait" to="prepare_result_organization_association"/>
|
||||
|
||||
<action name="prepare_result_organization_association">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
|
@ -138,9 +45,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--workingPath</arg><arg>${workingDir}/affiliationInstRepo</arg>
<arg>--blacklist</arg><arg>${blacklist}</arg>
</spark>
<ok to="fork_join_apply_resulttoorganization_propagation"/>

@ -174,8 +79,8 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
</spark>

@ -203,8 +108,8 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
</spark>

@ -232,8 +137,8 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
</spark>

@ -261,8 +166,8 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/relation</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked</arg>
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
</spark>
@ -0,0 +1,97 @@
<workflow-app name="affiliation_from_semrel_propagation" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>
    </parameters>

    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>

    <start to="prepare_info"/>

    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <action name="prepare_info">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>PrepareResultOrganizationAssociation</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.PrepareInfo</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
            </spark-opts>
            <arg>--graphPath</arg><arg>${sourcePath}</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--leavesPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath</arg>
            <arg>--childParentPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath</arg>
            <arg>--resultOrgPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath</arg>
            <arg>--relationPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/relation</arg>
        </spark>
        <ok to="apply_resulttoorganization_propagation"/>
        <error to="Kill"/>
    </action>

    <action name="apply_resulttoorganization_propagation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>resultToOrganizationFromSemRel</name>
            <class>eu.dnetlib.dhp.resulttoorganizationfromsemrel.SparkResultToOrganizationFromSemRel</class>
            <jar>dhp-enrichment-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-cores=${sparkExecutorCores}
                --executor-memory=${sparkExecutorMemory}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.dynamicAllocation.enabled=true
                --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors}
                --conf spark.sql.shuffle.partitions=3840
            </spark-opts>
            <arg>--relationPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/relation</arg>
            <arg>--outputPath</arg><arg>${sourcePath}</arg>
            <arg>--leavesPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/leavesPath</arg>
            <arg>--childParentPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/childParentPath</arg>
            <arg>--resultOrgPath</arg><arg>${workingDir}/affiliationSemanticRelation/preparedInfo/resultOrgPath</arg>
            <arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/affiliationSemanticRelation/working</arg>
            <arg>--iterations</arg><arg>${iterations}</arg>
        </spark>
        <ok to="reset_workingDir"/>
        <error to="Kill"/>
    </action>

    <action name="reset_workingDir">
        <fs>
            <delete path="${workingDir}"/>
            <mkdir path="${workingDir}"/>
        </fs>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>

</workflow-app>
@ -27,16 +27,11 @@ import org.slf4j.LoggerFactory;
*/
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.bulktag.eosc.DatasourceMaster;
import eu.dnetlib.dhp.bulktag.eosc.SparkEoscBulkTag;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.*;

//"50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea" has instance hostedby eosc
//"50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1" has instance hostedby eosc
//"50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7" has two instances, one hostedby eosc
//"50|475c1990cbb2::3894c94123e96df8a21249957cf160cb" has EoscTag

public class EOSCContextTaggingTest {
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -78,6 +73,22 @@ public class EOSCContextTaggingTest {
|
|||
@Test
|
||||
void EoscContextTagTest() throws Exception {
|
||||
|
||||
//"50|475c1990cbb2::0fecfb874d9395aa69d2f4d7cd1acbea" has instance hostedby eosc (cris)
|
||||
//"50|475c1990cbb2::3185cd5d8a2b0a06bb9b23ef11748eb1" has instance hostedby eosc (zenodo)
|
||||
//"50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7" has two instance one hostedby eosc (wrong compatibility)
|
||||
//"50|475c1990cbb2::3894c94123e96df8a21249957cf160cb" has EoscTag
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/datasource/datasource_1").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Datasource>) value -> OBJECT_MAPPER.readValue(value, Datasource.class),
|
||||
Encoders.bean(Datasource.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/datasource");
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json").getPath())
|
||||
|
@ -94,17 +105,24 @@ public class EOSCContextTaggingTest {
|
|||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
workingDir.toString() + "/input/dataset",
|
||||
"-workingPath", workingDir.toString() + "/working/dataset",
|
||||
workingDir.toString() + "/input/",
|
||||
"-workingPath", workingDir.toString() + "/working/",
|
||||
"-datasourceMapPath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/eosc/datasourceMasterAssociation/datasourceMaster")
|
||||
.getPath(),
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset"
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-resultType", "dataset"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2, sc
|
||||
.textFile(workingDir.toString() + "/working/datasource")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, DatasourceMaster.class))
|
||||
.count());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
@ -113,7 +131,7 @@ public class EOSCContextTaggingTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
4,
|
||||
2,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
|
@ -140,17 +158,17 @@ public class EOSCContextTaggingTest {
|
|||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
0,
|
||||
tmp
|
||||
.filter(
|
||||
d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
|
||||
d -> d.getId().equals("50|475c1990cbb2::449f28eefccf9f70c04ad70d61e041c7")
|
||||
&&
|
||||
d.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
0,
|
||||
tmp
|
||||
.filter(
|
||||
d -> d.getId().equals("50|475c1990cbb2::3894c94123e96df8a21249957cf160cb")
|
||||
|
@ -159,4 +177,62 @@ public class EOSCContextTaggingTest {
|
|||
.count());
|
||||
}
|
||||
|
||||
@Test
|
||||
void EoscContextTagTestEmptyDatasource() throws Exception {
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/dataset/dataset_10.json").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Dataset>) value -> OBJECT_MAPPER.readValue(value, Dataset.class),
|
||||
Encoders.bean(Dataset.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/dataset");
|
||||
|
||||
spark
|
||||
.read()
|
||||
.textFile(getClass().getResource("/eu/dnetlib/dhp/bulktag/eosc/datasource/datasource").getPath())
|
||||
.map(
|
||||
(MapFunction<String, Datasource>) value -> OBJECT_MAPPER.readValue(value, Datasource.class),
|
||||
Encoders.bean(Datasource.class))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(workingDir.toString() + "/input/datasource");
|
||||
|
||||
SparkEoscBulkTag
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
workingDir.toString() + "/input/",
|
||||
"-workingPath", workingDir.toString() + "/working/",
|
||||
"-datasourceMapPath",
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/bulktag/eosc/datasourceMasterAssociation/datasourceMaster")
|
||||
.getPath(),
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-resultType", "dataset"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getContext().stream().anyMatch(c -> c.getId().equals("eosc")))
|
||||
.count());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,634 @@
|
|||
package eu.dnetlib.dhp.countrypropagation;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry;
|
||||
import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet;
|
||||
import eu.dnetlib.dhp.schema.oaf.Country;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.FlatMapFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 23/11/22
|
||||
*/
|
||||
public class CountryPropagationAllStepsTest {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(DatasourceCountryPreparationTest.class.getSimpleName());
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(DatasourceCountryPreparationTest.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("hive.metastore.local", "true");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(DatasourceCountryPreparationTest.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void allStepsTest() throws Exception {
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph")
|
||||
.getPath();
|
||||
|
||||
PrepareDatasourceCountryAssociation
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath", sourcePath,
|
||||
"--workingPath", workingDir.toString() + "/country",
|
||||
"--allowedtypes", "pubsrepository::institutional",
|
||||
"--whitelist",
|
||||
"10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48"
|
||||
});
|
||||
|
||||
|
||||
sc.textFile(
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication")
|
||||
.getPath()).saveAsTextFile(workingDir.toString() + "/source/publication");
|
||||
|
||||
sc
|
||||
.textFile(
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph/software")
|
||||
.getPath()).saveAsTextFile(workingDir.toString() + "/source/software");
|
||||
|
||||
|
||||
verifyDatasourceCountry();
|
||||
|
||||
PrepareResultCountrySet
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--workingPath", workingDir.toString() + "/country",
|
||||
"--sourcePath", workingDir.toString() + "/source/publication",
|
||||
"--resultTableName", Publication.class.getCanonicalName()
|
||||
});
|
||||
|
||||
verifyResultCountrySet();
|
||||
|
||||
PrepareResultCountrySet
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--workingPath", workingDir.toString() + "/country",
|
||||
"--sourcePath", workingDir.toString() + "/source/software",
|
||||
"--resultTableName", Software.class.getCanonicalName()
|
||||
});
|
||||
|
||||
SparkCountryPropagationJob
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath",workingDir.toString() + "/source/publication",
|
||||
"-resultTableName", Publication.class.getCanonicalName(),
|
||||
"-workingPath", workingDir.toString() +"/country"
|
||||
});
|
||||
|
||||
verifyPropagationPublication();
|
||||
|
||||
|
||||
|
||||
|
||||
SparkCountryPropagationJob
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath",workingDir.toString() + "/source/software",
|
||||
"-resultTableName", Software.class.getCanonicalName(),
|
||||
"-workingPath", workingDir.toString() + "/country"
|
||||
});
|
||||
|
||||
|
||||
verifyPropagationSoftware();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void verifyDatasourceCountry(){
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<DatasourceCountry> tmp = sc
|
||||
.textFile(workingDir.toString() + "/country/datasourceCountry")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, DatasourceCountry.class));
|
||||
|
||||
Assertions.assertEquals(3, tmp.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14"))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280"))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539"))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"NL", tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry()
|
||||
.getClassid());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Netherlands", tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|eurocrisdris::fe4903425d9040f680d8610d9079ea14"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry()
|
||||
.getClassname());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"IT", tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry()
|
||||
.getClassid());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Italy", tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|opendoar____::f0dd4a99fba6075a9494772b58f95280"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry()
|
||||
.getClassname());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"FR", tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry()
|
||||
.getClassid());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"France", tmp
|
||||
.filter(
|
||||
dsc -> dsc
|
||||
.getDataSourceId()
|
||||
.equals("10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry()
|
||||
.getClassname());
|
||||
|
||||
tmp.foreach(e -> System.out.println(OBJECT_MAPPER.writeValueAsString(e)));
|
||||
}
|
||||
|
||||
void verifyResultCountrySet(){
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<ResultCountrySet> tmp = sc
|
||||
.textFile(workingDir.toString() + "/country/preparedInfo/publication")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResultCountrySet.class));
|
||||
|
||||
Assertions.assertEquals(5, tmp.count());
|
||||
|
||||
ResultCountrySet rc = tmp
|
||||
.filter(r -> r.getResultId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"))
|
||||
.collect()
|
||||
.get(0);
|
||||
Assertions.assertEquals(1, rc.getCountrySet().size());
|
||||
Assertions.assertEquals("NL", rc.getCountrySet().get(0).getClassid());
|
||||
Assertions.assertEquals("Netherlands", rc.getCountrySet().get(0).getClassname());
|
||||
|
||||
rc = tmp
|
||||
.filter(r -> r.getResultId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"))
|
||||
.collect()
|
||||
.get(0);
|
||||
Assertions.assertEquals(1, rc.getCountrySet().size());
|
||||
Assertions.assertEquals("NL", rc.getCountrySet().get(0).getClassid());
|
||||
Assertions.assertEquals("Netherlands", rc.getCountrySet().get(0).getClassname());
|
||||
|
||||
rc = tmp
|
||||
.filter(r -> r.getResultId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6"))
|
||||
.collect()
|
||||
.get(0);
|
||||
Assertions.assertEquals(2, rc.getCountrySet().size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
rc
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
rc
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France")));
|
||||
|
||||
rc = tmp
|
||||
.filter(r -> r.getResultId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df"))
|
||||
.collect()
|
||||
.get(0);
|
||||
Assertions.assertEquals(2, rc.getCountrySet().size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
rc
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
rc
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands")));
|
||||
|
||||
rc = tmp
|
||||
.filter(r -> r.getResultId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"))
|
||||
.collect()
|
||||
.get(0);
|
||||
Assertions.assertEquals(2, rc.getCountrySet().size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
rc
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
rc
|
||||
.getCountrySet()
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France")));
|
||||
}
|
||||
|
||||
void verifyPropagationPublication(){
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Publication> tmp = sc
|
||||
.textFile(workingDir.toString() + "/country/publication")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
|
||||
|
||||
Assertions.assertEquals(12, tmp.count());
|
||||
|
||||
Assertions.assertEquals(5, tmp.filter(r -> r.getCountry().size() > 0).count());
|
||||
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemeid())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getCountry()
|
||||
.stream()
|
||||
.forEach(c -> Assertions.assertEquals("dnet:countries", c.getSchemename())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getCountry()
|
||||
.stream()
|
||||
.forEach(c -> Assertions.assertFalse(c.getDataInfo().getDeletedbyinference())));
|
||||
tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertFalse(c.getDataInfo().getInvisible())));
|
||||
tmp.foreach(r -> r.getCountry().stream().forEach(c -> Assertions.assertTrue(c.getDataInfo().getInferred())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r.getCountry().stream().forEach(c -> Assertions.assertEquals("0.85", c.getDataInfo().getTrust())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getCountry()
|
||||
.stream()
|
||||
.forEach(c -> Assertions.assertEquals("propagation", c.getDataInfo().getInferenceprovenance())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getCountry()
|
||||
.stream()
|
||||
.forEach(
|
||||
c -> Assertions
|
||||
.assertEquals("country:instrepos", c.getDataInfo().getProvenanceaction().getClassid())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getCountry()
|
||||
.stream()
|
||||
.forEach(
|
||||
c -> Assertions
|
||||
.assertEquals(
|
||||
"dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemeid())));
|
||||
tmp
|
||||
.foreach(
|
||||
r -> r
|
||||
.getCountry()
|
||||
.stream()
|
||||
.forEach(
|
||||
c -> Assertions
|
||||
.assertEquals(
|
||||
"dnet:provenanceActions", c.getDataInfo().getProvenanceaction().getSchemename())));
|
||||
|
||||
List<Country> countries = tmp
|
||||
.filter(r -> r.getId().equals("50|06cdd3ff4700::49ec404cee4e1452808aabeaffbd3072"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry();
|
||||
Assertions.assertEquals(1, countries.size());
|
||||
Assertions.assertEquals("NL", countries.get(0).getClassid());
|
||||
Assertions.assertEquals("Netherlands", countries.get(0).getClassname());
|
||||
|
||||
countries = tmp
|
||||
.filter(r -> r.getId().equals("50|07b5c0ccd4fe::e7f5459cc97865f2af6e3da964c1250b"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry();
|
||||
Assertions.assertEquals(1, countries.size());
|
||||
Assertions.assertEquals("NL", countries.get(0).getClassid());
|
||||
Assertions.assertEquals("Netherlands", countries.get(0).getClassname());
|
||||
|
||||
countries = tmp
|
||||
.filter(r -> r.getId().equals("50|355e65625b88::e7d48a470b13bda61f7ebe3513e20cb6"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry();
|
||||
Assertions.assertEquals(2, countries.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France")));
|
||||
|
||||
countries = tmp
|
||||
.filter(r -> r.getId().equals("50|355e65625b88::74009c567c81b4aa55c813db658734df"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry();
|
||||
Assertions.assertEquals(2, countries.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
countries
|
||||
.stream()
|
||||
.anyMatch(cs -> cs.getClassid().equals("NL") && cs.getClassname().equals("Netherlands")));
|
||||
|
||||
countries = tmp
|
||||
.filter(r -> r.getId().equals("50|355e65625b88::54a1c76f520bb2c8da27d12e42891088"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getCountry();
|
||||
Assertions.assertEquals(2, countries.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
countries.stream().anyMatch(cs -> cs.getClassid().equals("IT") && cs.getClassname().equals("Italy")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
countries.stream().anyMatch(cs -> cs.getClassid().equals("FR") && cs.getClassname().equals("France")));
|
||||
}
|
||||
|
||||
void verifyPropagationSoftware(){
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
JavaRDD<Software> tmp = sc
|
||||
.textFile(workingDir.toString() + "/source/software")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
|
||||
Dataset<Software> verificationDs = spark.createDataset(tmp.rdd(), Encoders.bean(Software.class));
|
||||
|
||||
Assertions.assertEquals(6, verificationDs.filter("size(country) > 0").count());
|
||||
Assertions.assertEquals(3, verificationDs.filter("size(country) = 1").count());
|
||||
Assertions.assertEquals(3, verificationDs.filter("size(country) = 2").count());
|
||||
Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());
|
||||
|
||||
Dataset<String> countryExploded = verificationDs
|
||||
.flatMap(
|
||||
(FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
|
||||
.map((MapFunction<Country, String>) Qualifier::getClassid, Encoders.STRING());
|
||||
|
||||
Assertions.assertEquals(9, countryExploded.count());
|
||||
|
||||
Assertions.assertEquals(1, countryExploded.filter("value = 'FR'").count());
|
||||
Assertions.assertEquals(1, countryExploded.filter("value = 'TR'").count());
|
||||
Assertions.assertEquals(2, countryExploded.filter("value = 'IT'").count());
|
||||
Assertions.assertEquals(1, countryExploded.filter("value = 'US'").count());
|
||||
Assertions.assertEquals(1, countryExploded.filter("value = 'MX'").count());
|
||||
Assertions.assertEquals(1, countryExploded.filter("value = 'CH'").count());
|
||||
Assertions.assertEquals(2, countryExploded.filter("value = 'JP'").count());
|
||||
|
||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassid = verificationDs
|
||||
.flatMap((FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||
List<Tuple2<String, String>> prova = new ArrayList<>();
|
||||
List<Country> countryList = row.getCountry();
|
||||
countryList
|
||||
.forEach(
|
||||
c -> prova
|
||||
.add(
|
||||
new Tuple2<>(
|
||||
row.getId(), c.getClassid())));
|
||||
return prova.iterator();
|
||||
}, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||
|
||||
Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());
|
||||
|
||||
//countryExplodedWithCountryclassid.show(false);
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
countryExplodedWithCountryclassid
|
||||
.filter(
|
||||
"_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'FR' ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
countryExplodedWithCountryclassid
|
||||
.filter(
|
||||
"_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'TR' ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
countryExplodedWithCountryclassid
|
||||
.filter(
|
||||
"_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'IT' or _2 = 'MX') ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
countryExplodedWithCountryclassid
|
||||
.filter(
|
||||
"_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'IT' or _2 = 'US') ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
countryExplodedWithCountryclassid
|
||||
.filter(
|
||||
"_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'JP'")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
countryExplodedWithCountryclassid
|
||||
.filter(
|
||||
"_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'CH' or _2 = 'JP') ")
|
||||
.count());
|
||||
|
||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname = verificationDs
|
||||
.flatMap(
|
||||
(FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||
List<Tuple2<String, String>> prova = new ArrayList<>();
|
||||
List<Country> countryList = row.getCountry();
|
||||
countryList
|
||||
.forEach(
|
||||
c -> prova
|
||||
.add(
|
||||
new Tuple2<>(
|
||||
row.getId(),
|
||||
c.getClassname())));
|
||||
return prova.iterator();
|
||||
},
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||
|
||||
//countryExplodedWithCountryclassname.show(false);
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
countryExplodedWithCountryclassname
|
||||
.filter(
|
||||
"_1 = '50|od______1582::6e7a9b21a2feef45673890432af34244' and _2 = 'France' ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
countryExplodedWithCountryclassname
|
||||
.filter(
|
||||
"_1 = '50|dedup_wf_001::40ea2f24181f6ae77b866ebcbffba523' and _2 = 'Turkey' ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
countryExplodedWithCountryclassname
|
||||
.filter(
|
||||
"_1 = '50|od______1106::2b7ca9726230be8e862be224fd463ac4' and (_2 = 'Italy' or _2 = 'Mexico') ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
countryExplodedWithCountryclassname
|
||||
.filter(
|
||||
"_1 = '50|od_______935::46a0ad9964171c3dd13373f5427b9a1c' and (_2 = 'Italy' or _2 = 'United States') ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
countryExplodedWithCountryclassname
|
||||
.filter(
|
||||
"_1 = '50|dedup_wf_001::b67bc915603fc01e445f2b5888ba7218' and _2 = 'Japan' ")
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
2,
|
||||
countryExplodedWithCountryclassname
|
||||
.filter(
|
||||
"_1 = '50|od_______109::f375befa62a741e9250e55bcfa88f9a6' and (_2 = 'Switzerland' or _2 = 'Japan') ")
|
||||
.count());
|
||||
|
||||
Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance = verificationDs
|
||||
.flatMap(
|
||||
(FlatMapFunction<Software, Tuple2<String, String>>) row -> {
|
||||
List<Tuple2<String, String>> prova = new ArrayList<>();
|
||||
List<Country> countryList = row.getCountry();
|
||||
countryList
|
||||
.forEach(
|
||||
c -> prova
|
||||
.add(
|
||||
new Tuple2<>(
|
||||
row.getId(),
|
||||
c
|
||||
.getDataInfo()
|
||||
.getInferenceprovenance())));
|
||||
return prova.iterator();
|
||||
},
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
7, countryExplodedWithCountryProvenance.filter("_2 = 'propagation'").count());
|
||||
}
|
||||
}
|
|
@ -7,6 +7,7 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

import org.antlr.v4.runtime.misc.Utils;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
@ -69,26 +70,35 @@ public class CountryPropagationJobTest {
|
|||
|
||||
@Test
|
||||
void testCountryPropagationSoftware() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph/software")
|
||||
.getPath();
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/software")
|
||||
.getPath();
|
||||
SparkCountryPropagationJob
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
sc
|
||||
.textFile(
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph/software")
|
||||
.getPath()).saveAsTextFile(workingDir.toString() + "/source/software");
|
||||
|
||||
|
||||
sc
|
||||
.textFile(
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/software")
|
||||
.getPath()).saveAsTextFile(workingDir.toString() + "/preparedInfo/software");
|
||||
|
||||
SparkCountryPropagationJob
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath", sourcePath,
|
||||
"--sourcePath",workingDir.toString() + "/source/software",
|
||||
"-resultTableName", Software.class.getCanonicalName(),
|
||||
"-outputPath", workingDir.toString() + "/software",
|
||||
"-preparedInfoPath", preparedInfoPath
|
||||
"-workingPath", workingDir.toString(),
|
||||
"-resultType", "software"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
|
||||
JavaRDD<Software> tmp = sc
|
||||
.textFile(workingDir.toString() + "/software")
|
||||
.textFile(workingDir.toString() + "/source/software")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
|
@ -130,7 +140,7 @@ public class CountryPropagationJobTest {

        Assertions.assertEquals(9, countryExplodedWithCountryclassid.count());

        countryExplodedWithCountryclassid.show(false);
        //countryExplodedWithCountryclassid.show(false);
        Assertions
            .assertEquals(
                1,
@ -190,7 +200,7 @@ public class CountryPropagationJobTest {
            },
            Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

        countryExplodedWithCountryclassname.show(false);
        //countryExplodedWithCountryclassname.show(false);
        Assertions
            .assertEquals(
                1,
@ -259,23 +269,31 @@ public class CountryPropagationJobTest {
|
|||
|
||||
@Test
|
||||
void testCountryPropagationPublication() throws Exception {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication")
|
||||
.getPath();
|
||||
final String preparedInfoPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication")
|
||||
.getPath();
|
||||
SparkCountryPropagationJob
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath", sourcePath,
|
||||
"-resultTableName", Publication.class.getCanonicalName(),
|
||||
"-outputPath", workingDir.toString() + "/publication",
|
||||
"-preparedInfoPath", preparedInfoPath
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
sc
|
||||
.textFile(
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication")
|
||||
.getPath()).saveAsTextFile(workingDir.toString() + "/source/publication");
|
||||
|
||||
|
||||
sc
|
||||
.textFile(
|
||||
getClass()
|
||||
.getResource("/eu/dnetlib/dhp/countrypropagation/preparedInfo/publication")
|
||||
.getPath()).saveAsTextFile(workingDir.toString() + "/preparedInfo/publication");
|
||||
|
||||
SparkCountryPropagationJob
|
||||
.main(
|
||||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath",workingDir.toString() + "/source/publication",
|
||||
"-resultTableName", Publication.class.getCanonicalName(),
|
||||
"-workingPath", workingDir.toString(),
|
||||
"-resultType", "publication"
|
||||
});
|
||||
|
||||
|
||||
|
||||
JavaRDD<Publication> tmp = sc
|
||||
.textFile(workingDir.toString() + "/publication")
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import eu.dnetlib.dhp.countrypropagation.pojo.DatasourceCountry;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
@ -63,7 +64,7 @@ public class DatasourceCountryPreparationTest {
|
|||
new String[] {
|
||||
"--isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"--sourcePath", sourcePath,
|
||||
"--outputPath", workingDir.toString() + "/datasourceCountry",
|
||||
"--workingPath", workingDir.toString() + "/country",
|
||||
"--allowedtypes", "pubsrepository::institutional",
|
||||
"--whitelist",
|
||||
"10|openaire____::3795d6478e30e2c9f787d427ff160944;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48"
|
||||
|
@ -72,7 +73,7 @@ public class DatasourceCountryPreparationTest {
        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<DatasourceCountry> tmp = sc
            .textFile(workingDir.toString() + "/datasourceCountry")
            .textFile(workingDir.toString() + "/country/datasourceCountry")
            .map(item -> OBJECT_MAPPER.readValue(item, DatasourceCountry.class));

        Assertions.assertEquals(3, tmp.count());
@ -1,12 +1,11 @@

package eu.dnetlib.dhp.countrypropagation;

import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import eu.dnetlib.dhp.countrypropagation.pojo.ResultCountrySet;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;

@ -61,25 +60,25 @@ public class ResultCountryPreparationTest {
            .getResource("/eu/dnetlib/dhp/countrypropagation/graph/publication")
            .getPath();

        final String preparedInfoPath = getClass()
            .getResource("/eu/dnetlib/dhp/countrypropagation/datasourcecountry")
            .getPath();

        PrepareResultCountrySet
            .main(
                new String[] {
                    "--isSparkSessionManaged", Boolean.FALSE.toString(),
                    "--workingPath", workingDir.toString() + "/working",
                    "--sourcePath", sourcePath,
                    "--outputPath", workingDir.toString() + "/resultCountry",
                    "--preparedInfoPath", preparedInfoPath,
                    "--resultTableName", Publication.class.getCanonicalName()
                });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        sc.textFile(getClass()
            .getResource("/eu/dnetlib/dhp/countrypropagation/datasourcecountry")
            .getPath()).saveAsTextFile(workingDir + "/country/datasourceCountry");

        PrepareResultCountrySet
            .main(
                new String[] {
                    "--isSparkSessionManaged", Boolean.FALSE.toString(),
                    "--workingPath", workingDir.toString() + "/country",
                    "--sourcePath", sourcePath,
                    "--resultTableName", Publication.class.getCanonicalName()
                });

        JavaRDD<ResultCountrySet> tmp = sc
            .textFile(workingDir.toString() + "/resultCountry")
            .textFile(workingDir.toString() + "/country/preparedInfo/publication")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultCountrySet.class));

        Assertions.assertEquals(5, tmp.count());
@ -80,7 +80,6 @@ public class OrcidPropagationJobTest {
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
                "-sourcePath", sourcePath,
                "-hive_metastore_uris", "",
                "-saveGraph", "true",
                "-resultTableName", Dataset.class.getCanonicalName(),
                "-outputPath", workingDir.toString() + "/dataset",
                "-possibleUpdatesPath", possibleUpdatesPath

@ -125,8 +124,6 @@ public class OrcidPropagationJobTest {
                .getPath(),
                "-hive_metastore_uris",
                "",
                "-saveGraph",
                "true",
                "-resultTableName",
                "eu.dnetlib.dhp.schema.oaf.Dataset",
                "-outputPath",

@ -193,8 +190,6 @@ public class OrcidPropagationJobTest {
                .getPath(),
                "-hive_metastore_uris",
                "",
                "-saveGraph",
                "true",
                "-resultTableName",
                "eu.dnetlib.dhp.schema.oaf.Dataset",
                "-outputPath",
@ -0,0 +1,207 @@
|
|||
|
||||
package eu.dnetlib.dhp.orcidtoresultfromsemrel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.neethi.Assertion;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class PrepareStep1Test {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareStep1Test.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(PrepareStep1Test.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(PrepareStep1Test.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(PrepareStep1Test.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
void noMatchTest() throws Exception {
|
||||
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparestep1")
|
||||
.getPath();
|
||||
|
||||
PrepareResultOrcidAssociationStep1
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-resultTableName", Dataset.class.getCanonicalName(),
|
||||
"-outputPath", workingDir.toString() + "/preparedInfo",
|
||||
"-allowedsemrels", "IsSupplementedBy;IsSupplementTo",
|
||||
"-allowedpids", "orcid;orcid_pending"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<ResultOrcidList> tmp = sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class));
|
||||
|
||||
Assertions.assertEquals(0, tmp.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
7, sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo/relationSubset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo/resultSubset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.count());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void matchTest() throws Exception {
|
||||
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparestep1")
|
||||
.getPath();
|
||||
|
||||
PrepareResultOrcidAssociationStep1
|
||||
.main(
|
||||
new String[] {
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath", sourcePath,
|
||||
"-resultTableName", Publication.class.getCanonicalName(),
|
||||
"-outputPath", workingDir.toString() + "/preparedInfo",
|
||||
"-allowedsemrels", "IsSupplementedBy;IsSupplementTo",
|
||||
"-allowedpids", "orcid;orcid_pending"
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<ResultOrcidList> tmp = sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo/publication")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class));
|
||||
|
||||
Assertions.assertEquals(1, tmp.count());
|
||||
|
||||
tmp.foreach(e -> System.out.println(OBJECT_MAPPER.writeValueAsString(e)));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getAuthorList()
|
||||
.size());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"0000-0002-5001-6911",
|
||||
tmp
|
||||
.filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getAuthorList()
|
||||
.get(0)
|
||||
.getOrcid());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Barbarić-Mikočević, Željka",
|
||||
tmp
|
||||
.filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getAuthorList()
|
||||
.get(0)
|
||||
.getFullname());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Željka",
|
||||
tmp
|
||||
.filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getAuthorList()
|
||||
.get(0)
|
||||
.getName());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
"Barbarić-Mikočević",
|
||||
tmp
|
||||
.filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getAuthorList()
|
||||
.get(0)
|
||||
.getSurname());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
7, sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo/relationSubset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Relation.class))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, sc
|
||||
.textFile(workingDir.toString() + "/preparedInfo/resultSubset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class))
|
||||
.count());
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,222 @@
|
|||
|
||||
package eu.dnetlib.dhp.orcidtoresultfromsemrel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.Function;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
public class PrepareStep2Test {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PrepareStep2Test.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
private static Path workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files.createTempDirectory(PrepareStep2Test.class.getSimpleName());
|
||||
log.info("using work dir {}", workingDir);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(PrepareStep2Test.class.getSimpleName());
|
||||
|
||||
conf.setMaster("local[*]");
|
||||
conf.set("spark.driver.host", "localhost");
|
||||
conf.set("spark.ui.enabled", "false");
|
||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||
|
||||
spark = SparkSession
|
||||
.builder()
|
||||
.appName(PrepareStep2Test.class.getSimpleName())
|
||||
.config(conf)
|
||||
.getOrCreate();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() throws IOException {
|
||||
FileUtils.deleteDirectory(workingDir.toFile());
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
    @Test
    void testMatch() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparedInfo/resultSubset")
            .getPath();

        PrepareResultOrcidAssociationStep2
            .main(
                new String[] {
                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-sourcePath", sourcePath,
                    "-outputPath", workingDir.toString() + "/preparedInfo/mergedOrcidAssoc"
                });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<ResultOrcidList> tmp = sc
            .textFile(workingDir.toString() + "/preparedInfo/mergedOrcidAssoc")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class));

        Assertions.assertEquals(1, tmp.count());

        Assertions
            .assertEquals(
                1,
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .count());

        Assertions
            .assertEquals(
                2, tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .size());

        Assertions
            .assertTrue(
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .stream()
                    .anyMatch(aa -> aa.getOrcid().equals("0000-0002-1234-5678")));
        Assertions
            .assertTrue(
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .stream()
                    .anyMatch(aa -> aa.getOrcid().equals("0000-0002-5001-6911")));

    }

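    /**
     * PrepareResultOrcidAssociationStep1 scans the publications of the test graph for authors carrying an
     * allowed pid (orcid, orcid_pending) and follows only the allowed semantic relations
     * (IsSupplementedBy, IsSupplementTo): one ResultOrcidList is expected for the publication above, together
     * with the relationSubset (7 relations) and resultSubset (1 publication) written under preparedInfo.
     */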
    @Test
    void matchTest() throws Exception {

        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/preparestep1")
            .getPath();

        PrepareResultOrcidAssociationStep1
            .main(
                new String[] {
                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-sourcePath", sourcePath,
                    "-resultTableName", Publication.class.getCanonicalName(),
                    "-outputPath", workingDir.toString() + "/preparedInfo",
                    "-allowedsemrels", "IsSupplementedBy;IsSupplementTo",
                    "-allowedpids", "orcid;orcid_pending"
                });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<ResultOrcidList> tmp = sc
            .textFile(workingDir.toString() + "/preparedInfo/publication")
            .map(item -> OBJECT_MAPPER.readValue(item, ResultOrcidList.class));

        Assertions.assertEquals(1, tmp.count());

        tmp.foreach(e -> System.out.println(OBJECT_MAPPER.writeValueAsString(e)));

        Assertions
            .assertEquals(
                1, tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .count());
        Assertions
            .assertEquals(
                1, tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .size());
        Assertions
            .assertEquals(
                "0000-0002-5001-6911",
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .get(0)
                    .getOrcid());
        Assertions
            .assertEquals(
                "Barbarić-Mikočević, Željka",
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .get(0)
                    .getFullname());
        Assertions
            .assertEquals(
                "Željka",
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .get(0)
                    .getName());
        Assertions
            .assertEquals(
                "Barbarić-Mikočević",
                tmp
                    .filter(rol -> rol.getResultId().equals("50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217"))
                    .collect()
                    .get(0)
                    .getAuthorList()
                    .get(0)
                    .getSurname());

        Assertions
            .assertEquals(
                7, sc
                    .textFile(workingDir.toString() + "/preparedInfo/relationSubset")
                    .map(item -> OBJECT_MAPPER.readValue(item, Relation.class))
                    .count());

        Assertions
            .assertEquals(
                1, sc
                    .textFile(workingDir.toString() + "/preparedInfo/resultSubset")
                    .map(item -> OBJECT_MAPPER.readValue(item, Publication.class))
                    .count());

    }

}
@ -33,32 +33,32 @@ public class ProjectPropagationJobTest {
    private static SparkSession spark;

    private static Path workingDir;
    private static final SparkConf conf = new SparkConf();

    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName());

        log.info("using work dir {}", workingDir);

        SparkConf conf = new SparkConf();

        conf.setAppName(ProjectPropagationJobTest.class.getSimpleName());

        conf.setMaster("local[*]");
        conf.set("spark.driver.host", "localhost");
        conf.set("hive.metastore.local", "true");
        conf.set("spark.ui.enabled", "false");
        conf.set("spark.sql.warehouse.dir", workingDir.toString());
        conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

        spark = SparkSession
            .builder()
            .appName(ProjectPropagationJobTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
            .builder()
            .appName(ProjectPropagationJobTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
    }

    @AfterAll
    public static void afterAll() throws IOException {
        FileUtils.deleteDirectory(workingDir.toFile());

        spark.stop();
    }

@ -71,6 +71,7 @@ public class ProjectPropagationJobTest {
    @Test
    void NoUpdateTest() throws Exception {

        workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName());
        final String potentialUpdateDate = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates")

@ -82,10 +83,10 @@ public class ProjectPropagationJobTest {
        SparkResultToProjectThroughSemRelJob
            .main(
                new String[] {
                    "-isTest", Boolean.TRUE.toString(),

                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-hive_metastore_uris", "",
                    "-saveGraph", "true",

                    "-outputPath", workingDir.toString() + "/relation",
                    "-potentialUpdatePath", potentialUpdateDate,
                    "-alreadyLinkedPath", alreadyLinkedPath,

@ -98,6 +99,10 @@ public class ProjectPropagationJobTest {
            .map(item -> OBJECT_MAPPER.readValue(item, Relation.class));

        Assertions.assertEquals(0, tmp.count());

        FileUtils.deleteDirectory(workingDir.toFile());

    }

    /**

@ -107,6 +112,12 @@ public class ProjectPropagationJobTest {
     */
    @Test
    void UpdateTenTest() throws Exception {
        workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName());
        spark = SparkSession
            .builder()
            .appName(ProjectPropagationJobTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
        final String potentialUpdatePath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates")

@ -118,10 +129,10 @@ public class ProjectPropagationJobTest {
        SparkResultToProjectThroughSemRelJob
            .main(
                new String[] {
                    "-isTest", Boolean.TRUE.toString(),

                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-hive_metastore_uris", "",
                    "-saveGraph", "true",

                    "-outputPath", workingDir.toString() + "/relation",
                    "-potentialUpdatePath", potentialUpdatePath,
                    "-alreadyLinkedPath", alreadyLinkedPath,

@ -169,6 +180,9 @@ public class ProjectPropagationJobTest {
                .sql(
                    "Select * from temporary where datainfo.inferenceprovenance = 'propagation'")
                .count());

        FileUtils.deleteDirectory(workingDir.toFile());

    }

    /**

@ -179,6 +193,12 @@ public class ProjectPropagationJobTest {
     */
    @Test
    void UpdateMixTest() throws Exception {
        workingDir = Files.createTempDirectory(ProjectPropagationJobTest.class.getSimpleName());
        spark = SparkSession
            .builder()
            .appName(ProjectPropagationJobTest.class.getSimpleName())
            .config(conf)
            .getOrCreate();
        final String potentialUpdatepath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates")

@ -190,10 +210,10 @@ public class ProjectPropagationJobTest {
        SparkResultToProjectThroughSemRelJob
            .main(
                new String[] {
                    "-isTest", Boolean.TRUE.toString(),

                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-hive_metastore_uris", "",
                    "-saveGraph", "true",

                    "-outputPath", workingDir.toString() + "/relation",
                    "-potentialUpdatePath", potentialUpdatepath,
                    "-alreadyLinkedPath", alreadyLinkedPath,

@ -244,5 +264,7 @@ public class ProjectPropagationJobTest {
                .sql(
                    "Select * from temporary where datainfo.inferenceprovenance = 'propagation'")
                .count());

        FileUtils.deleteDirectory(workingDir.toFile());
    }
}

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
{"resultId":"50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217","authorList":[{"name":"Željka","surname":"Barbarić-Mikočević","fullname":"Barbarić-Mikočević, Željka","orcid":"0000-0002-5001-6911"}]}
@ -0,0 +1 @@
{"resultId":"50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217","authorList":[{"name":"Vesna","surname":"Džimbeg-Malčić","fullname":"Džimbeg-Malčić, Vesna","orcid":"0000-0002-1234-5678"}]}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,18 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"isSupplementedBy","relType":"datasourceOrganization","source":"50|57a035e5b1ae::0637d444355058eb76ab6d7a842aa8b4","subRelType":"provision","target":"50|475c1990cbb2::02d3c300ac2d07135a6208159c512f62","validated":false}
{"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"isSupplementedBy","relType":"datasourceOrganization","source":"50|57a035e5b1ae::01894f77220771428abaecbfa2bcc8f7","subRelType":"provision","target":"50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217","validated":false}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"isSupplementTo","relType":"projectOrganization","source":"50|475c1990cbb2::02d3c300ac2d07135a6208159c512f62","subRelType":"participation","target":"50|57a035e5b1ae::0637d444355058eb76ab6d7a842aa8b4","validated":false}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"isSupplementTo","relType":"projectOrganization","source":"50|475c1990cbb2::46b9f15a3e887ccb154a696c4e7e4217","subRelType":"participation","target":"50|57a035e5b1ae::01894f77220771428abaecbfa2bcc8f7","validated":false}
{"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"isSupplementedBy","relType":"projectOrganization","source":"50|57a035e5b1ae::07b10647d24e46073785210d4715f4e9","subRelType":"participation","target":"50|475c1990cbb2::699e01797642d72238c502ffcae18277","validated":false}
{"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"IsSupplementedBy","relType":"projectOrganization","source":"50|57a035e5b1ae::0cee1d69f1cab270c382eaa853bcf4dc","subRelType":"participation","target":"50|475c1990cbb2::b778659ec5014f3db4c4e03c7907a69d","validated":false}
{"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"IsSupplementTo","relType":"projectOrganization","source":"50|57a035e5b1ae::0d428b3119b0c822270df15058029172","subRelType":"participation","target":"50|475c1990cbb2::c8172336a860b66965e8d43a5494de2c","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::27b677f5d4a8b3a1159dba624016dc70","subRelType":"participation","target":"20|corda_______::0790e5c820c6a795d2b7524415cefb53","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::b5db617bb0f475b49584f5ee5120227c","subRelType":"participation","target":"20|corda_______::16220fe1781e3beb748872d31aa7f789","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::2907ce789238006cbe07f3e89820c9df","subRelType":"participation","target":"20|corda_______::43edcb7ca35d487ec357959e05c7ed7b","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::d185f413b046d7a7b15808388dad71a5","subRelType":"participation","target":"20|corda_______::46ac0acd65a3c66b10842bf291be9660","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::9c454e23267b520b621199fd4a79e3a6","subRelType":"participation","target":"20|corda_______::86fa29ae6a36610616e1691e1283f807","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::e40925978874b5f57378f301370e1293","subRelType":"participation","target":"20|corda_______::88e4a05f9c42a4830ffdd51663ed4538","validated":false}
{"collectedfrom":[{"key":"10|openaire____::0362fcdb3076765d9c0041ad331553e8","value":"OpenOrgs Database"}],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.990"},"lastupdatetimestamp":1649252022894,"properties":[],"relClass":"merges","relType":"organizationOrganization","source":"20|pending_org_::5a01343420bc742ec1891cd98c36a258","subRelType":"dedup","target":"20|corda_______::a7468d48c5f0517ec67a2a9163af7150","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::531cfba3fa5e10f6be1e42e3c54cc95f","subRelType":"participation","target":"20|corda_______::b2233c6930da222c40e78302385a277d","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::48cb178c2561829bc2eedd787c052d48","subRelType":"participation","target":"20|corda_______::cd8ad1c4f710b667b74362c1674b92e6","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::795be98a5ba4c9190a32fc56033a9540","subRelType":"participation","target":"20|corda_______::f2323f9ed70f0f3a93fdfbb92f715e0e","validated":false}
{"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"lastupdatetimestamp":1649252022977,"properties":[],"relClass":"hasParticipant","relType":"projectOrganization","source":"40|corda_______::ca5b255e4b2ef49ff424e0019962591c","subRelType":"participation","target":"20|corda_______::f2323f9ed70f0f3a93fdfbb92f715e0e","validated":false}