Compare commits

...

5 Commits
main ... main

Author SHA1 Message Date
sab ef6c90cc64 implemented methods to extract fulltext link from an API call 2024-09-11 14:57:38 +02:00
sab df82f8beb9 code adapted as per Michele's recommendations 2024-09-04 15:29:13 +02:00
sab 53787dbf67 code refactored 2024-08-01 09:52:19 +02:00
sab bbb79273a3 conversion to Dublin Core has been implemented 2024-08-01 01:23:04 +02:00
sab 7f39375ba8 data fetcher has been implemented 2024-07-31 18:05:11 +02:00
3 changed files with 204 additions and 0 deletions

View File

@ -0,0 +1,134 @@
package eu.dnetlib.dhp.transformation.xslt;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.json.JSONArray;
import org.json.JSONObject;

import net.sf.saxon.s9api.*;
/**
 * Fetches JSON documents from a provided link and converts selected values into strings that
 * can be placed in a Dublin Core record. This functionality is particularly needed for OSF
 * Preprints.
 *
 * <p>NOTE(review): the {@link ExtensionFunction} methods at the bottom of this class are still
 * stubs ({@link #getName()}, {@link #getResultType()} and {@link #call(XdmValue[])} return
 * {@code null}). Registering an instance with a Saxon processor presumably requires at least a
 * non-null name - confirm and implement them before relying on the registration.
 */
public class DataFetcher implements ExtensionFunction, Serializable {

    /** Maximum time, in milliseconds, to wait while establishing the connection. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10_000;

    /** Maximum time, in milliseconds, to wait for data once the connection is established. */
    private static final int READ_TIMEOUT_MILLIS = 30_000;

    /**
     * Downloads the document behind the given URL and parses it as a JSON object.
     *
     * <p>The response is always decoded as UTF-8 instead of the platform default charset, and
     * finite connect/read timeouts are set so that an unresponsive server cannot block the
     * caller indefinitely.
     *
     * @param url a url in the metadata pointing to a JSON document
     * @return the parsed JSON object
     * @throws IOException if the document cannot be retrieved (including on timeout)
     */
    static JSONObject getJson(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        connection.setReadTimeout(READ_TIMEOUT_MILLIS);
        try (InputStream in = connection.getInputStream()) {
            return new JSONObject(IOUtils.toString(in, StandardCharsets.UTF_8));
        }
    }

    /**
     * Extracts the full names of the authors from a given JSON object.
     *
     * <p>For every element of the top-level {@code data} array the value at
     * {@code embeds.users.data.attributes.full_name} is read.
     *
     * <p>NOTE(review): only the entries contained in this single JSON object are read; if the
     * remote API paginates its results, further pages are not fetched - confirm.
     *
     * @param jsonObject the JSON object describing the contributors
     * @return the full names, in the order in which they appear in the {@code data} array
     */
    static List<String> getAuthorsFromJson(JSONObject jsonObject) {
        // Resolve the array once instead of looking it up again on every iteration.
        JSONArray contributors = jsonObject.getJSONArray("data");
        List<String> authors = new ArrayList<>(contributors.length());
        for (int i = 0; i < contributors.length(); i++) {
            authors.add(
                contributors
                    .getJSONObject(i)
                    .getJSONObject("embeds")
                    .getJSONObject("users")
                    .getJSONObject("data")
                    .getJSONObject("attributes")
                    .getString("full_name"));
        }
        return authors;
    }

    /**
     * Transforms a list of full names into {@code dc:creator} elements.
     *
     * <p>Names are formatted as "surname, initials (first name)", see
     * https://guidelines.openaire.eu/en/latest/literature/field_creator.html. {@code null} and
     * blank entries are skipped, and XML special characters in the names are escaped.
     *
     * @param authors full names of the authors
     * @return one {@code <dc:creator>} element per usable name, in input order
     */
    static List<String> transformListToDublinCore(List<String> authors) {
        List<String> dublinCoreAuthors = new ArrayList<>(authors.size());
        for (String author : authors) {
            if (author == null) {
                continue;
            }
            String fullName = author.trim();
            if (fullName.isEmpty()) {
                continue;
            }
            dublinCoreAuthors.add(
                "<dc:creator>" + escapeXml(formatCreatorName(fullName)) + "</dc:creator>");
        }
        return dublinCoreAuthors;
    }

    /**
     * Formats a non-blank, trimmed full name as "surname, initials (given names)".
     *
     * <p>The last whitespace-separated token is taken as the surname and all preceding tokens as
     * given names. A name consisting of a single token is returned unchanged, because it cannot
     * be split into given name and surname.
     */
    private static String formatCreatorName(String fullName) {
        String[] parts = fullName.split("\\s+");
        if (parts.length == 1) {
            return parts[0];
        }
        String surname = parts[parts.length - 1];
        StringBuilder initials = new StringBuilder();
        StringBuilder givenNames = new StringBuilder();
        for (int i = 0; i < parts.length - 1; i++) {
            if (i > 0) {
                initials.append(' ');
                givenNames.append(' ');
            }
            String given = parts[i];
            // Take the first code point rather than the first char so that a character outside
            // the Basic Multilingual Plane is not cut in half.
            initials.append(given, 0, given.offsetByCodePoints(0, 1)).append('.');
            givenNames.append(given);
        }
        return surname + ", " + initials + " (" + givenNames + ")";
    }

    /** Escapes the characters that must not appear literally in XML element content. */
    private static String escapeXml(String text) {
        // '&' has to be replaced first, otherwise the entities produced below would be escaped again.
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    }

    /**
     * Fetches the authors from the given URL and transforms them into Dublin Core.
     *
     * <p>NOTE(review): the {@code dc:creator} elements are joined with {@code ", "}, so the result
     * contains text between the elements - confirm that the consumer expects this.
     *
     * @param url a url in the metadata for fetching authors in JSON format
     * @return the {@code dc:creator} elements of all authors, separated by {@code ", "}
     * @throws IOException if the JSON document cannot be retrieved
     */
    public static String getAndTransformAuthors(URL url) throws IOException {
        return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
    }

    /**
     * Extracts the link to the fulltext from a given JSON object.
     *
     * @param jsonObject the JSON object describing the file
     * @return the value found at {@code data.links.download}
     */
    private static String getLinkToFulltextFromJson(JSONObject jsonObject) {
        // note: Link to JSON containing fulltextlink is in "primary_file" attribute.
        // And in the resultant JSON, links->download contains the URL to fulltext
        return jsonObject
            .getJSONObject("data")
            .getJSONObject("links")
            .getString("download");
    }

    /**
     * Fetches the JSON document behind the given URL and returns the fulltext link it contains.
     *
     * @param url a url pointing to the JSON description of the file
     * @return the link to the fulltext
     * @throws IOException if the JSON document cannot be retrieved
     */
    public static String getFullTextLinkAndTransform(URL url) throws IOException {
        return getLinkToFulltextFromJson(getJson(url));
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public QName getName() {
        // NOTE(review): stub - no function name is declared yet.
        return null;
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code null}
     */
    @Override
    public SequenceType getResultType() {
        // NOTE(review): stub - no result type is declared yet.
        return null;
    }

    /**
     * Declares that the function takes no arguments.
     *
     * @return an empty array
     */
    @Override
    public SequenceType[] getArgumentTypes() {
        return new SequenceType[0];
    }

    /**
     * Not implemented yet; none of the static helpers of this class is invoked from here.
     *
     * @param xdmValues the arguments of the call (ignored)
     * @return always {@code null}
     */
    @Override
    public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
        // NOTE(review): stub - the call is not dispatched to any of the fetch methods.
        return null;
    }
}

View File

@ -55,6 +55,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
processor.registerExtensionFunction(new DateCleaner()); processor.registerExtensionFunction(new DateCleaner());
processor.registerExtensionFunction(new PersonCleaner()); processor.registerExtensionFunction(new PersonCleaner());
processor.registerExtensionFunction(new DataFetcher());
final XsltCompiler comp = processor.newXsltCompiler(); final XsltCompiler comp = processor.newXsltCompiler();
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM); QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId())); comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));

View File

@ -0,0 +1,68 @@
package eu.dnetlib.dhp.transformation.xslt;
import org.json.JSONObject;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@link DataFetcher}.
 *
 * <p>The tests that build a {@link URL} call the live OSF API and therefore need network access;
 * the remaining tests work on inline data only.
 */
class DataFetcherTest {

    /** Contributors of an OSF preprint, in JSON format. */
    private static final String CONTRIBUTORS_URL = "https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json";

    /** Description of an OSF file, in JSON format. */
    private static final String FILE_URL = "https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json";

    @Test
    void getJson() throws IOException, URISyntaxException {
        URL contributorsUrl = new URI(CONTRIBUTORS_URL).toURL();
        JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);

        assertTrue(testJsonObj.getJSONArray("data").length() > 0);
        String fullName = testJsonObj
            .getJSONArray("data")
            .getJSONObject(0)
            .getJSONObject("embeds")
            .getJSONObject("users")
            .getJSONObject("data")
            .getJSONObject("attributes")
            .getString("full_name");
        assertFalse(fullName.isEmpty());
    }

    @Test
    void getAuthorsFromJson() throws IOException, URISyntaxException {
        URL contributorsUrl = new URI(CONTRIBUTORS_URL).toURL();
        JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);

        List<String> authors = DataFetcher.getAuthorsFromJson(testJsonObj);
        assertEquals(testJsonObj.getJSONArray("data").length(), authors.size());

        List<String> creators = DataFetcher.transformListToDublinCore(authors);
        assertFalse(creators.isEmpty());
        for (String creator : creators) {
            assertTrue(creator.startsWith("<dc:creator>"));
            assertTrue(creator.endsWith("</dc:creator>"));
        }
    }

    @Test
    void getAuthorsFromJsonWithInlineDocument() {
        JSONObject contributors = new JSONObject(
            "{\"data\":[{\"embeds\":{\"users\":{\"data\":{\"attributes\":{\"full_name\":\"Jane Doe\"}}}}}]}");

        assertEquals(
            java.util.Collections.singletonList("Jane Doe"),
            DataFetcher.getAuthorsFromJson(contributors));
    }

    @Test
    void transformListToDublinCoreWithTwoPartName() {
        assertEquals(
            java.util.Collections.singletonList("<dc:creator>Doe, J. (Jane)</dc:creator>"),
            DataFetcher.transformListToDublinCore(java.util.Collections.singletonList("Jane Doe")));
    }

    @Test
    void getAndTransformAuthors() throws IOException, URISyntaxException {
        URL contributorsUrl = new URI(CONTRIBUTORS_URL).toURL();

        String creators = DataFetcher.getAndTransformAuthors(contributorsUrl);
        assertTrue(creators.startsWith("<dc:creator>"));
        assertTrue(creators.endsWith("</dc:creator>"));
    }

    @Test
    void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
        URL linkToFullTextDocument = new URI(FILE_URL).toURL();

        String fullTextLink = DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument);
        assertNotNull(fullTextLink);
        assertFalse(fullTextLink.isEmpty());
    }
}