forked from D-Net/dnet-hadoop
implemented methods to extract fulltext link from an API call
This commit is contained in:
parent
df82f8beb9
commit
ef6c90cc64
|
@ -6,6 +6,7 @@ import net.sf.saxon.s9api.*;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -18,16 +19,6 @@ import java.util.List;
|
||||||
|
|
||||||
public class DataFetcher implements ExtensionFunction, Serializable {
|
public class DataFetcher implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
/**
|
|
||||||
* this method fetches JSON from a provided URL and returns it as Dublin Core
|
|
||||||
*/
|
|
||||||
public static List<String> fetchAndTransform(URL url) throws IOException {
|
|
||||||
|
|
||||||
JSONObject jsonObject = getJson(url);
|
|
||||||
List<String> authors = getAuthorsFromJson(jsonObject);
|
|
||||||
return transformListToDublinCore(authors);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method fetches JSON object from a given URL
|
* This method fetches JSON object from a given URL
|
||||||
* @param url a url in the metadata for fetching authors in JSON format
|
* @param url a url in the metadata for fetching authors in JSON format
|
||||||
|
@ -88,14 +79,39 @@ public class DataFetcher implements ExtensionFunction, Serializable {
|
||||||
return dublinCoreAuthors;
|
return dublinCoreAuthors;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* todo: add documentation
|
* This is a public method which fetches authors and transforms them into Dublin Core
|
||||||
*/
|
*/
|
||||||
public static String getAndTransformAuthors(URL url) throws IOException{
|
public static String getAndTransformAuthors(URL url) throws IOException{
|
||||||
return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
|
return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method extracts link to fulltext from a given JSON
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static private String getLinkToFulltextFromJson(JSONObject jsonObject) throws MalformedURLException {
|
||||||
|
|
||||||
|
// note: Link to JSON containing fulltextlink is in "primary_file" attribute.
|
||||||
|
// And in the resultant JSON, “links->download” contains the URL to fulltext
|
||||||
|
|
||||||
|
return jsonObject
|
||||||
|
.getJSONObject("data")
|
||||||
|
.getJSONObject("links")
|
||||||
|
.getString("download");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a public method which fetches link to full text and returns it as a suitable format
|
||||||
|
*/
|
||||||
|
public static String getFullTextLinkAndTransform (URL url )throws IOException{
|
||||||
|
|
||||||
|
return getLinkToFulltextFromJson(getJson(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QName getName() {
|
public QName getName() {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -6,6 +6,7 @@ import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
@ -57,4 +58,11 @@ class DataFetcherTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
|
||||||
|
URL linkToFullTextDocument = new URI("https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json").toURL();
|
||||||
|
System.out.println(DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument));
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue