HTML tags are now removed from the extras as well as from the description field

git-svn-id: https://svn.d4science.research-infrastructures.eu/gcube/trunk/data-catalogue/grsf-publisher-ws@135101 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Costantino Perciante 2016-11-30 10:43:30 +00:00
parent 26d4f35f5c
commit 1171a54f12
6 changed files with 126 additions and 9 deletions

View File

@ -44,9 +44,17 @@
<webappDirectory>${project.build.directory}/${project.build.finalName}</webappDirectory>
<distroDirectory>distro</distroDirectory>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jsoup.version>1.10.1</jsoup.version>
</properties>
<dependencies>
<!-- jsoup HTML parser library @ http://jsoup.org/ -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
<scope>compile</scope>
</dependency>
<!-- SmartGears -->
<dependency>
<groupId>org.gcube.core</groupId>

View File

@ -223,7 +223,7 @@ public class GrsfPublisherFisheryService {
record.getMaintainer(),
record.getMaintainerContact(),
version,
record.getDescription(),
HelperMethods.removeHTML(record.getDescription()),
license,
tags,
customFields,

View File

@ -213,7 +213,7 @@ public class GrsfPublisherStockService {
record.getMaintainer(),
record.getMaintainerContact(),
version,
record.getDescription(),
HelperMethods.removeHTML(record.getDescription()),
license,
tags,
customFields,

View File

@ -30,6 +30,7 @@ import org.gcube.datacatalogue.ckanutillibrary.DataCatalogue;
import org.gcube.datacatalogue.ckanutillibrary.DataCatalogueFactory;
import org.gcube.datacatalogue.ckanutillibrary.DataCatalogueImpl;
import org.gcube.datacatalogue.ckanutillibrary.models.ResourceBean;
import org.jsoup.Jsoup;
import org.slf4j.LoggerFactory;
import eu.trentorise.opendata.jackan.internal.org.apache.http.HttpResponse;
@ -214,19 +215,23 @@ public abstract class HelperMethods {
elementsToConsider = Math.min(elementsToConsider, TIME_SERIES_TAKE_LAST_VALUES);
for (int i = (asList.size() - elementsToConsider); i < asList.size(); i++) {
logger.debug(asList.get(i).toString().trim());
valuesForKey.add(asList.get(i).toString().trim());
// trim and remove html
String clean = HelperMethods.removeHTML(asList.get(i).toString().trim());
logger.debug(clean);
valuesForKey.add(clean);
}
}else{
for (int i = 0; i < elementsToConsider; i++) {
logger.debug(asList.get(i).toString().trim());
valuesForKey.add(asList.get(i).toString().trim());
String clean = HelperMethods.removeHTML(asList.get(i).toString().trim());
logger.debug(clean);
valuesForKey.add(clean);
}
}
}
}else{
valuesForKey.add(f.toString().trim());
String clean = HelperMethods.removeHTML(f.toString().trim());
valuesForKey.add(clean);
}
// add to the map
@ -352,7 +357,7 @@ public abstract class HelperMethods {
return res;
}catch(Exception e){
logger.error("error while performing post method " + e.toString());
logger.error("error while performing get method " + e.toString());
}
return null;
@ -522,4 +527,16 @@ public abstract class HelperMethods {
}
return result;
}
/**
 * Strips HTML markup from the given text and returns only its textual content.
 * The text is parsed with Jsoup and the text of the resulting document is returned.
 * @param html the text that may contain HTML markup; may be <code>null</code>
 * @return the text without HTML markup, or <code>null</code> when the input is <code>null</code>
 */
public static String removeHTML(String html) {
	if(html == null){
		// Jsoup refuses to parse a null string; let absent optional values
		// (e.g. a record without description) pass through untouched.
		logger.debug("Incoming text is null, nothing to strip");
		return null;
	}
	logger.debug("Incoming text is " + html);
	String withoutHTML = Jsoup.parse(html).text();
	logger.debug("Without html is " + withoutHTML);
	return withoutHTML;
}
}

View File

@ -0,0 +1,77 @@
/**
*
*/
package org.gcube.data_catalogue.grsf_publish_ws;
import static org.gcube.resources.discovery.icclient.ICFactory.client;
import static org.gcube.resources.discovery.icclient.ICFactory.queryFor;
import java.util.List;
import org.gcube.common.resources.gcore.GCoreEndpoint;
import org.gcube.common.scope.api.ScopeProvider;
import org.gcube.resources.discovery.client.api.DiscoveryClient;
import org.gcube.resources.discovery.client.queries.api.SimpleQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Retrieve the GRSF Service endpoint in the Infrastructure.
 * @author Costantino Perciante at ISTI-CNR (costantino.perciante@isti.cnr.it)
 */
public class GcoreEndpointGRSFService {

	// EntryName of the endpoint looked up among the running instance interfaces
	private static final String resource = "jersey-servlet";
	// ServiceName and ServiceClass used as conditions of the discovery query
	private static final String serviceName = "GRSFPublisher";
	private static final String serviceClass = "Data-Catalogue";
	private static Logger logger = LoggerFactory.getLogger(GcoreEndpointGRSFService.class);

	/**
	 * Discovers the url of the GRSF publisher service in the given scope.
	 * The scope is set on the {@link ScopeProvider} for the duration of the query and
	 * the previously set scope (if any) is restored afterwards. The first endpoint
	 * returned by the query is normalised to https before being returned.
	 * @param scope the scope in which the service is looked up
	 * @return the https url of the first endpoint found
	 * @throws IllegalArgumentException if the scope is null or empty
	 * @throws Exception if no endpoint is found or the discovery fails; the original failure is attached as cause
	 */
	public static String getServiceUrl(String scope) throws Exception {

		if(scope == null || scope.isEmpty())
			throw new IllegalArgumentException("scope cannot be null or empty");

		String oldScope = ScopeProvider.instance.get();

		try{

			logger.info("set scope "+scope);
			ScopeProvider.instance.set(scope);

			SimpleQuery query = queryFor(GCoreEndpoint.class);
			query.addCondition(String.format("$resource/Profile/ServiceClass/text() eq '%s'",serviceClass));
			query.addCondition("$resource/Profile/DeploymentData/Status/text() eq 'ready'");
			query.addCondition(String.format("$resource/Profile/ServiceName/text() eq '%s'",serviceName));
			query.setResult("$resource/Profile/AccessPoint/RunningInstanceInterfaces//Endpoint[@EntryName/string() eq \""+resource+"\"]/text()");

			logger.debug("submitting quey "+query.toString());

			DiscoveryClient<String> client = client();
			List<String> endpoints = client.submit(query);
			if (endpoints == null || endpoints.isEmpty()) throw new Exception("Cannot retrieve the GCoreEndpoint serviceName: "+serviceName +", serviceClass: " +serviceClass +", in scope: "+scope);

			// only the first endpoint is considered
			String urlFound = endpoints.get(0);
			if(urlFound==null)
				throw new Exception("Endpoint:"+resource+", is null for serviceName: "+serviceName +", serviceClass: " +serviceClass +", in scope: "+scope);

			logger.info("found entyname "+urlFound+" for ckanResource: "+resource);

			return toHttps(urlFound);

		}catch(Exception e){
			String error = "An error occurred during GCoreEndpoint discovery, serviceName: "+serviceName +", serviceClass: " +serviceClass +", in scope: "+scope +".";
			logger.error(error, e);
			// keep the original failure as cause so callers can see what went wrong
			throw new Exception(error, e);
		}finally{
			// NOTE(review): when no scope was set before the call, the scope set above
			// is left in place; confirm whether it should be cleared in that case.
			if(oldScope != null && !oldScope.equals(scope))
				ScopeProvider.instance.set(oldScope);
		}
	}

	/**
	 * Turns an http url into its https counterpart, dropping an explicit default port 80.
	 * Only the scheme and the port of the authority are touched: other ports (e.g. 8080)
	 * and any "http" or ":80" occurring later in the url are preserved. Urls that do not
	 * start with "http://" (https ones included) are returned unchanged.
	 */
	private static String toHttps(String url){
		final String httpPrefix = "http://";
		if(!url.startsWith(httpPrefix))
			return url;
		// ":80" is removed only when it closes the authority, i.e. it is followed by '/', '?', '#' or the end
		String withoutDefaultPort = url.replaceFirst("^(http://[^/?#]+?):80(?=[/?#]|$)", "$1");
		return "https://" + withoutDefaultPort.substring(httpPrefix.length());
	}
}

View File

@ -42,7 +42,6 @@ import org.gcube.data_catalogue.grsf_publish_ws.utils.groups.Status;
import org.gcube.data_catalogue.grsf_publish_ws.utils.groups.Type_Fishery;
import org.gcube.datacatalogue.ckanutillibrary.DataCatalogue;
import org.gcube.datacatalogue.ckanutillibrary.DataCatalogueFactory;
import org.junit.Test;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
@ -506,4 +505,20 @@ public class JTests {
return result;
}
//@Test
// Manual check (disabled): looks up the GRSF service url in a development context and logs it.
public void GRSFServiceUrl() throws Exception{
	final String serviceUrl = GcoreEndpointGRSFService.getServiceUrl("/gcube/devNext/NextNext");
	logger.debug("Url is " + serviceUrl);
}
//@Test
// Disabled check: strips the markup from a sample description and fails if tags or entities survive.
public void removeHTML() throws Exception{
	String toTest = "<p>Based on the results of the MP operation for 2015&ndash;17 in its 2013 meeting and the outcome of the review of exceptional circumstances in its 2015 meeting, the ESC recommended that there is no need to revise the Extended Commission&rsquo;s 2013 TAC decision regarding the TAC for 2016&ndash;17. The recommended annual TAC for the years 2016-2017 is 14,647.4t.</p>";
	String cleaned = HelperMethods.removeHTML(toTest);
	logger.debug("Cleaned text is " + cleaned);
	// the sample contains no literal '<' outside of its tags, so any leftover means the stripping failed
	if(cleaned == null || cleaned.contains("<") || cleaned.contains("&ndash;") || cleaned.contains("&rsquo;"))
		throw new AssertionError("HTML was not fully removed: " + cleaned);
}
}