Several improvements in URL checking, transformation of URLs in text containing new lines, refactored code

git-svn-id: https://svn.research-infrastructures.eu/d4science/gcube/trunk/portlets/user/share-updates@93984 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Massimiliano Assante 2014-04-01 13:00:37 +00:00
parent 33c3bf0e23
commit f8d3bc7f8c
8 changed files with 487 additions and 419 deletions

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/share-updates-1.2.3-SNAPSHOT/WEB-INF/classes" path="src/main/java">
<classpathentry kind="src" output="target/share-updates-1.3.0-SNAPSHOT/WEB-INF/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
@ -31,5 +31,5 @@
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/share-updates-1.2.3-SNAPSHOT/WEB-INF/classes"/>
<classpathentry kind="output" path="target/share-updates-1.3.0-SNAPSHOT/WEB-INF/classes"/>
</classpath>

View File

@ -1,5 +1,5 @@
eclipse.preferences.version=1
jarsExcludedFromWebInfLib=
lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.2.3-SNAPSHOT
lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.3.0-SNAPSHOT
warSrcDir=src/main/webapp
warSrcDirIsOutput=false

View File

@ -13,7 +13,7 @@
<groupId>org.gcube.portlets.user</groupId>
<artifactId>share-updates</artifactId>
<packaging>war</packaging>
<version>1.2.3-SNAPSHOT</version>
<version>1.3.0-SNAPSHOT</version>
<name>gCube Share Updates Portlet</name>
<description>

View File

@ -313,7 +313,6 @@ public class ShareUpdateForm extends Composite {
}
});
}
private PrivacyLevel getPrivacyLevel() {
String selected = privacyLevel.getValue(privacyLevel.getSelectedIndex());
if (selected.compareTo(PrivacyLevel.CONNECTION.toString()) == 0)
@ -328,10 +327,6 @@ public class ShareUpdateForm extends Composite {
return PrivacyLevel.SINGLE_VRE;
}
/**
* Escape an html string. Escaping data received from the client helps to
* prevent cross-site script vulnerabilities.
@ -355,7 +350,7 @@ public class ShareUpdateForm extends Composite {
String [] parts = textToCheck.split("\\s");
// Attempt to convert each item into an URL.
for( String item : parts ) {
if (item.startsWith("http")) {
if (item.startsWith("http") || item.startsWith("www")) {
preview.add(new LinkLoader());
submitButton.setEnabled(false);
//GWT.log("It's http link:" + linkToCheck);

View File

@ -18,13 +18,17 @@ import java.util.Iterator;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream;
import javax.imageio.stream.ImageInputStream;
import net.coobird.thumbnailator.Thumbnails;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.gcube.applicationsupportlayer.social.storage.FTPManager;
import org.gcube.portal.databook.shared.ImageType;
import org.gcube.portlets.user.shareupdates.shared.LinkPreview;
@ -45,7 +49,6 @@ public class FilePreviewer {
private static Logger _log = LoggerFactory.getLogger(FilePreviewer.class);
private static final String PDF_DEFAULT_IMAGE = "default/pdf.png";
private static final String NOTHUMB_DEFAULT_IMAGE = "default/default_image.png";
private static final String GENERICFILE_DEFAULT_IMAGE = "default/default_generic.png";
/**
* these are the extension for which I have an icon image preview
@ -233,5 +236,22 @@ public class FilePreviewer {
return null;
}
/**
 * Detects the MIME type of a file through the default Apache Tika detector,
 * handing it the file content together with the file name.
 *
 * @param file the file to inspect
 * @param filenameWithExtension the file name, set as resource name in the detection metadata
 * @return the base type of the detected media type, as a string
 * @throws IOException if the file cannot be opened or read
 */
protected static String getMimeType(File file, String filenameWithExtension) throws IOException {
    TikaConfig config = TikaConfig.getDefaultConfig();
    Detector detector = config.getDetector();
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension);
    TikaInputStream stream = TikaInputStream.get(file);
    try {
        MediaType mediaType = detector.detect(stream, metadata);
        return mediaType.getBaseType().toString();
    } finally {
        // the stream was previously never closed, leaking one file handle per call
        stream.close();
    }
}
}

View File

@ -140,7 +140,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
}
public String getDevelopmentUser() {
    // the default "test.user" account is currently overridden by a personal one
    //return "test.user";
    return "massimiliano.assante";
}
@ -150,12 +150,12 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
public ClientFeed share(String postText, FeedType feedType, PrivacyLevel pLevel,
String vreId, LinkPreview preview, String urlThumbnail, ArrayList<String> mentionedUserFullNames,String fileName, String filePathOnServer, boolean notifyGroup) {
String escapedFeedText = escapeHtmlAndTransformUrl(postText);
String escapedFeedText = TextTransfromUtils.escapeHtmlAndTransformUrl(postText);
ArrayList<PickingUser> mentionedUsers = null;
if (mentionedUserFullNames != null && ! mentionedUserFullNames.isEmpty()) {
mentionedUsers = getSelectedUserIds(mentionedUserFullNames);
escapedFeedText = convertMentionPeopleAnchorHTML(escapedFeedText, mentionedUsers);
escapedFeedText = TextTransfromUtils.convertMentionPeopleAnchorHTML(escapedFeedText, mentionedUsers);
}
@ -185,12 +185,12 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
//this means the user has shared a file without text in it.
String textToPost = "";
if (escapedFeedText.compareTo(ShareUpdateForm.NO_TEXT_FILE_SHARE) == 0) {
textToPost = convertFileNameAnchorHTML(url);
textToPost = TextTransfromUtils.convertFileNameAnchorHTML(url);
} else {
textToPost = escapedFeedText;
System.out.println("textToPost=" + textToPost);
}
ScopeBean scope = new ScopeBean(session.getScope());
String vreId2Set = scope.is(Type.VRE) ? scope.toString() : "";
@ -231,7 +231,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
//everything went fine
ClientFeed cf = new ClientFeed(toShare.getKey(), toShare.getType().toString(), username, feedDate, toShare.getUri(),
replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(),
TextTransfromUtils.replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(),
toShare.getUriThumbnail(), toShare.getLinkHost());
@ -260,73 +260,6 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
return cf;
}
/**
 * Replaces every occurrence of a mentioned user's full name with an HTML anchor
 * pointing to the user profile page; parameter name and username are Base64 encoded.
 *
 * @param escapedFeedText the (already escaped) text of the post
 * @param taggedPeople the users mentioned in the post
 * @return the text with the mentions converted into anchors
 */
private String convertMentionPeopleAnchorHTML(String escapedFeedText, ArrayList<PickingUser> taggedPeople) {
    String result = escapedFeedText;
    for (PickingUser tagged : taggedPeople) {
        String encodedParamName = new String(Base64.encodeBase64(GCubeSocialNetworking.USER_PROFILE_OID.getBytes()));
        String encodedUsername = new String(Base64.encodeBase64(tagged.getUsername().getBytes()));
        String fullName = tagged.getFullName();
        StringBuilder anchor = new StringBuilder();
        anchor.append("<a class=\"link\" style=\"font-size:14px;\" href=\"")
            .append(GCubeSocialNetworking.USER_PROFILE_LINK)
            .append("?")
            .append(encodedParamName)
            .append("=")
            .append(encodedUsername)
            .append("\">")
            .append(fullName)
            .append("</a> ");
        result = result.replace(fullName, anchor.toString());
    }
    return result;
}
/**
 * Stores the given user settings in the ASL session under the {@code UserInfo.USER_INFO_ATTR} attribute.
 *
 * @param user the settings to keep in session
 */
private void setUserSettingsInSession(UserSettings user) {
getASLSession().setAttribute(UserInfo.USER_INFO_ATTR, user);
}
/**
 * Turns the escaped ampersands ({@code &amp;}) of a text back into plain {@code &}.
 *
 * @param toReplace the text to process, may be null
 * @return the text with plain ampersands, or null if the given text was null
 */
private String replaceAmpersand(String toReplace) {
    if (toReplace == null) {
        return null; // previously a NullPointerException
    }
    // literal replacement: no regular expression is needed here
    return toReplace.replace("&amp;", "&");
}
/**
 * Converts the URLs found in a text into clickable HTML anchors; when the text
 * consists of a link only, it is converted into "shared a link.".
 * <p>
 * Tokens starting with "http" that are not valid URLs are left as plain text
 * (previously a single invalid token made the method return the text untouched,
 * losing the conversion of every valid link).
 *
 * @param feedText the text of the post, split on whitespace
 * @return the text with the clickable URLs in it
 */
private String transformUrls(String feedText) {
    StringBuilder sb = new StringBuilder();
    // separate input by spaces ( URLs have no spaces )
    String[] parts = feedText.split("\\s");
    for (int i = 0; i < parts.length; i++) {
        String token = parts[i];
        if (token.startsWith("http")) {
            try {
                // a raw double quote would close the href attribute and let the user inject attributes
                String url = new URL(token).toString().replace("\"", "&quot;");
                if (i == 0 && parts.length == 1) { //then he shared just a link
                    return sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"")
                            .append(url).append("\" target=\"_blank\">").append("a link.").append("</a> ").toString();
                }
                sb.append("<a class=\"link\" style=\"font-size:14px;\" href=\"").append(url)
                        .append("\" target=\"_blank\">").append(url).append("</a> ");
                continue;
            } catch (MalformedURLException e) {
                _log.warn("Not a valid URL, leaving it as plain text: " + token);
            }
        }
        sb.append(token).append(" ");
    }
    return sb.toString();
}
/**
 * Builds the HTML shown when a file is shared without text: "shared a file.",
 * where "a file." is an anchor opening the given URL in a new window.
 *
 * @param url the URL of the shared file
 * @return the HTML snippet
 */
private String convertFileNameAnchorHTML(String url) {
    String prefix = "<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"";
    String suffix = "\" target=\"_blank\">" + "a file." + "</a> ";
    return prefix + url + suffix;
}
@Override
public UserSettings getUserSettings() {
try {
@ -348,12 +281,9 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
UserInfo userInfo = new UserInfo(username, fullName, thumbnailURL, user.getEmailAddress(), accountURL, true, isAdmin(), vreNames);
UserSettings toReturn = new UserSettings(userInfo, 0, session.getScopeName(), isInfrastructureScope());
setUserSettingsInSession(toReturn);
return toReturn;
}
else {
_log.info("Returning test USER = " + session.getUsername());
HashMap<String, String> fakeVreNames = new HashMap<String, String>();
fakeVreNames.put("/gcube/devsec/devVRE","devVRE");
@ -393,7 +323,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
try {
String mimeType = getMimeType(new File(fileabsolutePathOnServer), fileName);
String mimeType = FilePreviewer.getMimeType(new File(fileabsolutePathOnServer), fileName);
UriResolverReaderParameter resolver = new UriResolverReaderParameter();
//get the url to show (though it could not be ready for download at this stage)
httpURL = resolver.resolveAsUriRequest(smpURI, fileName, mimeType, true);
@ -433,27 +363,96 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
_log.debug("Returning httpURL=" + httpURL);
return toReturn;
}
/**
* tries the following in the indicated order for Populating the Link preview
* Open Graph protocol
* Meta "title" and "description" tags
* Best guess from page content (not recommended)
*
* @param file
* @return
* @throws IOException
* @throws MagicParseException
* @throws MagicMatchNotFoundException
* @throws MagicException
* Schema.org microdata <-- This is still a TODO
*/
protected static String getMimeType(File file, String filenameWithExtension) throws IOException {
    // Detects the MIME type with the default Tika detector, giving it the file
    // content plus the file name (set as resource name in the metadata).
    TikaConfig config = TikaConfig.getDefaultConfig();
    Detector detector = config.getDetector();
    Metadata metadata = new Metadata();
    metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension);
    TikaInputStream stream = TikaInputStream.get(file);
    try {
        MediaType mediaType = detector.detect(stream, metadata);
        return mediaType.getBaseType().toString();
    } finally {
        // the stream was previously never closed, leaking one file handle per call
        stream.close();
    }
}
/**
 * Builds the preview of the first URL found in the given text. Tries, in this order:
 * the Open Graph protocol, then the page metadata / a best guess from the page content.
 *
 * @param linkToCheck the text pasted by the user, possibly containing a URL
 * @return the link preview, or null when no valid and reachable http(s) URL is found
 *         or the preview cannot be built
 */
@Override
public LinkPreview checkLink(String linkToCheck) {
    _log.info("to check " + linkToCheck);
    //look for a url in text
    linkToCheck = TextTransfromUtils.extractURL(linkToCheck);
    if (linkToCheck == null)
        return null; //no url
    String[] schemes = {"http","https"};
    UrlValidator urlValidator = new UrlValidator(schemes);
    if (! urlValidator.isValid(linkToCheck)) {
        _log.warn("url is NOT valid, returning nothing");
        return null;
    }
    _log.debug("url is valid");
    URL pageURL;
    URLConnection siteConnection = null;
    try {
        pageURL = new URL(linkToCheck);
        if (pageURL.getProtocol().equalsIgnoreCase("https")) {
            System.setProperty("java.protocol.handler.pkgs", "com.sun.net.ssl.internal.www.protocol");
            java.security.Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());
            // NOTE(security): this disables certificate validation for every HTTPS
            // connection of the JVM, not only for this request - to be reviewed
            TextTransfromUtils.trustAllHTTPSConnections();
            siteConnection = (HttpsURLConnection) pageURL.openConnection();
        }
        else
            siteConnection = (HttpURLConnection) pageURL.openConnection();
    } catch (MalformedURLException e) {
        _log.error("url is not valid");
        return null;
    } catch (IOException e) {
        _log.error("url is not reachable");
        return null;
    }
    //pretend you're a browser (make my request from Java more browsery-like.)
    siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    // strip a leading "www." only: replaceAll("www.", "") was a regex ('.' matches any
    // character) applied anywhere in the host, so it could corrupt other parts of the name
    String host = pageURL.getHost();
    if (host.startsWith("www."))
        host = host.substring("www.".length());
    try {
        //try openGraph First
        OpenGraph ogLink = new OpenGraph(linkToCheck, true, siteConnection);
        if (ogLink.getContent("title") == null) {
            //there is no OpenGraph for this link
            _log.info("No OpenGraph Found, going Best guess from page content") ;
            return TextTransfromUtils.getInfoFromHTML(siteConnection, pageURL, linkToCheck, host);
        }
        //there is OpenGraph
        _log.info("OpenGraph Found") ;
        String title = ogLink.getContent("title");
        String description = (ogLink.getContent("description") != null) ? ogLink.getContent("description") : "";
        description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
        ArrayList<String> imageUrls = new ArrayList<String>();
        //look for the image ask the guesser if not present
        if (ogLink.getContent("image") != null) {
            String imageUrl = TextTransfromUtils.getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image"));
            imageUrls.add(imageUrl);
            _log.trace("OpenGraph getImage = " +imageUrl) ;
        }
        else {
            _log.trace("OpenGraph No Image, trying manuale parsing");
            ArrayList<String> images = TextTransfromUtils.getImagesWithCleaner(pageURL);
            if (! images.isEmpty())
                imageUrls = images;
        }
        return new LinkPreview(title, description, linkToCheck, host, imageUrls);
    } catch (Exception e) {
        // log through the logger (with the cause) rather than printing the stack trace
        _log.error("Could not build the preview for " + linkToCheck, e);
        return null;
    }
}
/**
* return the id as key and the names as value of the vre a user is subscribed to
* @param username
@ -548,318 +547,10 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
return null;
}
}
/**
 * Escapes the HTML special characters of a text, converts line breaks into
 * {@code <br/>}, makes the URLs clickable and preserves double spaces.
 * Escaping data received from the client helps to prevent cross-site script vulnerabilities.
 *
 * @param html the html string to escape
 * @return the escaped string, or null if the given string is null
 */
private String escapeHtmlAndTransformUrl(String html) {
    if (html == null) {
        return null;
    }
    String escaped = html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    // line breaks become <br/> surrounded by spaces, so that they are separate tokens
    String withBreaks = escaped.replaceAll("(\r\n|\n)"," <br/> ");
    // the URLs become clickable anchors
    String withLinks = transformUrls(withBreaks);
    // double spaces are replaced by their html version &nbsp;
    return withLinks.replaceAll("\\s\\s","&nbsp;&nbsp;");
}
/**
 * Extracts the first valid URL (a token starting with "http") from a text.
 * Malformed candidates are skipped and the search continues with the next token
 * (previously the first malformed candidate made the method return null).
 *
 * @param feedText the text to look into, may be null
 * @return the first valid URL found, or null if there is none
 */
public String extractURL(String feedText) {
    if (feedText == null) {
        return null;
    }
    // separate input by spaces ( URLs have no spaces )
    String[] parts = feedText.split("\\s");
    for (String item : parts) {
        if (!item.startsWith("http")) {
            continue;
        }
        try {
            new URL(item);
            return item;
        } catch (MalformedURLException e) {
            _log.warn("Skipping malformed URL candidate: " + item);
        }
    }
    return null;
}
/**
 * Builds the preview of the first URL found in the given text. Tries the following,
 * in the indicated order, for populating the link preview:
 * Open Graph protocol, then Meta "title" and "description" tags / best guess from page content.
 *
 * Schema.org microdata <-- This is still a TODO
 *
 * @param linkToCheck the text pasted by the user, possibly containing a URL
 * @return the link preview, or null when no valid and reachable http(s) URL is found
 *         or the preview cannot be built
 */
public LinkPreview checkLink(String linkToCheck) {
    _log.info("to check " + linkToCheck);
    //look for a url in text
    linkToCheck = extractURL(linkToCheck);
    if (linkToCheck == null)
        return null; //no url
    String[] schemes = {"http","https"};
    UrlValidator urlValidator = new UrlValidator(schemes);
    if (! urlValidator.isValid(linkToCheck)) {
        _log.warn("url is NOT valid, returning nothing");
        return null;
    }
    _log.debug("url is valid");
    URL pageURL;
    URLConnection siteConnection = null;
    try {
        pageURL = new URL(linkToCheck);
        if (pageURL.getProtocol().equalsIgnoreCase("https")) {
            System.setProperty("java.protocol.handler.pkgs", "com.sun.net.ssl.internal.www.protocol");
            java.security.Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());
            // NOTE(security): this disables certificate validation for every HTTPS
            // connection of the JVM, not only for this request - to be reviewed
            trustAllHTTPSConnections();
            siteConnection = (HttpsURLConnection) pageURL.openConnection();
        }
        else
            siteConnection = (HttpURLConnection) pageURL.openConnection();
    } catch (MalformedURLException e) {
        _log.error("url is not valid");
        return null;
    } catch (IOException e) {
        _log.error("url is not reachable");
        return null;
    }
    //pretend you're a browser (make my request from Java more browsery-like.)
    siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    // strip a leading "www." only: replaceAll("www.", "") was a regex ('.' matches any
    // character) applied anywhere in the host, so it could corrupt other parts of the name
    String host = pageURL.getHost();
    if (host.startsWith("www."))
        host = host.substring("www.".length());
    try {
        //try openGraph First
        OpenGraph ogLink = new OpenGraph(linkToCheck, true, siteConnection);
        if (ogLink.getContent("title") == null) {
            //there is no OpenGraph for this link
            _log.info("No OpenGraph Found, going Best guess from page content") ;
            return getInfoFromHTML(siteConnection, pageURL, linkToCheck, host);
        }
        //there is OpenGraph
        _log.info("OpenGraph Found") ;
        String title = ogLink.getContent("title");
        String description = (ogLink.getContent("description") != null) ? ogLink.getContent("description") : "";
        description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
        ArrayList<String> imageUrls = new ArrayList<String>();
        //look for the image ask the guesser if not present
        if (ogLink.getContent("image") != null) {
            String imageUrl = getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image"));
            imageUrls.add(imageUrl);
            _log.trace("OpenGraph getImage = " +imageUrl) ;
        }
        else {
            _log.trace("OpenGraph No Image, trying manuale parsing");
            ArrayList<String> images = getImagesWithCleaner(pageURL);
            if (! images.isEmpty())
                imageUrls = images;
        }
        return new LinkPreview(title, description, linkToCheck, host, imageUrls);
    } catch (Exception e) {
        // log through the logger (with the cause) rather than printing the stack trace
        _log.error("Could not build the preview for " + linkToCheck, e);
        return null;
    }
}
/**
 * To use when OpenGraph is not available: tries the page metadata first, then a best guess from the page content.
 *
 * @param connection the connection handed to the meta seeker
 * @param pageUrl the URL of the page
 * @param link the link, as a string, used for guessing the description from the content
 * @param host the host to set in the preview
 * @return a LinkPreview instance filled with the extracted information, or null if the extraction fails
 * @throws Exception kept for compatibility with callers; failures of the extraction are logged and yield null
 */
private LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception {
    // the extra connection the original opened here was never used, so it is gone
    try {
        String title = getTitleFromHeader(pageUrl);
        MetaSeeker ms = new MetaSeeker(connection, pageUrl);
        //try the metadata, otherwise ask the guesser
        String metaDescription = ms.getContent("description");
        String description = (metaDescription != null && ! metaDescription.isEmpty()) ? metaDescription : createDescriptionFromContent(link);
        ArrayList<String> images = getImagesWithCleaner(pageUrl);
        return new LinkPreview(title, description, link, host, images);
    } catch(Exception e) {
        // keep the cause in the log: it was previously dropped
        _log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... ", e);
        return null;
    }
}
/**
 * Reads the page up to the closing head element and extracts the content of its title tag.
 *
 * @param pageURL the URL of the page
 * @return the title of the page, or null if there is no (non empty) title tag
 * @throws IOException if the page cannot be read
 */
private String getTitleFromHeader(URL pageURL) throws IOException {
    URLConnection conn = pageURL.openConnection();
    //pretend you're a browser (make my request from Java more browsery-like.)
    conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    Charset charset = OpenGraph.getConnectionCharset(conn);
    StringBuilder headContents = new StringBuilder();
    BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
    try {
        String inputLine;
        // Loop through each line, looking for the closing head element
        while ((inputLine = reader.readLine()) != null) {
            int headEnd = inputLine.indexOf("</head>");
            if (headEnd >= 0) {
                // cut right after </head> and close the document so that it can be parsed
                headContents.append(inputLine.substring(0, headEnd + 7)).append("<body></body></html>").append("\r\n");
                break;
            }
            headContents.append(inputLine).append("\r\n");
        }
    } finally {
        // the reader (and the underlying connection stream) was previously never closed
        reader.close();
    }
    // parse the string HTML
    TagNode pageData = new HtmlCleaner().clean(headContents.toString());
    // open only the title tags
    TagNode[] title = pageData.getElementsByName("title", true);
    // an empty <title></title> has no children: guard against the IndexOutOfBoundsException
    if (title != null && title.length > 0 && ! title[0].getChildren().isEmpty()) {
        String theTitle = title[0].getChildren().get(0).toString();
        _log.trace("theTitle: " + theTitle);
        return theTitle;
    }
    return null;
}
/**
 * Reads the whole page and, through the HtmlCleaner API, collects the URLs of its images
 * (only the first 15 img tags are considered).
 *
 * @param pageURL the URL of the page
 * @return the URLs of the images found, an empty list if there are none
 * @throws IOException if the page cannot be read
 */
private ArrayList<String> getImagesWithCleaner(URL pageURL) throws IOException {
    URLConnection conn = pageURL.openConnection();
    //pretend you're a browser (make my request from Java more browsery-like.)
    conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    Charset charset = OpenGraph.getConnectionCharset(conn);
    StringBuilder pageContents = new StringBuilder();
    BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
    try {
        String inputLine;
        while ((inputLine = reader.readLine()) != null) {
            pageContents.append(inputLine).append("\r\n");
        }
    } finally {
        // the reader (and the underlying connection stream) was previously never closed
        reader.close();
    }
    // parse the string HTML
    TagNode pageData = new HtmlCleaner().clean(pageContents.toString());
    // open only the img tags
    TagNode[] imgs = pageData.getElementsByName("img", true);
    ArrayList<String> images = new ArrayList<String>();
    int upTo = Math.min(imgs.length, 15);
    for (int i = 0; i < upTo; i++) {
        if (imgs[i].hasAttribute("src")) {
            String imageUrl = getImageUrlFromSrcAttribute(pageURL, imgs[i].getAttributeByName("src"));
            images.add(imageUrl);
            _log.trace("[FOUND image] " + imageUrl);
        }
    }
    return images;
}
/**
 * There are several ways to refer an image in a HTML page (absolute URL, protocol-relative
 * URL, absolute path, relative path): this method resolves the content of the src attribute
 * against the URL of the page, using the standard relative URL resolution of {@link URL}
 * instead of a hand written heuristic (which lost the port, did not normalize "../" and
 * mangled protocol-relative and data URIs).
 *
 * @param pageURL the url of the page containing the image
 * @param srcAttr the content of the img src attribute
 * @return the image url ready to be referred outside native environment, or the src
 *         attribute as it is when it cannot be resolved (e.g. a data URI)
 */
private String getImageUrlFromSrcAttribute(URL pageURL, String srcAttr) {
    try {
        return new URL(pageURL, srcAttr).toExternalForm();
    } catch (MalformedURLException e) {
        // unknown protocol (e.g. "data:"): nothing to resolve
        return srcAttr;
    }
}
/**
 * Guesses a description from the text extracted (links excluded) from the page
 * at the given link; the description is cut at 256 characters, followed by "...".
 *
 * @param link the link to check
 * @return the description guessed
 */
private String createDescriptionFromContent(String link) {
    StringBean extractor = new StringBean();
    extractor.setURL(link);
    extractor.setLinks(false);
    String text = extractor.getStrings();
    if (text.length() > 256) {
        return text.substring(0, 256)+"...";
    }
    return text;
}
/**
 * Handles the non trusted https connections by installing, as default SSL socket
 * factory, one whose trust manager accepts any certificate.
 * <p>
 * NOTE(security): the default socket factory is global, so after this call the
 * certificates are no longer validated for any HttpsURLConnection using it;
 * a per-connection socket factory should be evaluated - to be reviewed.
 */
private void trustAllHTTPSConnections() {
    // Create a trust manager that does not validate certificate chains
    TrustManager[] trustAllCerts = new TrustManager[]{
        new X509TrustManager() {
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                // the interface contract asks for a non-null (possibly empty) array
                return new java.security.cert.X509Certificate[0];
            }
            public void checkClientTrusted(
                    java.security.cert.X509Certificate[] certs, String authType) {
                // accepts everything on purpose
            }
            public void checkServerTrusted(
                    java.security.cert.X509Certificate[] certs, String authType) {
                // accepts everything on purpose
            }
        }
    };
    try {
        SSLContext sc = SSLContext.getInstance("SSL");
        sc.init(null, trustAllCerts, new java.security.SecureRandom());
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    } catch (Exception e) {
        // report through the logger, with the cause, instead of the standard output
        _log.error("Could not install the trust-all SSL socket factory", e);
    }
}
/**
* Indicates whether the scope is the whole infrastructure.

View File

@ -0,0 +1,362 @@
package org.gcube.portlets.user.shareupdates.server;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.codec.binary.Base64;
import org.gcube.portal.databook.client.GCubeSocialNetworking;
import org.gcube.portlets.user.shareupdates.server.metaseeker.MetaSeeker;
import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraph;
import org.gcube.portlets.user.shareupdates.shared.LinkPreview;
import org.gcube.portlets.widgets.pickuser.shared.PickingUser;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlparser.beans.StringBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sun.net.ssl.HttpsURLConnection;
/**
* this class contains utility methods for parsing and transforming the text pasted by users (possibly containing URLs), plus other utility methods
* @author Massimiliano Assante, ISTI-CNR
*
*/
public class TextTransfromUtils {
/**
 * logger of this class (it was created for ShareUpdateServiceImpl, so the
 * messages of this class were reported under the wrong category)
 */
private static Logger _log = LoggerFactory.getLogger(TextTransfromUtils.class);
/**
 * Builds the HTML shown when a file is shared without text: "shared a file.",
 * where "a file." is an anchor opening the given URL in a new window.
 *
 * @param url the URL of the shared file
 * @return the HTML snippet
 */
protected static String convertFileNameAnchorHTML(String url) {
    String prefix = "<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"";
    String suffix = "\" target=\"_blank\">" + "a file." + "</a> ";
    return prefix + url + suffix;
}
/**
 * Replaces every occurrence of a mentioned user's full name with an HTML anchor
 * pointing to the user profile page; parameter name and username are Base64 encoded.
 *
 * @param escapedFeedText the (already escaped) text of the post
 * @param taggedPeople the users mentioned in the post
 * @return the text with the mentions converted into anchors
 */
protected static String convertMentionPeopleAnchorHTML(String escapedFeedText, ArrayList<PickingUser> taggedPeople) {
    String result = escapedFeedText;
    for (PickingUser tagged : taggedPeople) {
        String encodedParamName = new String(Base64.encodeBase64(GCubeSocialNetworking.USER_PROFILE_OID.getBytes()));
        String encodedUsername = new String(Base64.encodeBase64(tagged.getUsername().getBytes()));
        String fullName = tagged.getFullName();
        StringBuilder anchor = new StringBuilder();
        anchor.append("<a class=\"link\" style=\"font-size:14px;\" href=\"")
            .append(GCubeSocialNetworking.USER_PROFILE_LINK)
            .append("?")
            .append(encodedParamName)
            .append("=")
            .append(encodedUsername)
            .append("\">")
            .append(fullName)
            .append("</a> ");
        result = result.replace(fullName, anchor.toString());
    }
    return result;
}
/**
 * Guesses a description from the text extracted (links excluded) from the page
 * at the given link; the description is cut at 256 characters, followed by "...".
 *
 * @param link the link to check
 * @return the description guessed
 */
private static String createDescriptionFromContent(String link) {
    StringBean extractor = new StringBean();
    extractor.setURL(link);
    extractor.setLinks(false);
    String text = extractor.getStrings();
    if (text.length() > 256) {
        return text.substring(0, 256)+"...";
    }
    return text;
}
/**
 * Escapes the HTML special characters of a text, converts line breaks into
 * {@code <br/>}, makes the URLs clickable and preserves double spaces.
 * Escaping data received from the client helps to prevent cross-site script vulnerabilities.
 *
 * @param html the html string to escape
 * @return the escaped string, or null if the given string is null
 */
protected static String escapeHtmlAndTransformUrl(String html) {
    if (html == null) {
        return null;
    }
    String escaped = html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    // line breaks become <br/> surrounded by spaces, so that they are separate tokens
    String withBreaks = escaped.replaceAll("(\r\n|\n)"," <br/> ");
    // the URLs become clickable anchors
    String withLinks = transformUrls(withBreaks);
    // double spaces are replaced by their html version &nbsp;
    return withLinks.replaceAll("\\s\\s","&nbsp;&nbsp;");
}
/**
 * Extracts the first valid URL from a text (e.g. when the user pastes a link).
 * Malformed candidates are skipped and the search continues with the next token
 * (previously the first malformed candidate made the method return null).
 *
 * @param feedText the text to look into, may be null
 * @return the first valid URL found, or null if there is none
 */
protected static String extractURL(String feedText) {
    if (feedText == null) {
        return null;
    }
    // separate input by whitespace ( URLs have no spaces ); "\\s" covers the line
    // breaks too, so they need no special treatment before splitting
    String[] parts = feedText.split("\\s");
    for (String item : parts) {
        String toCheck = getHttpToken(item);
        if (toCheck == null) {
            continue;
        }
        try {
            new URL(toCheck);
            return toCheck;
        } catch (MalformedURLException e) {
            _log.warn("Skipping malformed URL candidate: " + toCheck);
        }
    }
    return null;
}
/**
 * Checks a token of a pasted text and sees if it is a link, i.e. if it starts with
 * "http" or "www", optionally preceded by an open parenthesis.
 * The leading parenthesis and all the trailing dots and closed parentheses are removed
 * (people often close the phrase right after the URL, e.g. "(see www.site.org)."),
 * and "http://" is prepended to the links starting with "www".
 *
 * @param item a text token
 * @return the actual http link, or null if the token is not a link
 */
private static String getHttpToken(String item) {
    String token = item.startsWith("(") ? item.substring(1) : item;
    if (! (token.startsWith("http") || token.startsWith("www"))) {
        return null;
    }
    // strip every trailing '.' or ')', not just the last character
    int end = token.length();
    while (end > 0 && (token.charAt(end - 1) == '.' || token.charAt(end - 1) == ')')) {
        end--;
    }
    token = token.substring(0, end);
    return token.startsWith("www") ? "http://"+token : token;
}
/**
 * Reads the whole page and, through the HtmlCleaner API, collects the URLs of its images
 * (only the first 15 img tags are considered).
 *
 * @param pageURL the URL of the page
 * @return the URLs of the images found, an empty list if there are none
 * @throws IOException if the page cannot be read
 */
protected static ArrayList<String> getImagesWithCleaner(URL pageURL) throws IOException {
    URLConnection conn = pageURL.openConnection();
    //pretend you're a browser (make my request from Java more browsery-like.)
    conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    Charset charset = OpenGraph.getConnectionCharset(conn);
    StringBuilder pageContents = new StringBuilder();
    BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
    try {
        String inputLine;
        while ((inputLine = reader.readLine()) != null) {
            pageContents.append(inputLine).append("\r\n");
        }
    } finally {
        // the reader (and the underlying connection stream) was previously never closed
        reader.close();
    }
    // parse the string HTML
    TagNode pageData = new HtmlCleaner().clean(pageContents.toString());
    // open only the img tags
    TagNode[] imgs = pageData.getElementsByName("img", true);
    ArrayList<String> images = new ArrayList<String>();
    int upTo = Math.min(imgs.length, 15);
    for (int i = 0; i < upTo; i++) {
        if (imgs[i].hasAttribute("src")) {
            String imageUrl = getImageUrlFromSrcAttribute(pageURL, imgs[i].getAttributeByName("src"));
            images.add(imageUrl);
            _log.trace("[FOUND image] " + imageUrl);
        }
    }
    return images;
}
/**
 * There are several ways to refer an image in a HTML page (absolute URL, protocol-relative
 * URL, absolute path, relative path): this method resolves the content of the src attribute
 * against the URL of the page, using the standard relative URL resolution of {@link URL}.
 * <p>
 * It replaces a hand written heuristic whose third branch was always true (making the last
 * one unreachable) and which lost the port, did not normalize "../" and mangled
 * protocol-relative and data URIs.
 *
 * @param pageURL the url of the page containing the image
 * @param srcAttr the content of the img src attribute
 * @return the image url ready to be referred outside native environment, or the src
 *         attribute as it is when it cannot be resolved (e.g. a data URI)
 */
protected static String getImageUrlFromSrcAttribute(URL pageURL, String srcAttr) {
    _log.trace("imageUrl="+srcAttr);
    try {
        String resolved = new URL(pageURL, srcAttr).toExternalForm();
        _log.trace("resolved imageUrl="+resolved);
        return resolved;
    } catch (MalformedURLException e) {
        // unknown protocol (e.g. "data:"): nothing to resolve
        _log.trace("cannot resolve, returning the src attribute as it is");
        return srcAttr;
    }
}
/**
 * To use when OpenGraph is not available: tries the page metadata first, then a best
 * guess from the page content.
 * The title comes from the page header, the description from the "description" meta
 * (or, when that is missing or empty, from createDescriptionFromContent(link)) and the
 * images from {@link #getImagesWithCleaner(URL)}.
 *
 * @param connection the connection handed to the MetaSeeker
 * @param pageUrl the url of the page to analyse
 * @param link the link as shared by the user, stored in the preview
 * @param host the host name, stored in the preview
 * @return a LinkPreview object instance filled with the extracted information,
 *         or null if anything goes wrong while extracting it
 * @throws Exception declared for backward compatibility; failures during the
 *         extraction are logged and reported through the null return value
 */
protected static LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception {
    try {
        String title = getTitleFromHeader(pageUrl);
        _log.trace("Found Title=" + title);
        MetaSeeker ms = new MetaSeeker(connection, pageUrl);
        //try the metadata, otherwise ask the guesser
        String metaDescription = ms.getContent("description");
        String description = (metaDescription != null && !metaDescription.isEmpty())
                ? metaDescription
                : createDescriptionFromContent(link);
        ArrayList<String> images = getImagesWithCleaner(pageUrl);
        return new LinkPreview(title, description, link, host, images);
    } catch(Exception e) {
        // best effort: the caller gets null and decides what to show instead
        _log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... ", e);
        return null;
    }
}
/**
 * Reads the page up to the closing head element and extracts the content of its
 * title tag with the HtmlCleaner API.
 *
 * @param pageURL the url of the page
 * @return the first child of the first title element found, as a string,
 *         or "No-title" if the page has no usable title
 * @throws IOException if the page cannot be opened or read
 */
private static String getTitleFromHeader(URL pageURL) throws IOException {
    URLConnection conn = pageURL.openConnection();
    //pretend you're a browser (make my request from Java more browsery-like.)
    conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    Charset charset = OpenGraph.getConnectionCharset(conn);
    StringBuilder headContents = new StringBuilder();
    BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
    try {
        String inputLine;
        // Loop through each line, looking for the closing head element
        while ((inputLine = dis.readLine()) != null) {
            int headEnd = inputLine.indexOf("</head>");
            if (headEnd >= 0) {
                // keep the line up to and including </head> and close the document,
                // so that the cleaner gets a small but complete page
                inputLine = inputLine.substring(0, headEnd + 7);
                inputLine = inputLine.concat("<body></body></html>");
                headContents.append(inputLine).append("\r\n");
                break;
            }
            headContents.append(inputLine).append("\r\n");
        }
    } finally {
        // always release the underlying connection stream, also when reading fails
        dis.close();
    }
    HtmlCleaner cleaner = new HtmlCleaner();
    // parse the string HTML
    TagNode pageData = cleaner.clean(headContents.toString());
    // open only the title tags
    TagNode[] title = pageData.getElementsByName("title", true);
    if (title != null && title.length > 0 && title[0].getChildren().size() > 0) {
        String theTitle = title[0].getChildren().get(0).toString();
        _log.trace("theTitle: " + theTitle);
        return theTitle;
    }
    return "No-title";
}
/**
 * Turns every occurrence of the HTML entity &amp;amp; in the given text back into
 * a plain ampersand character.
 *
 * @param toReplace the text possibly containing escaped ampersands
 * @return the text with each escaped ampersand replaced by the plain character
 */
protected static String replaceAmpersand(String toReplace) {
    // the entity contains no regex metacharacters, so a literal replace is equivalent
    return toReplace.replace("&amp;", "&");
}
/**
 * Utility method that converts the urls found in a text into urls clickable in the browser;
 * if the user has just pasted a link (the text is a single url), the whole text
 * becomes: shared a link.
 * <p>
 * The text is split on whitespace, so every whitespace character between tokens
 * (new lines included) comes back as a single space, and each token is followed by a space.
 * A token recognised by getHttpToken that does not parse as a URL is kept as plain text.
 * Characters of a url that could break out of the generated HTML are percent-encoded.
 *
 * @param feedText the text typed by the user
 * @return the text with the clickable urls in it
 */
protected static String transformUrls(String feedText) {
    StringBuilder sb = new StringBuilder();
    // separate input by spaces ( URLs have no spaces )
    String[] parts = feedText.split("\\s");
    // Attempt to convert each item into an URL.
    for (int i = 0; i < parts.length; i++) {
        String toCheck = getHttpToken(parts[i]);
        URL url = null;
        if (toCheck != null) {
            try {
                url = new URL(toCheck);
            } catch (MalformedURLException e) {
                // not a valid URL after all: keep the token as plain text and go on,
                // so that the other links of the text are still transformed
                _log.error("MalformedURLException, keeping the token as plain text: " + toCheck);
            }
        }
        if (url == null) {
            sb.append(parts[i]);
            sb.append(" ");
            continue;
        }
        // the url comes from user input and ends up inside an HTML attribute
        String safeUrl = escapeUrlForHtml(url.toString());
        if (i == 0 && parts.length == 1) //then he shared just a link
            return sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(safeUrl).append("\" target=\"_blank\">").append("a link.").append("</a> ").toString();
        // If possible then replace with anchor...
        sb.append("<a class=\"link\" style=\"font-size:14px;\" href=\"").append(safeUrl).append("\" target=\"_blank\">").append(safeUrl).append("</a> ");
    }
    return sb.toString();
}
/**
 * Percent-encodes the characters that would let a url escape from an HTML attribute
 * or element content (quotes and angle brackets); the url still points to the same resource.
 */
private static String escapeUrlForHtml(String url) {
    return url.replace("\"", "%22").replace("'", "%27").replace("<", "%3C").replace(">", "%3E");
}
/**
 * This method handles the non trusted https connections: it installs, as default
 * SSL socket factory of {@link HttpsURLConnection}, a factory whose trust manager
 * accepts every client and server certificate without any validation.
 * <p>
 * SECURITY NOTE(review): the default factory is a JVM-wide setting, so after this call
 * every HttpsURLConnection that uses the default factory skips certificate
 * validation and is exposed to man-in-the-middle attacks. Confirm this is acceptable
 * wherever this method is invoked.
 * <p>
 * Failures while creating or installing the SSL context are reported on standard
 * output and otherwise ignored (the previous default factory stays in place).
 */
protected static void trustAllHTTPSConnections() {
    // Create a trust manager that does not validate certificate chains
    TrustManager[] trustAllCerts = new TrustManager[]{
        new X509TrustManager() {
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                // the X509TrustManager contract asks for a non-null (possibly empty) array
                return new java.security.cert.X509Certificate[0];
            }
            public void checkClientTrusted(
                    java.security.cert.X509Certificate[] certs, String authType) {
                // accept everything: no check on purpose
            }
            public void checkServerTrusted(
                    java.security.cert.X509Certificate[] certs, String authType) {
                // accept everything: no check on purpose
            }
        }
    };
    try {
        SSLContext sc = SSLContext.getInstance("SSL");
        sc.init(null, trustAllCerts, new java.security.SecureRandom());
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    } catch (Exception e) {
        System.out.println("Error" + e);
    }
}
}

View File

@ -58,7 +58,7 @@ public class UploadToWorkspaceThread implements Runnable {
_log.info("File to upload="+fileabsolutePathOnServer);
File file = new File(fileabsolutePathOnServer);
String mimeType = ShareUpdateServiceImpl.getMimeType(file, fileName);
String mimeType = FilePreviewer.getMimeType(file, fileName);
InputStream fileData = new FileInputStream(file);
String theId = "";
_log.info("mimeType="+mimeType + " fileData null? " + (fileData == null) );