Several improvements to URL checking, transformation of URLs in text containing new lines, and code refactoring
git-svn-id: https://svn.research-infrastructures.eu/d4science/gcube/trunk/portlets/user/share-updates@93984 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
33c3bf0e23
commit
f8d3bc7f8c
|
@ -1,6 +1,6 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<classpath>
|
<classpath>
|
||||||
<classpathentry kind="src" output="target/share-updates-1.2.3-SNAPSHOT/WEB-INF/classes" path="src/main/java">
|
<classpathentry kind="src" output="target/share-updates-1.3.0-SNAPSHOT/WEB-INF/classes" path="src/main/java">
|
||||||
<attributes>
|
<attributes>
|
||||||
<attribute name="optional" value="true"/>
|
<attribute name="optional" value="true"/>
|
||||||
<attribute name="maven.pomderived" value="true"/>
|
<attribute name="maven.pomderived" value="true"/>
|
||||||
|
@ -31,5 +31,5 @@
|
||||||
<attribute name="maven.pomderived" value="true"/>
|
<attribute name="maven.pomderived" value="true"/>
|
||||||
</attributes>
|
</attributes>
|
||||||
</classpathentry>
|
</classpathentry>
|
||||||
<classpathentry kind="output" path="target/share-updates-1.2.3-SNAPSHOT/WEB-INF/classes"/>
|
<classpathentry kind="output" path="target/share-updates-1.3.0-SNAPSHOT/WEB-INF/classes"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
eclipse.preferences.version=1
|
eclipse.preferences.version=1
|
||||||
jarsExcludedFromWebInfLib=
|
jarsExcludedFromWebInfLib=
|
||||||
lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.2.3-SNAPSHOT
|
lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.3.0-SNAPSHOT
|
||||||
warSrcDir=src/main/webapp
|
warSrcDir=src/main/webapp
|
||||||
warSrcDirIsOutput=false
|
warSrcDirIsOutput=false
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -13,7 +13,7 @@
|
||||||
<groupId>org.gcube.portlets.user</groupId>
|
<groupId>org.gcube.portlets.user</groupId>
|
||||||
<artifactId>share-updates</artifactId>
|
<artifactId>share-updates</artifactId>
|
||||||
<packaging>war</packaging>
|
<packaging>war</packaging>
|
||||||
<version>1.2.3-SNAPSHOT</version>
|
<version>1.3.0-SNAPSHOT</version>
|
||||||
|
|
||||||
<name>gCube Share Updates Portlet</name>
|
<name>gCube Share Updates Portlet</name>
|
||||||
<description>
|
<description>
|
||||||
|
|
|
@ -313,7 +313,6 @@ public class ShareUpdateForm extends Composite {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private PrivacyLevel getPrivacyLevel() {
|
private PrivacyLevel getPrivacyLevel() {
|
||||||
String selected = privacyLevel.getValue(privacyLevel.getSelectedIndex());
|
String selected = privacyLevel.getValue(privacyLevel.getSelectedIndex());
|
||||||
if (selected.compareTo(PrivacyLevel.CONNECTION.toString()) == 0)
|
if (selected.compareTo(PrivacyLevel.CONNECTION.toString()) == 0)
|
||||||
|
@ -328,10 +327,6 @@ public class ShareUpdateForm extends Composite {
|
||||||
return PrivacyLevel.SINGLE_VRE;
|
return PrivacyLevel.SINGLE_VRE;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Escape an html string. Escaping data received from the client helps to
|
* Escape an html string. Escaping data received from the client helps to
|
||||||
* prevent cross-site script vulnerabilities.
|
* prevent cross-site script vulnerabilities.
|
||||||
|
@ -355,7 +350,7 @@ public class ShareUpdateForm extends Composite {
|
||||||
String [] parts = textToCheck.split("\\s");
|
String [] parts = textToCheck.split("\\s");
|
||||||
// Attempt to convert each item into an URL.
|
// Attempt to convert each item into an URL.
|
||||||
for( String item : parts ) {
|
for( String item : parts ) {
|
||||||
if (item.startsWith("http")) {
|
if (item.startsWith("http") || item.startsWith("www")) {
|
||||||
preview.add(new LinkLoader());
|
preview.add(new LinkLoader());
|
||||||
submitButton.setEnabled(false);
|
submitButton.setEnabled(false);
|
||||||
//GWT.log("It's http link:" + linkToCheck);
|
//GWT.log("It's http link:" + linkToCheck);
|
||||||
|
|
|
@ -18,13 +18,17 @@ import java.util.Iterator;
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
import javax.imageio.ImageReader;
|
import javax.imageio.ImageReader;
|
||||||
import javax.imageio.stream.FileImageInputStream;
|
|
||||||
import javax.imageio.stream.ImageInputStream;
|
import javax.imageio.stream.ImageInputStream;
|
||||||
|
|
||||||
import net.coobird.thumbnailator.Thumbnails;
|
import net.coobird.thumbnailator.Thumbnails;
|
||||||
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.util.PDFTextStripper;
|
import org.apache.pdfbox.util.PDFTextStripper;
|
||||||
|
import org.apache.tika.config.TikaConfig;
|
||||||
|
import org.apache.tika.detect.Detector;
|
||||||
|
import org.apache.tika.io.TikaInputStream;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.gcube.applicationsupportlayer.social.storage.FTPManager;
|
import org.gcube.applicationsupportlayer.social.storage.FTPManager;
|
||||||
import org.gcube.portal.databook.shared.ImageType;
|
import org.gcube.portal.databook.shared.ImageType;
|
||||||
import org.gcube.portlets.user.shareupdates.shared.LinkPreview;
|
import org.gcube.portlets.user.shareupdates.shared.LinkPreview;
|
||||||
|
@ -45,7 +49,6 @@ public class FilePreviewer {
|
||||||
private static Logger _log = LoggerFactory.getLogger(FilePreviewer.class);
|
private static Logger _log = LoggerFactory.getLogger(FilePreviewer.class);
|
||||||
|
|
||||||
private static final String PDF_DEFAULT_IMAGE = "default/pdf.png";
|
private static final String PDF_DEFAULT_IMAGE = "default/pdf.png";
|
||||||
private static final String NOTHUMB_DEFAULT_IMAGE = "default/default_image.png";
|
|
||||||
private static final String GENERICFILE_DEFAULT_IMAGE = "default/default_generic.png";
|
private static final String GENERICFILE_DEFAULT_IMAGE = "default/default_generic.png";
|
||||||
/**
|
/**
|
||||||
* these are the extension for which I have an icon image preview
|
* these are the extension for which I have an icon image preview
|
||||||
|
@ -233,5 +236,22 @@ public class FilePreviewer {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param file
|
||||||
|
* @return
|
||||||
|
* @throws IOException
|
||||||
|
* @throws MagicParseException
|
||||||
|
* @throws MagicMatchNotFoundException
|
||||||
|
* @throws MagicException
|
||||||
|
*/
|
||||||
|
protected static String getMimeType(File file, String filenameWithExtension) throws IOException {
|
||||||
|
TikaConfig config = TikaConfig.getDefaultConfig();
|
||||||
|
Detector detector = config.getDetector();
|
||||||
|
TikaInputStream stream = TikaInputStream.get(file);
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
|
metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension);
|
||||||
|
MediaType mediaType = detector.detect(stream, metadata);
|
||||||
|
return mediaType.getBaseType().toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,7 +140,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
}
|
}
|
||||||
public String getDevelopmentUser() {
|
public String getDevelopmentUser() {
|
||||||
String user = "test.user";
|
String user = "test.user";
|
||||||
//user = "massimiliano.assante";
|
user = "massimiliano.assante";
|
||||||
return user;
|
return user;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,12 +150,12 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
public ClientFeed share(String postText, FeedType feedType, PrivacyLevel pLevel,
|
public ClientFeed share(String postText, FeedType feedType, PrivacyLevel pLevel,
|
||||||
String vreId, LinkPreview preview, String urlThumbnail, ArrayList<String> mentionedUserFullNames,String fileName, String filePathOnServer, boolean notifyGroup) {
|
String vreId, LinkPreview preview, String urlThumbnail, ArrayList<String> mentionedUserFullNames,String fileName, String filePathOnServer, boolean notifyGroup) {
|
||||||
|
|
||||||
String escapedFeedText = escapeHtmlAndTransformUrl(postText);
|
String escapedFeedText = TextTransfromUtils.escapeHtmlAndTransformUrl(postText);
|
||||||
|
|
||||||
ArrayList<PickingUser> mentionedUsers = null;
|
ArrayList<PickingUser> mentionedUsers = null;
|
||||||
if (mentionedUserFullNames != null && ! mentionedUserFullNames.isEmpty()) {
|
if (mentionedUserFullNames != null && ! mentionedUserFullNames.isEmpty()) {
|
||||||
mentionedUsers = getSelectedUserIds(mentionedUserFullNames);
|
mentionedUsers = getSelectedUserIds(mentionedUserFullNames);
|
||||||
escapedFeedText = convertMentionPeopleAnchorHTML(escapedFeedText, mentionedUsers);
|
escapedFeedText = TextTransfromUtils.convertMentionPeopleAnchorHTML(escapedFeedText, mentionedUsers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -185,7 +185,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
//this means the user has shared a file without text in it.
|
//this means the user has shared a file without text in it.
|
||||||
String textToPost = "";
|
String textToPost = "";
|
||||||
if (escapedFeedText.compareTo(ShareUpdateForm.NO_TEXT_FILE_SHARE) == 0) {
|
if (escapedFeedText.compareTo(ShareUpdateForm.NO_TEXT_FILE_SHARE) == 0) {
|
||||||
textToPost = convertFileNameAnchorHTML(url);
|
textToPost = TextTransfromUtils.convertFileNameAnchorHTML(url);
|
||||||
} else {
|
} else {
|
||||||
textToPost = escapedFeedText;
|
textToPost = escapedFeedText;
|
||||||
System.out.println("textToPost=" + textToPost);
|
System.out.println("textToPost=" + textToPost);
|
||||||
|
@ -231,7 +231,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
|
|
||||||
//everything went fine
|
//everything went fine
|
||||||
ClientFeed cf = new ClientFeed(toShare.getKey(), toShare.getType().toString(), username, feedDate, toShare.getUri(),
|
ClientFeed cf = new ClientFeed(toShare.getKey(), toShare.getType().toString(), username, feedDate, toShare.getUri(),
|
||||||
replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(),
|
TextTransfromUtils.replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(),
|
||||||
toShare.getUriThumbnail(), toShare.getLinkHost());
|
toShare.getUriThumbnail(), toShare.getLinkHost());
|
||||||
|
|
||||||
|
|
||||||
|
@ -260,73 +260,6 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
return cf;
|
return cf;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* convert the mentioned people in HTML anchor and also Encode the params Base64
|
|
||||||
* @param escapedFeedText
|
|
||||||
* @param taggedPeople
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
private String convertMentionPeopleAnchorHTML(String escapedFeedText, ArrayList<PickingUser> taggedPeople) {
|
|
||||||
for (PickingUser tagged : taggedPeople) {
|
|
||||||
String taggedHTML = "<a class=\"link\" style=\"font-size:14px;\" href=\""+GCubeSocialNetworking.USER_PROFILE_LINK
|
|
||||||
+"?"+
|
|
||||||
new String(Base64.encodeBase64(GCubeSocialNetworking.USER_PROFILE_OID.getBytes()))+"="+
|
|
||||||
new String(Base64.encodeBase64(tagged.getUsername().getBytes()))+"\">"+tagged.getFullName()+"</a> ";
|
|
||||||
escapedFeedText = escapedFeedText.replace(tagged.getFullName(), taggedHTML);
|
|
||||||
}
|
|
||||||
return escapedFeedText;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void setUserSettingsInSession(UserSettings user) {
|
|
||||||
getASLSession().setAttribute(UserInfo.USER_INFO_ATTR, user);
|
|
||||||
}
|
|
||||||
private String replaceAmpersand(String toReplace) {
|
|
||||||
String toReturn = toReplace.replaceAll("&", "&");
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* utility method that convert a url ina text in a clickable url by the browser
|
|
||||||
* and if the user has just pasted a link, converts the link in: shared a link
|
|
||||||
* @param feedText
|
|
||||||
* @return the text with the clickable url in it
|
|
||||||
*/
|
|
||||||
private String transformUrls(String feedText) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
// separate input by spaces ( URLs have no spaces )
|
|
||||||
String [] parts = feedText.split("\\s");
|
|
||||||
// Attempt to convert each item into an URL.
|
|
||||||
for (int i = 0; i < parts.length; i++) {
|
|
||||||
if (parts[i].startsWith("http")) {
|
|
||||||
try {
|
|
||||||
URL url = new URL(parts[i]);
|
|
||||||
if (i == 0 && parts.length == 1) //then he shared just a link
|
|
||||||
return sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(url).append("\" target=\"_blank\">").append("a link.").append("</a> ").toString();
|
|
||||||
// If possible then replace with anchor...
|
|
||||||
sb.append("<a class=\"link\" style=\"font-size:14px;\" href=\"").append(url).append("\" target=\"_blank\">").append(url).append("</a> ");
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
// If there was an URL then it's not valid
|
|
||||||
_log.error("MalformedURLException returning... ");
|
|
||||||
return feedText;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sb.append(parts[i]);
|
|
||||||
sb.append(" ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param preview
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
private String convertFileNameAnchorHTML(String url) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(url).append("\" target=\"_blank\">").append("a file.").append("</a> ").toString();
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public UserSettings getUserSettings() {
|
public UserSettings getUserSettings() {
|
||||||
try {
|
try {
|
||||||
|
@ -348,12 +281,9 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
|
|
||||||
UserInfo userInfo = new UserInfo(username, fullName, thumbnailURL, user.getEmailAddress(), accountURL, true, isAdmin(), vreNames);
|
UserInfo userInfo = new UserInfo(username, fullName, thumbnailURL, user.getEmailAddress(), accountURL, true, isAdmin(), vreNames);
|
||||||
UserSettings toReturn = new UserSettings(userInfo, 0, session.getScopeName(), isInfrastructureScope());
|
UserSettings toReturn = new UserSettings(userInfo, 0, session.getScopeName(), isInfrastructureScope());
|
||||||
setUserSettingsInSession(toReturn);
|
|
||||||
|
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
||||||
_log.info("Returning test USER = " + session.getUsername());
|
_log.info("Returning test USER = " + session.getUsername());
|
||||||
HashMap<String, String> fakeVreNames = new HashMap<String, String>();
|
HashMap<String, String> fakeVreNames = new HashMap<String, String>();
|
||||||
fakeVreNames.put("/gcube/devsec/devVRE","devVRE");
|
fakeVreNames.put("/gcube/devsec/devVRE","devVRE");
|
||||||
|
@ -393,7 +323,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String mimeType = getMimeType(new File(fileabsolutePathOnServer), fileName);
|
String mimeType = FilePreviewer.getMimeType(new File(fileabsolutePathOnServer), fileName);
|
||||||
UriResolverReaderParameter resolver = new UriResolverReaderParameter();
|
UriResolverReaderParameter resolver = new UriResolverReaderParameter();
|
||||||
//get the url to show (though it could not be ready for download at this stage)
|
//get the url to show (though it could not be ready for download at this stage)
|
||||||
httpURL = resolver.resolveAsUriRequest(smpURI, fileName, mimeType, true);
|
httpURL = resolver.resolveAsUriRequest(smpURI, fileName, mimeType, true);
|
||||||
|
@ -433,25 +363,94 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
_log.debug("Returning httpURL=" + httpURL);
|
_log.debug("Returning httpURL=" + httpURL);
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* tries the following in the indicated order for Populating the Link preview
|
||||||
|
* Open Graph protocol
|
||||||
|
* Meta "title" and "description" tags
|
||||||
|
* Best guess from page content (not recommended)
|
||||||
*
|
*
|
||||||
* @param file
|
* Schema.org microdata <-- This is still a TODO
|
||||||
* @return
|
|
||||||
* @throws IOException
|
|
||||||
* @throws MagicParseException
|
|
||||||
* @throws MagicMatchNotFoundException
|
|
||||||
* @throws MagicException
|
|
||||||
*/
|
*/
|
||||||
protected static String getMimeType(File file, String filenameWithExtension) throws IOException {
|
@Override
|
||||||
TikaConfig config = TikaConfig.getDefaultConfig();
|
public LinkPreview checkLink(String linkToCheck) {
|
||||||
Detector detector = config.getDetector();
|
LinkPreview toReturn = null;
|
||||||
TikaInputStream stream = TikaInputStream.get(file);
|
_log.info("to check " + linkToCheck);
|
||||||
Metadata metadata = new Metadata();
|
//look for a url in text
|
||||||
metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension);
|
linkToCheck = TextTransfromUtils.extractURL(linkToCheck);
|
||||||
MediaType mediaType = detector.detect(stream, metadata);
|
if (linkToCheck == null)
|
||||||
return mediaType.getBaseType().toString();
|
return null; //no url
|
||||||
|
|
||||||
|
String[] schemes = {"http","https"};
|
||||||
|
UrlValidator urlValidator = new UrlValidator(schemes);
|
||||||
|
if (! urlValidator.isValid(linkToCheck)) {
|
||||||
|
_log.warn("url is NOT valid, returning nothing");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
_log.debug("url is valid");
|
||||||
|
|
||||||
|
URL pageURL;
|
||||||
|
URLConnection siteConnection = null;
|
||||||
|
try {
|
||||||
|
pageURL = new URL(linkToCheck);
|
||||||
|
if (pageURL.getProtocol().equalsIgnoreCase("https")) {
|
||||||
|
System.setProperty("java.protocol.handler.pkgs", "com.sun.net.ssl.internal.www.protocol");
|
||||||
|
java.security.Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());
|
||||||
|
TextTransfromUtils.trustAllHTTPSConnections();
|
||||||
|
siteConnection = (HttpsURLConnection) pageURL.openConnection();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
siteConnection = (HttpURLConnection) pageURL.openConnection();
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
_log.error("url is not valid");
|
||||||
|
return null;
|
||||||
|
} catch (IOException e) {
|
||||||
|
_log.error("url is not reachable");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
||||||
|
siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
||||||
|
|
||||||
|
String title;
|
||||||
|
String description;
|
||||||
|
ArrayList<String> imageUrls = new ArrayList<String>();
|
||||||
|
//get the host from the url
|
||||||
|
String host = pageURL.getHost().replaceAll("www.", "");
|
||||||
|
|
||||||
|
//try openGraph First
|
||||||
|
OpenGraph ogLink = null;
|
||||||
|
try {
|
||||||
|
ogLink = new OpenGraph(linkToCheck, true, siteConnection);
|
||||||
|
if (ogLink == null || ogLink.getContent("title") == null) {
|
||||||
|
//there is no OpenGraph for this link
|
||||||
|
_log.info("No OpenGraph Found, going Best guess from page content") ;
|
||||||
|
toReturn = TextTransfromUtils.getInfoFromHTML(siteConnection, pageURL, linkToCheck, host);
|
||||||
|
} else {
|
||||||
|
//there is OpenGraph
|
||||||
|
_log.info("OpenGraph Found") ;
|
||||||
|
title = ogLink.getContent("title");
|
||||||
|
description = (ogLink.getContent("description") != null) ? ogLink.getContent("description") : "";
|
||||||
|
description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
|
||||||
|
//look for the image ask the guesser if not present
|
||||||
|
if (ogLink.getContent("image") != null) {
|
||||||
|
String imageUrl = TextTransfromUtils.getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image"));
|
||||||
|
imageUrls.add(imageUrl);
|
||||||
|
_log.trace("OpenGraph getImage = " +imageUrl) ;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
_log.trace("OpenGraph No Image, trying manuale parsing");
|
||||||
|
ArrayList<String> images = TextTransfromUtils.getImagesWithCleaner(pageURL);
|
||||||
|
if (! images.isEmpty())
|
||||||
|
imageUrls = images;
|
||||||
|
}
|
||||||
|
toReturn = new LinkPreview(title, description, linkToCheck, host, imageUrls);
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -548,318 +547,10 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Escape an html string. Escaping data received from the client helps to
|
|
||||||
* prevent cross-site script vulnerabilities.
|
|
||||||
*
|
|
||||||
* @param html the html string to escape
|
|
||||||
* @return the escaped string
|
|
||||||
*/
|
|
||||||
private String escapeHtmlAndTransformUrl(String html) {
|
|
||||||
if (html == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
String toReturn = html.replaceAll("&", "&amp;").replaceAll("<", "&lt;")
|
|
||||||
.replaceAll(">", "&gt;");
|
|
||||||
|
|
||||||
// replace all the line breaks by <br/>
|
|
||||||
toReturn = toReturn.replaceAll("(\r\n|\n)"," <br/> ");
|
|
||||||
//transfrom the URL in a clickable URL
|
|
||||||
toReturn = transformUrls(toReturn);
|
|
||||||
// then replace all the double spaces by the html version
|
|
||||||
toReturn = toReturn.replaceAll("\\s\\s","&nbsp;&nbsp;");
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* utilty method that extract an url ina text
|
|
||||||
* @param feedText
|
|
||||||
* @return the text with the clickable url in it
|
|
||||||
*/
|
|
||||||
public String extractURL(String feedText) {
|
|
||||||
// separate input by spaces ( URLs have no spaces )
|
|
||||||
String [] parts = feedText.split("\\s");
|
|
||||||
// Attempt to convert each item into an URL.
|
|
||||||
for( String item : parts ) {
|
|
||||||
if (item.startsWith("http")) {
|
|
||||||
try {
|
|
||||||
new URL(item);
|
|
||||||
return item;
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
// If there was an URL then it's not valid
|
|
||||||
_log.error("MalformedURLException returning... ");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* tries the following in the indicated order for Populating the Link preview
|
|
||||||
* Open Graph protocol
|
|
||||||
* Meta "title" and "description" tags
|
|
||||||
* Best guess from page content (not recommended)
|
|
||||||
*
|
|
||||||
* Schema.org microdata <-- This is still a TODO
|
|
||||||
*/
|
|
||||||
public LinkPreview checkLink(String linkToCheck) {
|
|
||||||
LinkPreview toReturn = null;
|
|
||||||
_log.info("to check " + linkToCheck);
|
|
||||||
//look for a url in text
|
|
||||||
linkToCheck = extractURL(linkToCheck);
|
|
||||||
if (linkToCheck == null)
|
|
||||||
return null; //no url
|
|
||||||
|
|
||||||
String[] schemes = {"http","https"};
|
|
||||||
UrlValidator urlValidator = new UrlValidator(schemes);
|
|
||||||
if (! urlValidator.isValid(linkToCheck)) {
|
|
||||||
_log.warn("url is NOT valid, returning nothing");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
_log.debug("url is valid");
|
|
||||||
|
|
||||||
URL pageURL;
|
|
||||||
URLConnection siteConnection = null;
|
|
||||||
try {
|
|
||||||
pageURL = new URL(linkToCheck);
|
|
||||||
if (pageURL.getProtocol().equalsIgnoreCase("https")) {
|
|
||||||
System.setProperty("java.protocol.handler.pkgs", "com.sun.net.ssl.internal.www.protocol");
|
|
||||||
java.security.Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider());
|
|
||||||
trustAllHTTPSConnections();
|
|
||||||
siteConnection = (HttpsURLConnection) pageURL.openConnection();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
siteConnection = (HttpURLConnection) pageURL.openConnection();
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
_log.error("url is not valid");
|
|
||||||
return null;
|
|
||||||
} catch (IOException e) {
|
|
||||||
_log.error("url is not reachable");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
|
||||||
siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
|
||||||
|
|
||||||
String title;
|
|
||||||
String description;
|
|
||||||
ArrayList<String> imageUrls = new ArrayList<String>();
|
|
||||||
//get the host from the url
|
|
||||||
String host = pageURL.getHost().replaceAll("www.", "");
|
|
||||||
|
|
||||||
//try openGraph First
|
|
||||||
OpenGraph ogLink = null;
|
|
||||||
try {
|
|
||||||
ogLink = new OpenGraph(linkToCheck, true, siteConnection);
|
|
||||||
if (ogLink == null || ogLink.getContent("title") == null) {
|
|
||||||
//there is no OpenGraph for this link
|
|
||||||
_log.info("No OpenGraph Found, going Best guess from page content") ;
|
|
||||||
toReturn = getInfoFromHTML(siteConnection, pageURL, linkToCheck, host);
|
|
||||||
} else {
|
|
||||||
//there is OpenGraph
|
|
||||||
_log.info("OpenGraph Found") ;
|
|
||||||
title = ogLink.getContent("title");
|
|
||||||
description = (ogLink.getContent("description") != null) ? ogLink.getContent("description") : "";
|
|
||||||
description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
|
|
||||||
//look for the image ask the guesser if not present
|
|
||||||
if (ogLink.getContent("image") != null) {
|
|
||||||
String imageUrl = getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image"));
|
|
||||||
imageUrls.add(imageUrl);
|
|
||||||
_log.trace("OpenGraph getImage = " +imageUrl) ;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
_log.trace("OpenGraph No Image, trying manuale parsing");
|
|
||||||
ArrayList<String> images = getImagesWithCleaner(pageURL);
|
|
||||||
if (! images.isEmpty())
|
|
||||||
imageUrls = images;
|
|
||||||
}
|
|
||||||
toReturn = new LinkPreview(title, description, linkToCheck, host, imageUrls);
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* to use when OpenGraph is not available, Tries Metadata first, then Best guess from page content
|
|
||||||
* @param pageUrl
|
|
||||||
* @param link
|
|
||||||
* @param host
|
|
||||||
* @return a LinPreview object instance filled with the extracted information
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception {
|
|
||||||
LinkPreview toReturn = null;
|
|
||||||
String title = "";
|
|
||||||
String description = "";
|
|
||||||
|
|
||||||
URLConnection conn = pageUrl.openConnection();
|
|
||||||
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
|
||||||
conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
|
||||||
|
|
||||||
MetaSeeker ms = null;
|
|
||||||
try {
|
|
||||||
title = getTitleFromHeader(pageUrl);
|
|
||||||
ms = new MetaSeeker(connection, pageUrl);
|
|
||||||
|
|
||||||
//try the metadata, otherwise ask the guesser
|
|
||||||
description = (ms.getContent("description") != null && ! ms.getContent("description").isEmpty()) ? ms.getContent("description") : createDescriptionFromContent(link);
|
|
||||||
|
|
||||||
ArrayList<String> images = new ArrayList<String>();
|
|
||||||
images = getImagesWithCleaner(pageUrl);
|
|
||||||
toReturn = new LinkPreview(title, description, link, host, images);
|
|
||||||
|
|
||||||
} catch(Exception e) {
|
|
||||||
_log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... ");
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
return toReturn;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* @param pageURL
|
|
||||||
* @return the title of the page or null if can't read it
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private String getTitleFromHeader(URL pageURL) throws IOException {
|
|
||||||
URLConnection conn = pageURL.openConnection();
|
|
||||||
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
|
||||||
conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
|
||||||
|
|
||||||
Charset charset = OpenGraph.getConnectionCharset(conn);
|
|
||||||
BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
|
|
||||||
String inputLine;
|
|
||||||
StringBuffer headContents = new StringBuffer();
|
|
||||||
|
|
||||||
// Loop through each line, looking for the closing head element
|
|
||||||
while ((inputLine = dis.readLine()) != null)
|
|
||||||
{
|
|
||||||
if (inputLine.contains("</head>")) {
|
|
||||||
inputLine = inputLine.substring(0, inputLine.indexOf("</head>") + 7);
|
|
||||||
inputLine = inputLine.concat("<body></body></html>");
|
|
||||||
headContents.append(inputLine + "\r\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
headContents.append(inputLine + "\r\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
String headContentsStr = headContents.toString();
|
|
||||||
HtmlCleaner cleaner = new HtmlCleaner();
|
|
||||||
// parse the string HTML
|
|
||||||
TagNode pageData = cleaner.clean(headContentsStr);
|
|
||||||
// open only the title tags
|
|
||||||
TagNode[] title = pageData.getElementsByName("title", true);
|
|
||||||
if (title != null && title.length > 0) {
|
|
||||||
String theTitle = title[0].getChildren().get(0).toString();
|
|
||||||
_log.trace("theTitle: " + theTitle);
|
|
||||||
return theTitle;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* try with HtmlCleaner API to read the images
|
|
||||||
* @param pageURL
|
|
||||||
* @return the title of the page or null if can't read it
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private ArrayList<String> getImagesWithCleaner(URL pageURL) throws IOException {
|
|
||||||
ArrayList<String> images = new ArrayList<String>();
|
|
||||||
URLConnection conn = pageURL.openConnection();
|
|
||||||
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
|
||||||
conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
|
||||||
|
|
||||||
Charset charset = OpenGraph.getConnectionCharset(conn);
|
|
||||||
BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
|
|
||||||
String inputLine;
|
|
||||||
StringBuffer headContents = new StringBuffer();
|
|
||||||
|
|
||||||
// Loop through each line, looking for the closing head element
|
|
||||||
while ((inputLine = dis.readLine()) != null) {
|
|
||||||
headContents.append(inputLine + "\r\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
String headContentsStr = headContents.toString();
|
|
||||||
HtmlCleaner cleaner = new HtmlCleaner();
|
|
||||||
// parse the string HTML
|
|
||||||
TagNode pageData = cleaner.clean(headContentsStr);
|
|
||||||
// open only the title tags
|
|
||||||
TagNode[] imgs = pageData.getElementsByName("img", true);
|
|
||||||
int upTo = (imgs.length > 15) ? 15 : imgs.length;
|
|
||||||
for (int i = 0; i < upTo; i++) {
|
|
||||||
if (imgs[i].hasAttribute("src")) {
|
|
||||||
String imageUrl = getImageUrlFromSrcAttribute(pageURL, imgs[i].getAttributeByName("src"));
|
|
||||||
images.add(imageUrl);
|
|
||||||
_log.trace("[FOUND image] " + imageUrl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return images;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* There are several ways to refer an image in a HTML, this method use an heuristic to get the actual image url
|
|
||||||
* @param pageURL the url
|
|
||||||
* @param srcAttr the content of the img src attribute
|
|
||||||
* @return the image url ready to be referred outside native environment
|
|
||||||
*/
|
|
||||||
private String getImageUrlFromSrcAttribute(URL pageURL, String srcAttr) {
|
|
||||||
String imageUrl = srcAttr;
|
|
||||||
if (imageUrl.startsWith("/")) //referred as absolute path case
|
|
||||||
imageUrl = pageURL.getProtocol()+"://"+pageURL.getHost()+imageUrl;
|
|
||||||
else if (imageUrl.startsWith("../")) { //relative path case
|
|
||||||
imageUrl = pageURL.toExternalForm().endsWith("/") ? pageURL.toExternalForm() + imageUrl : pageURL.toExternalForm() + "/" + imageUrl;
|
|
||||||
}
|
|
||||||
else if (!imageUrl.contains("/")) { //the image is probably in the same folder
|
|
||||||
// e.g. http://www.adomain.com/docrep/018/i3328e/i3328e00.htm?utm_source
|
|
||||||
String imageFolder = pageURL.toString().substring(0, pageURL.toString().lastIndexOf("/"));
|
|
||||||
imageUrl= imageFolder + "/" + imageUrl;
|
|
||||||
}
|
|
||||||
else if (!imageUrl.startsWith("http") ) { //e.g. http://adomain.com/anImage.png
|
|
||||||
imageUrl = pageURL.toExternalForm().endsWith("/") ? pageURL.toExternalForm() + imageUrl : pageURL.toExternalForm() + "/" + imageUrl;
|
|
||||||
}
|
|
||||||
return imageUrl;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* generate the description parsing the content (Best Guess)
|
|
||||||
* @param link the link to check
|
|
||||||
* @return the description guessed
|
|
||||||
*/
|
|
||||||
private String createDescriptionFromContent(String link) {
|
|
||||||
StringBean sb = new StringBean();
|
|
||||||
sb.setURL(link);
|
|
||||||
sb.setLinks(false);
|
|
||||||
String description = sb.getStrings();
|
|
||||||
description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
|
|
||||||
return description;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* this method handles the non trusted https connections
|
|
||||||
*/
|
|
||||||
private void trustAllHTTPSConnections() {
|
|
||||||
// Create a trust manager that does not validate certificate chains
|
|
||||||
TrustManager[] trustAllCerts = new TrustManager[]{
|
|
||||||
new X509TrustManager() {
|
|
||||||
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void checkClientTrusted(
|
|
||||||
java.security.cert.X509Certificate[] certs, String authType) {
|
|
||||||
}
|
|
||||||
|
|
||||||
public void checkServerTrusted(
|
|
||||||
java.security.cert.X509Certificate[] certs, String authType) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
try {
|
|
||||||
SSLContext sc = SSLContext.getInstance("SSL");
|
|
||||||
sc.init(null, trustAllCerts, new java.security.SecureRandom());
|
|
||||||
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
|
|
||||||
} catch (Exception e) {
|
|
||||||
System.out.println("Error" + e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates whether the scope is the whole infrastructure.
|
* Indicates whether the scope is the whole infrastructure.
|
||||||
|
|
|
@ -0,0 +1,362 @@
|
||||||
|
package org.gcube.portlets.user.shareupdates.server;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import javax.net.ssl.SSLContext;
|
||||||
|
import javax.net.ssl.TrustManager;
|
||||||
|
import javax.net.ssl.X509TrustManager;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.binary.Base64;
|
||||||
|
import org.gcube.portal.databook.client.GCubeSocialNetworking;
|
||||||
|
import org.gcube.portlets.user.shareupdates.server.metaseeker.MetaSeeker;
|
||||||
|
import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraph;
|
||||||
|
import org.gcube.portlets.user.shareupdates.shared.LinkPreview;
|
||||||
|
import org.gcube.portlets.widgets.pickuser.shared.PickingUser;
|
||||||
|
import org.htmlcleaner.HtmlCleaner;
|
||||||
|
import org.htmlcleaner.TagNode;
|
||||||
|
import org.htmlparser.beans.StringBean;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.sun.net.ssl.HttpsURLConnection;
|
||||||
|
/**
|
||||||
|
 * This class contains utility methods for parsing and transforming user-pasted text containing URLs, plus other utility methods.
|
||||||
|
* @author Massimiliano Assante, ISTI-CNR
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class TextTransfromUtils {
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
private static Logger _log = LoggerFactory.getLogger(ShareUpdateServiceImpl.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param preview
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected static String convertFileNameAnchorHTML(String url) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(url).append("\" target=\"_blank\">").append("a file.").append("</a> ").toString();
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convert the mentioned people in HTML anchor and also Encode the params Base64
|
||||||
|
* @param escapedFeedText
|
||||||
|
* @param taggedPeople
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected static String convertMentionPeopleAnchorHTML(String escapedFeedText, ArrayList<PickingUser> taggedPeople) {
|
||||||
|
for (PickingUser tagged : taggedPeople) {
|
||||||
|
String taggedHTML = "<a class=\"link\" style=\"font-size:14px;\" href=\""+GCubeSocialNetworking.USER_PROFILE_LINK
|
||||||
|
+"?"+
|
||||||
|
new String(Base64.encodeBase64(GCubeSocialNetworking.USER_PROFILE_OID.getBytes()))+"="+
|
||||||
|
new String(Base64.encodeBase64(tagged.getUsername().getBytes()))+"\">"+tagged.getFullName()+"</a> ";
|
||||||
|
escapedFeedText = escapedFeedText.replace(tagged.getFullName(), taggedHTML);
|
||||||
|
}
|
||||||
|
return escapedFeedText;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* generate the description parsing the content (Best Guess)
|
||||||
|
* @param link the link to check
|
||||||
|
* @return the description guessed
|
||||||
|
*/
|
||||||
|
private static String createDescriptionFromContent(String link) {
|
||||||
|
StringBean sb = new StringBean();
|
||||||
|
sb.setURL(link);
|
||||||
|
sb.setLinks(false);
|
||||||
|
String description = sb.getStrings();
|
||||||
|
description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Escape an html string. Escaping data received from the client helps to
|
||||||
|
* prevent cross-site script vulnerabilities.
|
||||||
|
*
|
||||||
|
* @param html the html string to escape
|
||||||
|
* @return the escaped string
|
||||||
|
*/
|
||||||
|
protected static String escapeHtmlAndTransformUrl(String html) {
|
||||||
|
if (html == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
String toReturn = html.replaceAll("&", "&").replaceAll("<", "<")
|
||||||
|
.replaceAll(">", ">");
|
||||||
|
|
||||||
|
// replace all the line breaks by <br/>
|
||||||
|
toReturn = toReturn.replaceAll("(\r\n|\n)"," <br/> ");
|
||||||
|
//transfrom the URL in a clickable URL
|
||||||
|
toReturn = transformUrls(toReturn);
|
||||||
|
// then replace all the double spaces by the html version
|
||||||
|
toReturn = toReturn.replaceAll("\\s\\s"," ");
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* utility method that extract an url ina text when you paste a link
|
||||||
|
* @param feedText
|
||||||
|
* @return the text with the clickable url in it
|
||||||
|
*/
|
||||||
|
protected static String extractURL(String feedText) {
|
||||||
|
// separate input by spaces ( URLs have no spaces )
|
||||||
|
feedText = feedText.replaceAll("(\r\n|\n)"," <br/> ");
|
||||||
|
String [] parts = feedText.split("\\s");
|
||||||
|
// Attempt to convert each item into an URL.
|
||||||
|
for( String item : parts ) {
|
||||||
|
String toCheck = getHttpToken(item);
|
||||||
|
if (toCheck != null) {
|
||||||
|
try {
|
||||||
|
new URL(toCheck);
|
||||||
|
return toCheck;
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
// If there was an URL then it's not valid
|
||||||
|
_log.error("MalformedURLException returning... ");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* check the tokens of a pasted text and see if there's any http link in it
|
||||||
|
* @param item a text token
|
||||||
|
* @return the actual http link
|
||||||
|
*/
|
||||||
|
private static String getHttpToken(String item) {
|
||||||
|
if (item.startsWith("http") || item.startsWith("www") || item.startsWith("(www") || item.startsWith("(http")) {
|
||||||
|
if (item.startsWith("("))
|
||||||
|
item = item.substring(1, item.length());
|
||||||
|
if (item.endsWith(".") || item.endsWith(")")) { //sometimes people write the url and close the phrase with a .
|
||||||
|
item = item.substring(0, item.length()-1);
|
||||||
|
}
|
||||||
|
item = item.startsWith("www") ? "http://"+item : item;
|
||||||
|
System.out.println("getHttpToken returns -> " + item);
|
||||||
|
return item;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* try with HtmlCleaner API to read the images
|
||||||
|
* @param pageURL
|
||||||
|
* @return the title of the page or null if can't read it
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected static ArrayList<String> getImagesWithCleaner(URL pageURL) throws IOException {
|
||||||
|
ArrayList<String> images = new ArrayList<String>();
|
||||||
|
URLConnection conn = pageURL.openConnection();
|
||||||
|
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
||||||
|
conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
||||||
|
|
||||||
|
Charset charset = OpenGraph.getConnectionCharset(conn);
|
||||||
|
BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
|
||||||
|
String inputLine;
|
||||||
|
StringBuffer headContents = new StringBuffer();
|
||||||
|
|
||||||
|
// Loop through each line, looking for the closing head element
|
||||||
|
while ((inputLine = dis.readLine()) != null) {
|
||||||
|
headContents.append(inputLine + "\r\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
String headContentsStr = headContents.toString();
|
||||||
|
HtmlCleaner cleaner = new HtmlCleaner();
|
||||||
|
// parse the string HTML
|
||||||
|
TagNode pageData = cleaner.clean(headContentsStr);
|
||||||
|
// open only the title tags
|
||||||
|
TagNode[] imgs = pageData.getElementsByName("img", true);
|
||||||
|
int upTo = (imgs.length > 15) ? 15 : imgs.length;
|
||||||
|
for (int i = 0; i < upTo; i++) {
|
||||||
|
if (imgs[i].hasAttribute("src")) {
|
||||||
|
String imageUrl = getImageUrlFromSrcAttribute(pageURL, imgs[i].getAttributeByName("src"));
|
||||||
|
images.add(imageUrl);
|
||||||
|
_log.trace("[FOUND image] " + imageUrl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return images;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* There are several ways to refer an image in a HTML, this method use an heuristic to get the actual image url
|
||||||
|
* @param pageURL the url
|
||||||
|
* @param srcAttr the content of the img src attribute
|
||||||
|
* @return the image url ready to be referred outside native environment
|
||||||
|
*/
|
||||||
|
protected static String getImageUrlFromSrcAttribute(URL pageURL, String srcAttr) {
|
||||||
|
String imageUrl = srcAttr;
|
||||||
|
_log.trace("imageUrl="+imageUrl);
|
||||||
|
if (imageUrl.startsWith("http")) {
|
||||||
|
_log.trace("Direct link case");
|
||||||
|
return imageUrl;
|
||||||
|
}
|
||||||
|
if (imageUrl.startsWith("/")) {//referred as absolute path case
|
||||||
|
_log.trace("Absolute Path case");
|
||||||
|
imageUrl = pageURL.getProtocol()+"://"+pageURL.getHost()+imageUrl;
|
||||||
|
}
|
||||||
|
else if (imageUrl.startsWith("../")) { //relative path case
|
||||||
|
_log.trace("Relative Path case");
|
||||||
|
String imageFolder = pageURL.toString().substring(0, pageURL.toString().lastIndexOf("/"));
|
||||||
|
imageUrl= imageFolder + "/" + imageUrl;
|
||||||
|
}
|
||||||
|
else if (!imageUrl.contains("/") || !imageUrl.startsWith("/")) { //the image is probably in the same folder or in a path starting from the last slash
|
||||||
|
_log.trace("probably in the same folder");
|
||||||
|
// e.g. http://www.adomain.com/docrep/018/i3328e/i3328e00.htm?utm_source
|
||||||
|
String imageFolder = pageURL.toString().substring(0, pageURL.toString().lastIndexOf("/"));
|
||||||
|
imageUrl= imageFolder + "/" + imageUrl;
|
||||||
|
}
|
||||||
|
else if (!imageUrl.startsWith("http") ) { //e.g. http://adomain.com/anImage.png
|
||||||
|
_log.trace("In the root");
|
||||||
|
imageUrl = pageURL.toExternalForm().endsWith("/") ? pageURL.toExternalForm() + imageUrl : pageURL.toExternalForm() + "/" + imageUrl;
|
||||||
|
}
|
||||||
|
return imageUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* to use when OpenGraph is not available, Tries Metadata first, then Best guess from page content
|
||||||
|
* @param pageUrl
|
||||||
|
* @param link
|
||||||
|
* @param host
|
||||||
|
* @return a LinPreview object instance filled with the extracted information
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected static LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception {
|
||||||
|
LinkPreview toReturn = null;
|
||||||
|
String title = "";
|
||||||
|
String description = "";
|
||||||
|
|
||||||
|
URLConnection conn = pageUrl.openConnection();
|
||||||
|
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
||||||
|
conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
||||||
|
|
||||||
|
MetaSeeker ms = null;
|
||||||
|
try {
|
||||||
|
title = getTitleFromHeader(pageUrl);
|
||||||
|
_log.trace("Found Title=" + title);
|
||||||
|
ms = new MetaSeeker(connection, pageUrl);
|
||||||
|
|
||||||
|
//try the metadata, otherwise ask the guesser
|
||||||
|
description = (ms.getContent("description") != null && ! ms.getContent("description").isEmpty()) ? ms.getContent("description") : createDescriptionFromContent(link);
|
||||||
|
|
||||||
|
ArrayList<String> images = new ArrayList<String>();
|
||||||
|
images = getImagesWithCleaner(pageUrl);
|
||||||
|
toReturn = new LinkPreview(title, description, link, host, images);
|
||||||
|
|
||||||
|
} catch(Exception e) {
|
||||||
|
_log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... ");
|
||||||
|
e.printStackTrace();
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param pageURL
|
||||||
|
* @return the title of the page or null if can't read it
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private static String getTitleFromHeader(URL pageURL) throws IOException {
|
||||||
|
URLConnection conn = pageURL.openConnection();
|
||||||
|
//pretend you're a browser (make my request from Java more “browsery-like”.)
|
||||||
|
conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
|
||||||
|
|
||||||
|
Charset charset = OpenGraph.getConnectionCharset(conn);
|
||||||
|
BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
|
||||||
|
String inputLine;
|
||||||
|
StringBuffer headContents = new StringBuffer();
|
||||||
|
|
||||||
|
// Loop through each line, looking for the closing head element
|
||||||
|
while ((inputLine = dis.readLine()) != null)
|
||||||
|
{
|
||||||
|
if (inputLine.contains("</head>")) {
|
||||||
|
inputLine = inputLine.substring(0, inputLine.indexOf("</head>") + 7);
|
||||||
|
inputLine = inputLine.concat("<body></body></html>");
|
||||||
|
headContents.append(inputLine + "\r\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
headContents.append(inputLine + "\r\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
String headContentsStr = headContents.toString();
|
||||||
|
HtmlCleaner cleaner = new HtmlCleaner();
|
||||||
|
// parse the string HTML
|
||||||
|
TagNode pageData = cleaner.clean(headContentsStr);
|
||||||
|
// open only the title tags
|
||||||
|
TagNode[] title = pageData.getElementsByName("title", true);
|
||||||
|
if (title != null && title.length > 0 && title[0].getChildren().size() > 0) {
|
||||||
|
String theTitle = title[0].getChildren().get(0).toString();
|
||||||
|
_log.trace("theTitle: " + theTitle);
|
||||||
|
return theTitle;
|
||||||
|
}
|
||||||
|
return "No-title";
|
||||||
|
}
|
||||||
|
protected static String replaceAmpersand(String toReplace) {
|
||||||
|
String toReturn = toReplace.replaceAll("&", "&");
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* utility method that convert a url ina text in a clickable url by the browser
|
||||||
|
* and if the user has just pasted a link, converts the link in: shared a link
|
||||||
|
* @param feedText
|
||||||
|
* @return the text with the clickable url in it
|
||||||
|
*/
|
||||||
|
protected static String transformUrls(String feedText) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
// separate input by spaces ( URLs have no spaces )
|
||||||
|
String [] parts = feedText.split("\\s");
|
||||||
|
// Attempt to convert each item into an URL.
|
||||||
|
for (int i = 0; i < parts.length; i++) {
|
||||||
|
String toCheck = getHttpToken(parts[i]);
|
||||||
|
if (toCheck != null) {
|
||||||
|
try {
|
||||||
|
URL url = new URL(toCheck);
|
||||||
|
if (i == 0 && parts.length == 1) //then he shared just a link
|
||||||
|
return sb.append("<span style=\"color:gray; font-size:12px;\">shared </span><a class=\"link\" href=\"").append(url).append("\" target=\"_blank\">").append("a link.").append("</a> ").toString();
|
||||||
|
// If possible then replace with anchor...
|
||||||
|
sb.append("<a class=\"link\" style=\"font-size:14px;\" href=\"").append(url).append("\" target=\"_blank\">").append(url).append("</a> ");
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
// If there was an URL then it's not valid
|
||||||
|
_log.error("MalformedURLException returning... ");
|
||||||
|
return feedText;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sb.append(parts[i]);
|
||||||
|
sb.append(" ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* this method handles the non trusted https connections
|
||||||
|
*/
|
||||||
|
protected static void trustAllHTTPSConnections() {
|
||||||
|
// Create a trust manager that does not validate certificate chains
|
||||||
|
TrustManager[] trustAllCerts = new TrustManager[]{
|
||||||
|
new X509TrustManager() {
|
||||||
|
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkClientTrusted(
|
||||||
|
java.security.cert.X509Certificate[] certs, String authType) {
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkServerTrusted(
|
||||||
|
java.security.cert.X509Certificate[] certs, String authType) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
try {
|
||||||
|
SSLContext sc = SSLContext.getInstance("SSL");
|
||||||
|
sc.init(null, trustAllCerts, new java.security.SecureRandom());
|
||||||
|
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.println("Error" + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -58,7 +58,7 @@ public class UploadToWorkspaceThread implements Runnable {
|
||||||
|
|
||||||
_log.info("File to upload="+fileabsolutePathOnServer);
|
_log.info("File to upload="+fileabsolutePathOnServer);
|
||||||
File file = new File(fileabsolutePathOnServer);
|
File file = new File(fileabsolutePathOnServer);
|
||||||
String mimeType = ShareUpdateServiceImpl.getMimeType(file, fileName);
|
String mimeType = FilePreviewer.getMimeType(file, fileName);
|
||||||
InputStream fileData = new FileInputStream(file);
|
InputStream fileData = new FileInputStream(file);
|
||||||
String theId = "";
|
String theId = "";
|
||||||
_log.info("mimeType="+mimeType + " fileData null? " + (fileData == null) );
|
_log.info("mimeType="+mimeType + " fileData null? " + (fileData == null) );
|
||||||
|
|
Loading…
Reference in New Issue