diff --git a/.classpath b/.classpath index 8339954..d0e57fc 100644 --- a/.classpath +++ b/.classpath @@ -1,6 +1,6 @@ - + @@ -31,5 +31,5 @@ - + diff --git a/.settings/com.google.gdt.eclipse.core.prefs b/.settings/com.google.gdt.eclipse.core.prefs index 91ccfb3..f364b3d 100644 --- a/.settings/com.google.gdt.eclipse.core.prefs +++ b/.settings/com.google.gdt.eclipse.core.prefs @@ -1,5 +1,5 @@ eclipse.preferences.version=1 jarsExcludedFromWebInfLib= -lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.2.3-SNAPSHOT +lastWarOutDir=/Users/massi/Documents/workspace/share-updates/target/share-updates-1.3.0-SNAPSHOT warSrcDir=src/main/webapp warSrcDirIsOutput=false diff --git a/pom.xml b/pom.xml index efbe006..bcab07c 100644 --- a/pom.xml +++ b/pom.xml @@ -13,7 +13,7 @@ org.gcube.portlets.user share-updates war - 1.2.3-SNAPSHOT + 1.3.0-SNAPSHOT gCube Share Updates Portlet diff --git a/src/main/java/org/gcube/portlets/user/shareupdates/client/view/ShareUpdateForm.java b/src/main/java/org/gcube/portlets/user/shareupdates/client/view/ShareUpdateForm.java index 9b1bf4f..c66eb15 100644 --- a/src/main/java/org/gcube/portlets/user/shareupdates/client/view/ShareUpdateForm.java +++ b/src/main/java/org/gcube/portlets/user/shareupdates/client/view/ShareUpdateForm.java @@ -313,7 +313,6 @@ public class ShareUpdateForm extends Composite { } }); } - private PrivacyLevel getPrivacyLevel() { String selected = privacyLevel.getValue(privacyLevel.getSelectedIndex()); if (selected.compareTo(PrivacyLevel.CONNECTION.toString()) == 0) @@ -328,10 +327,6 @@ public class ShareUpdateForm extends Composite { return PrivacyLevel.SINGLE_VRE; } - - - - /** * Escape an html string. Escaping data received from the client helps to * prevent cross-site script vulnerabilities. @@ -355,7 +350,7 @@ public class ShareUpdateForm extends Composite { String [] parts = textToCheck.split("\\s"); // Attempt to convert each item into an URL. 
for( String item : parts ) { - if (item.startsWith("http")) { + if (item.startsWith("http") || item.startsWith("www")) { preview.add(new LinkLoader()); submitButton.setEnabled(false); //GWT.log("It's http link:" + linkToCheck); diff --git a/src/main/java/org/gcube/portlets/user/shareupdates/server/FilePreviewer.java b/src/main/java/org/gcube/portlets/user/shareupdates/server/FilePreviewer.java index ccf2b59..8c26600 100644 --- a/src/main/java/org/gcube/portlets/user/shareupdates/server/FilePreviewer.java +++ b/src/main/java/org/gcube/portlets/user/shareupdates/server/FilePreviewer.java @@ -18,13 +18,17 @@ import java.util.Iterator; import javax.imageio.ImageIO; import javax.imageio.ImageReader; -import javax.imageio.stream.FileImageInputStream; import javax.imageio.stream.ImageInputStream; import net.coobird.thumbnailator.Thumbnails; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.util.PDFTextStripper; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.detect.Detector; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; import org.gcube.applicationsupportlayer.social.storage.FTPManager; import org.gcube.portal.databook.shared.ImageType; import org.gcube.portlets.user.shareupdates.shared.LinkPreview; @@ -45,7 +49,6 @@ public class FilePreviewer { private static Logger _log = LoggerFactory.getLogger(FilePreviewer.class); private static final String PDF_DEFAULT_IMAGE = "default/pdf.png"; - private static final String NOTHUMB_DEFAULT_IMAGE = "default/default_image.png"; private static final String GENERICFILE_DEFAULT_IMAGE = "default/default_generic.png"; /** * these are the extension for which I have an icon image preview @@ -233,5 +236,22 @@ public class FilePreviewer { return null; } - + /** + * + * @param file + * @return + * @throws IOException + * @throws MagicParseException + * @throws MagicMatchNotFoundException + * @throws MagicException + */ + 
protected static String getMimeType(File file, String filenameWithExtension) throws IOException { + TikaConfig config = TikaConfig.getDefaultConfig(); + Detector detector = config.getDetector(); + TikaInputStream stream = TikaInputStream.get(file); + Metadata metadata = new Metadata(); + metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension); + MediaType mediaType = detector.detect(stream, metadata); + return mediaType.getBaseType().toString(); + } } diff --git a/src/main/java/org/gcube/portlets/user/shareupdates/server/ShareUpdateServiceImpl.java b/src/main/java/org/gcube/portlets/user/shareupdates/server/ShareUpdateServiceImpl.java index c0b190e..0dc6669 100644 --- a/src/main/java/org/gcube/portlets/user/shareupdates/server/ShareUpdateServiceImpl.java +++ b/src/main/java/org/gcube/portlets/user/shareupdates/server/ShareUpdateServiceImpl.java @@ -140,7 +140,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar } public String getDevelopmentUser() { String user = "test.user"; - //user = "massimiliano.assante"; + user = "massimiliano.assante"; return user; } @@ -150,12 +150,12 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar public ClientFeed share(String postText, FeedType feedType, PrivacyLevel pLevel, String vreId, LinkPreview preview, String urlThumbnail, ArrayList mentionedUserFullNames,String fileName, String filePathOnServer, boolean notifyGroup) { - String escapedFeedText = escapeHtmlAndTransformUrl(postText); + String escapedFeedText = TextTransfromUtils.escapeHtmlAndTransformUrl(postText); ArrayList mentionedUsers = null; if (mentionedUserFullNames != null && ! 
mentionedUserFullNames.isEmpty()) { mentionedUsers = getSelectedUserIds(mentionedUserFullNames); - escapedFeedText = convertMentionPeopleAnchorHTML(escapedFeedText, mentionedUsers); + escapedFeedText = TextTransfromUtils.convertMentionPeopleAnchorHTML(escapedFeedText, mentionedUsers); } @@ -185,12 +185,12 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar //this means the user has shared a file without text in it. String textToPost = ""; if (escapedFeedText.compareTo(ShareUpdateForm.NO_TEXT_FILE_SHARE) == 0) { - textToPost = convertFileNameAnchorHTML(url); + textToPost = TextTransfromUtils.convertFileNameAnchorHTML(url); } else { textToPost = escapedFeedText; System.out.println("textToPost=" + textToPost); } - + ScopeBean scope = new ScopeBean(session.getScope()); String vreId2Set = scope.is(Type.VRE) ? scope.toString() : ""; @@ -231,7 +231,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar //everything went fine ClientFeed cf = new ClientFeed(toShare.getKey(), toShare.getType().toString(), username, feedDate, toShare.getUri(), - replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(), + TextTransfromUtils.replaceAmpersand(toShare.getDescription()), fullName, email, thumbnailURL, toShare.getLinkTitle(), toShare.getLinkDescription(), toShare.getUriThumbnail(), toShare.getLinkHost()); @@ -260,73 +260,6 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar return cf; } - - /** - * convert the mentioned people in HTML anchor and also Encode the params Base64 - * @param escapedFeedText - * @param taggedPeople - * @return - */ - private String convertMentionPeopleAnchorHTML(String escapedFeedText, ArrayList taggedPeople) { - for (PickingUser tagged : taggedPeople) { - String taggedHTML = ""+tagged.getFullName()+" "; - escapedFeedText = escapedFeedText.replace(tagged.getFullName(), taggedHTML); - } - 
return escapedFeedText; - } - - private void setUserSettingsInSession(UserSettings user) { - getASLSession().setAttribute(UserInfo.USER_INFO_ATTR, user); - } - private String replaceAmpersand(String toReplace) { - String toReturn = toReplace.replaceAll("&", "&"); - return toReturn; - } - /** - * utility method that convert a url ina text in a clickable url by the browser - * and if the user has just pasted a link, converts the link in: shared a link - * @param feedText - * @return the text with the clickable url in it - */ - private String transformUrls(String feedText) { - StringBuilder sb = new StringBuilder(); - // separate input by spaces ( URLs have no spaces ) - String [] parts = feedText.split("\\s"); - // Attempt to convert each item into an URL. - for (int i = 0; i < parts.length; i++) { - if (parts[i].startsWith("http")) { - try { - URL url = new URL(parts[i]); - if (i == 0 && parts.length == 1) //then he shared just a link - return sb.append("shared ").append("a link.").append(" ").toString(); - // If possible then replace with anchor... - sb.append("").append(url).append(" "); - } catch (MalformedURLException e) { - // If there was an URL then it's not valid - _log.error("MalformedURLException returning... 
"); - return feedText; - } - } else { - sb.append(parts[i]); - sb.append(" "); - } - } - return sb.toString(); - } - /** - * - * @param preview - * @return - */ - private String convertFileNameAnchorHTML(String url) { - StringBuilder sb = new StringBuilder(); - sb.append("shared ").append("a file.").append(" ").toString(); - return sb.toString(); - } - @Override public UserSettings getUserSettings() { try { @@ -348,12 +281,9 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar UserInfo userInfo = new UserInfo(username, fullName, thumbnailURL, user.getEmailAddress(), accountURL, true, isAdmin(), vreNames); UserSettings toReturn = new UserSettings(userInfo, 0, session.getScopeName(), isInfrastructureScope()); - setUserSettingsInSession(toReturn); - return toReturn; } else { - _log.info("Returning test USER = " + session.getUsername()); HashMap fakeVreNames = new HashMap(); fakeVreNames.put("/gcube/devsec/devVRE","devVRE"); @@ -393,7 +323,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar try { - String mimeType = getMimeType(new File(fileabsolutePathOnServer), fileName); + String mimeType = FilePreviewer.getMimeType(new File(fileabsolutePathOnServer), fileName); UriResolverReaderParameter resolver = new UriResolverReaderParameter(); //get the url to show (though it could not be ready for download at this stage) httpURL = resolver.resolveAsUriRequest(smpURI, fileName, mimeType, true); @@ -433,27 +363,96 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar _log.debug("Returning httpURL=" + httpURL); return toReturn; } - - /** + * tries the following in the indicated order for Populating the Link preview + * Open Graph protocol + * Meta "title" and "description" tags + * Best guess from page content (not recommended) * - * @param file - * @return - * @throws IOException - * @throws MagicParseException - * @throws MagicMatchNotFoundException - * @throws MagicException + * 
Schema.org microdata <-- This is still a TODO */ - protected static String getMimeType(File file, String filenameWithExtension) throws IOException { - TikaConfig config = TikaConfig.getDefaultConfig(); - Detector detector = config.getDetector(); - TikaInputStream stream = TikaInputStream.get(file); - Metadata metadata = new Metadata(); - metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension); - MediaType mediaType = detector.detect(stream, metadata); - return mediaType.getBaseType().toString(); - } + @Override + public LinkPreview checkLink(String linkToCheck) { + LinkPreview toReturn = null; + _log.info("to check " + linkToCheck); + //look for a url in text + linkToCheck = TextTransfromUtils.extractURL(linkToCheck); + if (linkToCheck == null) + return null; //no url + String[] schemes = {"http","https"}; + UrlValidator urlValidator = new UrlValidator(schemes); + if (! urlValidator.isValid(linkToCheck)) { + _log.warn("url is NOT valid, returning nothing"); + return null; + } + _log.debug("url is valid"); + + URL pageURL; + URLConnection siteConnection = null; + try { + pageURL = new URL(linkToCheck); + if (pageURL.getProtocol().equalsIgnoreCase("https")) { + System.setProperty("java.protocol.handler.pkgs", "com.sun.net.ssl.internal.www.protocol"); + java.security.Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider()); + TextTransfromUtils.trustAllHTTPSConnections(); + siteConnection = (HttpsURLConnection) pageURL.openConnection(); + } + else + siteConnection = (HttpURLConnection) pageURL.openConnection(); + } catch (MalformedURLException e) { + _log.error("url is not valid"); + return null; + } catch (IOException e) { + _log.error("url is not reachable"); + return null; + } + //pretend you're a browser (make my request from Java more “browsery-like”.) 
+ siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + + String title; + String description; + ArrayList imageUrls = new ArrayList(); + //get the host from the url + String host = pageURL.getHost().replaceAll("www.", ""); + + //try openGraph First + OpenGraph ogLink = null; + try { + ogLink = new OpenGraph(linkToCheck, true, siteConnection); + if (ogLink == null || ogLink.getContent("title") == null) { + //there is no OpenGraph for this link + _log.info("No OpenGraph Found, going Best guess from page content") ; + toReturn = TextTransfromUtils.getInfoFromHTML(siteConnection, pageURL, linkToCheck, host); + } else { + //there is OpenGraph + _log.info("OpenGraph Found") ; + title = ogLink.getContent("title"); + description = (ogLink.getContent("description") != null) ? ogLink.getContent("description") : ""; + description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description); + //look for the image ask the guesser if not present + if (ogLink.getContent("image") != null) { + String imageUrl = TextTransfromUtils.getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image")); + imageUrls.add(imageUrl); + _log.trace("OpenGraph getImage = " +imageUrl) ; + } + else { + _log.trace("OpenGraph No Image, trying manuale parsing"); + ArrayList images = TextTransfromUtils.getImagesWithCleaner(pageURL); + if (! images.isEmpty()) + imageUrls = images; + } + toReturn = new LinkPreview(title, description, linkToCheck, host, imageUrls); + return toReturn; + } + } catch (IOException e) { + e.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } + return toReturn; + } + /** * return the id as key and the names as value of the vre a user is subscribed to * @param username @@ -548,318 +547,10 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements Shar return null; } } - /** - * Escape an html string. 
Escaping data received from the client helps to - * prevent cross-site script vulnerabilities. - * - * @param html the html string to escape - * @return the escaped string - */ - private String escapeHtmlAndTransformUrl(String html) { - if (html == null) { - return null; - } - String toReturn = html.replaceAll("&", "&").replaceAll("<", "<") - .replaceAll(">", ">"); + - // replace all the line breaks by
- toReturn = toReturn.replaceAll("(\r\n|\n)","
"); - //transfrom the URL in a clickable URL - toReturn = transformUrls(toReturn); - // then replace all the double spaces by the html version   - toReturn = toReturn.replaceAll("\\s\\s","  "); - return toReturn; - } - - /** - * utilty method that extract an url ina text - * @param feedText - * @return the text with the clickable url in it - */ - public String extractURL(String feedText) { - // separate input by spaces ( URLs have no spaces ) - String [] parts = feedText.split("\\s"); - // Attempt to convert each item into an URL. - for( String item : parts ) { - if (item.startsWith("http")) { - try { - new URL(item); - return item; - } catch (MalformedURLException e) { - // If there was an URL then it's not valid - _log.error("MalformedURLException returning... "); - return null; - } - } - } - return null; - } - /** - * tries the following in the indicated order for Populating the Link preview - * Open Graph protocol - * Meta "title" and "description" tags - * Best guess from page content (not recommended) - * - * Schema.org microdata <-- This is still a TODO - */ - public LinkPreview checkLink(String linkToCheck) { - LinkPreview toReturn = null; - _log.info("to check " + linkToCheck); - //look for a url in text - linkToCheck = extractURL(linkToCheck); - if (linkToCheck == null) - return null; //no url - - String[] schemes = {"http","https"}; - UrlValidator urlValidator = new UrlValidator(schemes); - if (! 
urlValidator.isValid(linkToCheck)) { - _log.warn("url is NOT valid, returning nothing"); - return null; - } - _log.debug("url is valid"); - - URL pageURL; - URLConnection siteConnection = null; - try { - pageURL = new URL(linkToCheck); - if (pageURL.getProtocol().equalsIgnoreCase("https")) { - System.setProperty("java.protocol.handler.pkgs", "com.sun.net.ssl.internal.www.protocol"); - java.security.Security.addProvider(new com.sun.net.ssl.internal.ssl.Provider()); - trustAllHTTPSConnections(); - siteConnection = (HttpsURLConnection) pageURL.openConnection(); - } - else - siteConnection = (HttpURLConnection) pageURL.openConnection(); - } catch (MalformedURLException e) { - _log.error("url is not valid"); - return null; - } catch (IOException e) { - _log.error("url is not reachable"); - return null; - } - //pretend you're a browser (make my request from Java more “browsery-like”.) - siteConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - - String title; - String description; - ArrayList imageUrls = new ArrayList(); - //get the host from the url - String host = pageURL.getHost().replaceAll("www.", ""); - - //try openGraph First - OpenGraph ogLink = null; - try { - ogLink = new OpenGraph(linkToCheck, true, siteConnection); - if (ogLink == null || ogLink.getContent("title") == null) { - //there is no OpenGraph for this link - _log.info("No OpenGraph Found, going Best guess from page content") ; - toReturn = getInfoFromHTML(siteConnection, pageURL, linkToCheck, host); - } else { - //there is OpenGraph - _log.info("OpenGraph Found") ; - title = ogLink.getContent("title"); - description = (ogLink.getContent("description") != null) ? ogLink.getContent("description") : ""; - description = ((description.length() > 256) ? description.substring(0, 256)+"..." 
: description); - //look for the image ask the guesser if not present - if (ogLink.getContent("image") != null) { - String imageUrl = getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image")); - imageUrls.add(imageUrl); - _log.trace("OpenGraph getImage = " +imageUrl) ; - } - else { - _log.trace("OpenGraph No Image, trying manuale parsing"); - ArrayList images = getImagesWithCleaner(pageURL); - if (! images.isEmpty()) - imageUrls = images; - } - toReturn = new LinkPreview(title, description, linkToCheck, host, imageUrls); - return toReturn; - } - } catch (IOException e) { - e.printStackTrace(); - } catch (Exception e) { - e.printStackTrace(); - } - return toReturn; - } - /** - * to use when OpenGraph is not available, Tries Metadata first, then Best guess from page content - * @param pageUrl - * @param link - * @param host - * @return a LinPreview object instance filled with the extracted information - * @throws IOException - */ - private LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception { - LinkPreview toReturn = null; - String title = ""; - String description = ""; - - URLConnection conn = pageUrl.openConnection(); - //pretend you're a browser (make my request from Java more “browsery-like”.) - conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - - MetaSeeker ms = null; - try { - title = getTitleFromHeader(pageUrl); - ms = new MetaSeeker(connection, pageUrl); - - //try the metadata, otherwise ask the guesser - description = (ms.getContent("description") != null && ! ms.getContent("description").isEmpty()) ? 
ms.getContent("description") : createDescriptionFromContent(link); - - ArrayList images = new ArrayList(); - images = getImagesWithCleaner(pageUrl); - toReturn = new LinkPreview(title, description, link, host, images); - - } catch(Exception e) { - _log.error("[MANUAL-PARSE] Something wrong with the meta seeker returning ... "); - return toReturn; - } - return toReturn; - } - /** - * @param pageURL - * @return the title of the page or null if can't read it - * @throws IOException - */ - private String getTitleFromHeader(URL pageURL) throws IOException { - URLConnection conn = pageURL.openConnection(); - //pretend you're a browser (make my request from Java more “browsery-like”.) - conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - - Charset charset = OpenGraph.getConnectionCharset(conn); - BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset)); - String inputLine; - StringBuffer headContents = new StringBuffer(); - - // Loop through each line, looking for the closing head element - while ((inputLine = dis.readLine()) != null) - { - if (inputLine.contains("")) { - inputLine = inputLine.substring(0, inputLine.indexOf("") + 7); - inputLine = inputLine.concat(""); - headContents.append(inputLine + "\r\n"); - break; - } - headContents.append(inputLine + "\r\n"); - } - - String headContentsStr = headContents.toString(); - HtmlCleaner cleaner = new HtmlCleaner(); - // parse the string HTML - TagNode pageData = cleaner.clean(headContentsStr); - // open only the title tags - TagNode[] title = pageData.getElementsByName("title", true); - if (title != null && title.length > 0) { - String theTitle = title[0].getChildren().get(0).toString(); - _log.trace("theTitle: " + theTitle); - return theTitle; - } - return null; - } - - /** - * try with HtmlCleaner API to read the images - * @param pageURL - * @return the title of the page or null if 
can't read it - * @throws IOException - */ - private ArrayList getImagesWithCleaner(URL pageURL) throws IOException { - ArrayList images = new ArrayList(); - URLConnection conn = pageURL.openConnection(); - //pretend you're a browser (make my request from Java more “browsery-like”.) - conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - - Charset charset = OpenGraph.getConnectionCharset(conn); - BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset)); - String inputLine; - StringBuffer headContents = new StringBuffer(); - - // Loop through each line, looking for the closing head element - while ((inputLine = dis.readLine()) != null) { - headContents.append(inputLine + "\r\n"); - } - - String headContentsStr = headContents.toString(); - HtmlCleaner cleaner = new HtmlCleaner(); - // parse the string HTML - TagNode pageData = cleaner.clean(headContentsStr); - // open only the title tags - TagNode[] imgs = pageData.getElementsByName("img", true); - int upTo = (imgs.length > 15) ? 
15 : imgs.length; - for (int i = 0; i < upTo; i++) { - if (imgs[i].hasAttribute("src")) { - String imageUrl = getImageUrlFromSrcAttribute(pageURL, imgs[i].getAttributeByName("src")); - images.add(imageUrl); - _log.trace("[FOUND image] " + imageUrl); - } - } - return images; - } - /** - * There are several ways to refer an image in a HTML, this method use an heuristic to get the actual image url - * @param pageURL the url - * @param srcAttr the content of the img src attribute - * @return the image url ready to be referred outside native environment - */ - private String getImageUrlFromSrcAttribute(URL pageURL, String srcAttr) { - String imageUrl = srcAttr; - if (imageUrl.startsWith("/")) //referred as absolute path case - imageUrl = pageURL.getProtocol()+"://"+pageURL.getHost()+imageUrl; - else if (imageUrl.startsWith("../")) { //relative path case - imageUrl = pageURL.toExternalForm().endsWith("/") ? pageURL.toExternalForm() + imageUrl : pageURL.toExternalForm() + "/" + imageUrl; - } - else if (!imageUrl.contains("/")) { //the image is probably in the same folder - // e.g. http://www.adomain.com/docrep/018/i3328e/i3328e00.htm?utm_source - String imageFolder = pageURL.toString().substring(0, pageURL.toString().lastIndexOf("/")); - imageUrl= imageFolder + "/" + imageUrl; - } - else if (!imageUrl.startsWith("http") ) { //e.g. http://adomain.com/anImage.png - imageUrl = pageURL.toExternalForm().endsWith("/") ? pageURL.toExternalForm() + imageUrl : pageURL.toExternalForm() + "/" + imageUrl; - } - return imageUrl; - } - /** - * generate the description parsing the content (Best Guess) - * @param link the link to check - * @return the description guessed - */ - private String createDescriptionFromContent(String link) { - StringBean sb = new StringBean(); - sb.setURL(link); - sb.setLinks(false); - String description = sb.getStrings(); - description = ((description.length() > 256) ? description.substring(0, 256)+"..." 
: description); - return description; - } - /** - * this method handles the non trusted https connections - */ - private void trustAllHTTPSConnections() { - // Create a trust manager that does not validate certificate chains - TrustManager[] trustAllCerts = new TrustManager[]{ - new X509TrustManager() { - public java.security.cert.X509Certificate[] getAcceptedIssuers() { - return null; - } - - public void checkClientTrusted( - java.security.cert.X509Certificate[] certs, String authType) { - } - - public void checkServerTrusted( - java.security.cert.X509Certificate[] certs, String authType) { - } - } - }; - try { - SSLContext sc = SSLContext.getInstance("SSL"); - sc.init(null, trustAllCerts, new java.security.SecureRandom()); - HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory()); - } catch (Exception e) { - System.out.println("Error" + e); - } - } + + /** * Indicates whether the scope is the whole infrastructure. diff --git a/src/main/java/org/gcube/portlets/user/shareupdates/server/TextTransfromUtils.java b/src/main/java/org/gcube/portlets/user/shareupdates/server/TextTransfromUtils.java new file mode 100644 index 0000000..b9636af --- /dev/null +++ b/src/main/java/org/gcube/portlets/user/shareupdates/server/TextTransfromUtils.java @@ -0,0 +1,362 @@ +package org.gcube.portlets.user.shareupdates.server; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.util.ArrayList; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; + +import org.apache.commons.codec.binary.Base64; +import org.gcube.portal.databook.client.GCubeSocialNetworking; +import org.gcube.portlets.user.shareupdates.server.metaseeker.MetaSeeker; +import org.gcube.portlets.user.shareupdates.server.opengraph.OpenGraph; +import 
org.gcube.portlets.user.shareupdates.shared.LinkPreview; +import org.gcube.portlets.widgets.pickuser.shared.PickingUser; +import org.htmlcleaner.HtmlCleaner; +import org.htmlcleaner.TagNode; +import org.htmlparser.beans.StringBean; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.sun.net.ssl.HttpsURLConnection; +/** + * this class contains utility method for parsing and trasforming users pasted text containing URLs and other utility methods + * @author Massimiliano Assante, ISTI-CNR + * + */ +public class TextTransfromUtils { + /** + * + */ + private static Logger _log = LoggerFactory.getLogger(ShareUpdateServiceImpl.class); + + /** + * + * @param preview + * @return + */ + protected static String convertFileNameAnchorHTML(String url) { + StringBuilder sb = new StringBuilder(); + sb.append("shared ").append("a file.").append(" ").toString(); + return sb.toString(); + } + + + /** + * convert the mentioned people in HTML anchor and also Encode the params Base64 + * @param escapedFeedText + * @param taggedPeople + * @return + */ + protected static String convertMentionPeopleAnchorHTML(String escapedFeedText, ArrayList taggedPeople) { + for (PickingUser tagged : taggedPeople) { + String taggedHTML = ""+tagged.getFullName()+" "; + escapedFeedText = escapedFeedText.replace(tagged.getFullName(), taggedHTML); + } + return escapedFeedText; + } + + /** + * generate the description parsing the content (Best Guess) + * @param link the link to check + * @return the description guessed + */ + private static String createDescriptionFromContent(String link) { + StringBean sb = new StringBean(); + sb.setURL(link); + sb.setLinks(false); + String description = sb.getStrings(); + description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description); + return description; + } + /** + * Escape an html string. Escaping data received from the client helps to + * prevent cross-site script vulnerabilities. 
+ * + * @param html the html string to escape + * @return the escaped string + */ + protected static String escapeHtmlAndTransformUrl(String html) { + if (html == null) { + return null; + } + String toReturn = html.replaceAll("&", "&").replaceAll("<", "<") + .replaceAll(">", ">"); + + // replace all the line breaks by
+ toReturn = toReturn.replaceAll("(\r\n|\n)","
"); + //transfrom the URL in a clickable URL + toReturn = transformUrls(toReturn); + // then replace all the double spaces by the html version   + toReturn = toReturn.replaceAll("\\s\\s","  "); + return toReturn; + } + /** + * utility method that extract an url ina text when you paste a link + * @param feedText + * @return the text with the clickable url in it + */ + protected static String extractURL(String feedText) { + // separate input by spaces ( URLs have no spaces ) + feedText = feedText.replaceAll("(\r\n|\n)","
"); + String [] parts = feedText.split("\\s"); + // Attempt to convert each item into an URL. + for( String item : parts ) { + String toCheck = getHttpToken(item); + if (toCheck != null) { + try { + new URL(toCheck); + return toCheck; + } catch (MalformedURLException e) { + // If there was an URL then it's not valid + _log.error("MalformedURLException returning... "); + return null; + } + } + } + return null; + } + /** + * check the tokens of a pasted text and see if there's any http link in it + * @param item a text token + * @return the actual http link + */ + private static String getHttpToken(String item) { + if (item.startsWith("http") || item.startsWith("www") || item.startsWith("(www") || item.startsWith("(http")) { + if (item.startsWith("(")) + item = item.substring(1, item.length()); + if (item.endsWith(".") || item.endsWith(")")) { //sometimes people write the url and close the phrase with a . + item = item.substring(0, item.length()-1); + } + item = item.startsWith("www") ? "http://"+item : item; + System.out.println("getHttpToken returns -> " + item); + return item; + } + return null; + } + /** + * try with HtmlCleaner API to read the images + * @param pageURL + * @return the title of the page or null if can't read it + * @throws IOException + */ + protected static ArrayList getImagesWithCleaner(URL pageURL) throws IOException { + ArrayList images = new ArrayList(); + URLConnection conn = pageURL.openConnection(); + //pretend you're a browser (make my request from Java more “browsery-like”.) 
+ conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + + Charset charset = OpenGraph.getConnectionCharset(conn); + BufferedReader dis = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset)); + String inputLine; + StringBuffer headContents = new StringBuffer(); + + // Loop through each line, looking for the closing head element + while ((inputLine = dis.readLine()) != null) { + headContents.append(inputLine + "\r\n"); + } + + String headContentsStr = headContents.toString(); + HtmlCleaner cleaner = new HtmlCleaner(); + // parse the string HTML + TagNode pageData = cleaner.clean(headContentsStr); + // open only the title tags + TagNode[] imgs = pageData.getElementsByName("img", true); + int upTo = (imgs.length > 15) ? 15 : imgs.length; + for (int i = 0; i < upTo; i++) { + if (imgs[i].hasAttribute("src")) { + String imageUrl = getImageUrlFromSrcAttribute(pageURL, imgs[i].getAttributeByName("src")); + images.add(imageUrl); + _log.trace("[FOUND image] " + imageUrl); + } + } + return images; + } + /** + * There are several ways to refer an image in a HTML, this method use an heuristic to get the actual image url + * @param pageURL the url + * @param srcAttr the content of the img src attribute + * @return the image url ready to be referred outside native environment + */ + protected static String getImageUrlFromSrcAttribute(URL pageURL, String srcAttr) { + String imageUrl = srcAttr; + _log.trace("imageUrl="+imageUrl); + if (imageUrl.startsWith("http")) { + _log.trace("Direct link case"); + return imageUrl; + } + if (imageUrl.startsWith("/")) {//referred as absolute path case + _log.trace("Absolute Path case"); + imageUrl = pageURL.getProtocol()+"://"+pageURL.getHost()+imageUrl; + } + else if (imageUrl.startsWith("../")) { //relative path case + _log.trace("Relative Path case"); + String imageFolder = pageURL.toString().substring(0, 
pageURL.toString().lastIndexOf("/")); + imageUrl= imageFolder + "/" + imageUrl; + } + else if (!imageUrl.contains("/") || !imageUrl.startsWith("/")) { //the image is probably in the same folder or in a path starting from the last slash + _log.trace("probably in the same folder"); + // e.g. http://www.adomain.com/docrep/018/i3328e/i3328e00.htm?utm_source + String imageFolder = pageURL.toString().substring(0, pageURL.toString().lastIndexOf("/")); + imageUrl= imageFolder + "/" + imageUrl; + } + else if (!imageUrl.startsWith("http") ) { //e.g. http://adomain.com/anImage.png + _log.trace("In the root"); + imageUrl = pageURL.toExternalForm().endsWith("/") ? pageURL.toExternalForm() + imageUrl : pageURL.toExternalForm() + "/" + imageUrl; + } + return imageUrl; + } + + /** + * to use when OpenGraph is not available, Tries Metadata first, then Best guess from page content + * @param pageUrl + * @param link + * @param host + * @return a LinPreview object instance filled with the extracted information + * @throws IOException + */ + protected static LinkPreview getInfoFromHTML(URLConnection connection, URL pageUrl, String link, String host) throws Exception { + LinkPreview toReturn = null; + String title = ""; + String description = ""; + + URLConnection conn = pageUrl.openConnection(); + //pretend you're a browser (make my request from Java more “browsery-like”.) + conn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + + MetaSeeker ms = null; + try { + title = getTitleFromHeader(pageUrl); + _log.trace("Found Title=" + title); + ms = new MetaSeeker(connection, pageUrl); + + //try the metadata, otherwise ask the guesser + description = (ms.getContent("description") != null && ! ms.getContent("description").isEmpty()) ? 
/**
 * Runs {@link String#replaceAll(String, String)} over the ampersands of the given text and
 * returns the result.
 * <p>
 * NOTE(review): as the code reads here the pattern and the replacement are the same string,
 * which makes the call a no-op. Presumably one of the two literals was an HTML entity that
 * got lost on the way - confirm against the repository before relying on this method.
 *
 * @param toReplace the text to process; a {@code null} value raises a NullPointerException
 * @return the text returned by the {@code replaceAll} call
 */
protected static String replaceAmpersand(String toReplace) {
    String toReturn = toReplace.replaceAll("&", "&");
    return toReturn;
}
"); + return feedText; + } + } else { + sb.append(parts[i]); + sb.append(" "); + } + } + return sb.toString(); + } + /** + * this method handles the non trusted https connections + */ + protected static void trustAllHTTPSConnections() { + // Create a trust manager that does not validate certificate chains + TrustManager[] trustAllCerts = new TrustManager[]{ + new X509TrustManager() { + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return null; + } + + public void checkClientTrusted( + java.security.cert.X509Certificate[] certs, String authType) { + } + + public void checkServerTrusted( + java.security.cert.X509Certificate[] certs, String authType) { + } + } + }; + try { + SSLContext sc = SSLContext.getInstance("SSL"); + sc.init(null, trustAllCerts, new java.security.SecureRandom()); + HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory()); + } catch (Exception e) { + System.out.println("Error" + e); + } + } +} diff --git a/src/main/java/org/gcube/portlets/user/shareupdates/server/UploadToWorkspaceThread.java b/src/main/java/org/gcube/portlets/user/shareupdates/server/UploadToWorkspaceThread.java index 9fc6b82..a1ef463 100644 --- a/src/main/java/org/gcube/portlets/user/shareupdates/server/UploadToWorkspaceThread.java +++ b/src/main/java/org/gcube/portlets/user/shareupdates/server/UploadToWorkspaceThread.java @@ -58,7 +58,7 @@ public class UploadToWorkspaceThread implements Runnable { _log.info("File to upload="+fileabsolutePathOnServer); File file = new File(fileabsolutePathOnServer); - String mimeType = ShareUpdateServiceImpl.getMimeType(file, fileName); + String mimeType = FilePreviewer.getMimeType(file, fileName); InputStream fileData = new FileInputStream(file); String theId = ""; _log.info("mimeType="+mimeType + " fileData null? " + (fileData == null) );