001package conexp.fx.core.xml; 002 003/* 004 * #%L 005 * Concept Explorer FX 006 * %% 007 * Copyright (C) 2010 - 2023 Francesco Kriegel 008 * %% 009 * This program is free software: you can redistribute it and/or modify 010 * it under the terms of the GNU General Public License as 011 * published by the Free Software Foundation, either version 3 of the 012 * License, or (at your option) any later version. 013 * 014 * This program is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public 020 * License along with this program. If not, see 021 * <http://www.gnu.org/licenses/gpl-3.0.html>. 022 * #L% 023 */ 024 025 026import java.io.IOException; 027import java.net.MalformedURLException; 028import java.net.URL; 029 030import org.jsoup.Jsoup; 031import org.jsoup.nodes.Document; 032import org.jsoup.nodes.Element; 033 034import com.google.common.base.Predicate; 035import com.google.common.collect.Iterables; 036 037public class JsoupUtil { 038 039 private static final String USER_AGENT = 040 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17"; 041 private static final int CONNECTION_TIMEOUT = 60000; 042 043 public static final Document getDocument(final String url) throws MalformedURLException, IOException { 044 return getDocument(new URL(url)); 045 } 046 047 public static final Document getDocument(final URL url) throws IOException { 048 return Jsoup 049 .connect(url.toString()) 050 .ignoreContentType(true) 051 .timeout(CONNECTION_TIMEOUT) 052 .userAgent(USER_AGENT) 053 .get(); 054 } 055 056 public static final Element firstElement(final Element element, final String... tags) { 057 Element _element = element; 058 for (String tag : tags) 059 _element = firstChildByTag(_element, tag); 060 return _element; 061 } 062 063 public static final Element firstOrAppendElement(final Element element, final String... tags) { 064 Element _element = element; 065 for (String tag : tags) 066 if (!childrenByTag(_element, tag).iterator().hasNext()) 067 _element = _element.appendElement(tag); 068 else 069 _element = firstChildByTag(_element, tag); 070 return _element; 071 } 072 073 public static final Iterable<Element> childrenByTag(final Element element, final String tag) { 074 return Iterables.filter(element.children(), new Predicate<Element>() { 075 076 public final boolean apply(final Element _element) { 077 return _element.tagName().equals(tag); 078 } 079 }); 080 } 081 082 public static final Element firstChildByTag(final Element element, final String tag) { 083 return Iterables.getFirst(childrenByTag(element, tag), null); 084 } 085 086}