001package conexp.fx.core.xml;
002
003/*
004 * #%L
005 * Concept Explorer FX
006 * %%
007 * Copyright (C) 2010 - 2023 Francesco Kriegel
008 * %%
009 * This program is free software: you can redistribute it and/or modify
010 * it under the terms of the GNU General Public License as
011 * published by the Free Software Foundation, either version 3 of the
012 * License, or (at your option) any later version.
013 * 
014 * This program is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU General Public License for more details.
018 * 
019 * You should have received a copy of the GNU General Public
020 * License along with this program.  If not, see
021 * <http://www.gnu.org/licenses/gpl-3.0.html>.
022 * #L%
023 */
024
025
026import java.io.IOException;
027import java.net.MalformedURLException;
028import java.net.URL;
029
030import org.jsoup.Jsoup;
031import org.jsoup.nodes.Document;
032import org.jsoup.nodes.Element;
033
034import com.google.common.base.Predicate;
035import com.google.common.collect.Iterables;
036
037public class JsoupUtil {
038
039  private static final String USER_AGENT         =
040                                                     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17";
041  private static final int    CONNECTION_TIMEOUT = 60000;
042
043  public static final Document getDocument(final String url) throws MalformedURLException, IOException {
044    return getDocument(new URL(url));
045  }
046
047  public static final Document getDocument(final URL url) throws IOException {
048    return Jsoup
049        .connect(url.toString())
050        .ignoreContentType(true)
051        .timeout(CONNECTION_TIMEOUT)
052        .userAgent(USER_AGENT)
053        .get();
054  }
055
056  public static final Element firstElement(final Element element, final String... tags) {
057    Element _element = element;
058    for (String tag : tags)
059      _element = firstChildByTag(_element, tag);
060    return _element;
061  }
062
063  public static final Element firstOrAppendElement(final Element element, final String... tags) {
064    Element _element = element;
065    for (String tag : tags)
066      if (!childrenByTag(_element, tag).iterator().hasNext())
067        _element = _element.appendElement(tag);
068      else
069        _element = firstChildByTag(_element, tag);
070    return _element;
071  }
072
073  public static final Iterable<Element> childrenByTag(final Element element, final String tag) {
074    return Iterables.filter(element.children(), new Predicate<Element>() {
075
076      public final boolean apply(final Element _element) {
077        return _element.tagName().equals(tag);
078      }
079    });
080  }
081
082  public static final Element firstChildByTag(final Element element, final String tag) {
083    return Iterables.getFirst(childrenByTag(element, tag), null);
084  }
085
086}