htmlparsers/src/main/kotlin/FeedExtractor.kt
author Da Risk <da_risk@geekorum.com>
Tue, 25 Jun 2019 16:51:33 -0700
changeset 307 f1b40d8534be
child 611 91b8d76c03cd
permissions -rw-r--r--
extract htmlparsers into a new library module

/*
 * Geekttrss is a RSS feed reader application on the Android Platform.
 *
 * Copyright (C) 2017-2019 by Frederic-Charles Barthelery.
 *
 * This file is part of Geekttrss.
 *
 * Geekttrss is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Geekttrss is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Geekttrss.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.geekorum.ttrss.htmlparsers

import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import javax.inject.Inject

/**
 * Allows to extract [FeedInformation] from an Html Document.
 */
class FeedExtractor @Inject constructor() : HtmlExtractor<FeedInformation>() {

    private val VALID_FEED_MIMETYPE = listOf("application/rss+xml", "application/atom+xml")

    override fun extract(document: Document): Collection<FeedInformation>  {
        document.head()?.let {head ->
            return head.getElementsByTag("link")
                .filter { isValidLinkFeed(it) }
                .map {
                    FeedInformation(href = it.attr("abs:href")!!,
                        type = it.attr("type"),
                        title = it.attr("title"))
                }
        }
        return emptyList()
    }

    private fun isValidLinkFeed(elem: Element): Boolean {
        val type = elem.attr("type")
        val url = elem.attr("abs:href")
        return elem.attr("rel") == "alternate"
                && type in VALID_FEED_MIMETYPE
                && url.isNotBlank()
    }
}

/**
 * Information about a Feed.
 */
data class FeedInformation(
    val href: String,
    val type: String = "",
    val title: String = "")