htmlparsers/src/main/kotlin/HtmlExtractor.kt
author Da Risk <da_risk@geekorum.com>
Tue, 09 Nov 2021 22:09:00 -0400
changeset 846 ac0863af5ef6
parent 611 91b8d76c03cd
child 882 7a74abf66c49
permissions -rw-r--r--
build: update licence header check
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
307
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     1
/*
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     2
 * Geekttrss is a RSS feed reader application on the Android Platform.
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     3
 *
846
ac0863af5ef6 build: update licence header check
Da Risk <da_risk@geekorum.com>
parents: 611
diff changeset
     4
 * Copyright (C) 2017-2021 by Frederic-Charles Barthelery.
307
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     5
 *
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     6
 * This file is part of Geekttrss.
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     7
 *
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     8
 * Geekttrss is free software: you can redistribute it and/or modify
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     9
 * it under the terms of the GNU General Public License as published by
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    10
 * the Free Software Foundation, either version 3 of the License, or
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    11
 * (at your option) any later version.
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    12
 *
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    13
 * Geekttrss is distributed in the hope that it will be useful,
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    16
 * GNU General Public License for more details.
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    17
 *
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    18
 * You should have received a copy of the GNU General Public License
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    19
 * along with Geekttrss.  If not, see <http://www.gnu.org/licenses/>.
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    20
 */
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    21
package com.geekorum.ttrss.htmlparsers
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    22
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    23
import org.jsoup.Jsoup
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    24
import org.jsoup.nodes.Document
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    25
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    26
/**
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    27
 * Extracts data of type [T] from an HTML [Document].
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    28
 */
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    29
abstract class HtmlExtractor<out T> {
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    30
    /**
     * Extracts items of type [T] from the given parsed [document].
     *
     * What is extracted, and in which order, is defined by each subclass.
     *
     * @param document the jsoup [Document] to read from
     * @return the items extracted from [document]
     */
    abstract fun extract(document: Document) : Collection<T>
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    31
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    32
    /**
     * Parses [html] with [Jsoup.parse] and delegates to the [Document] overload of [extract].
     *
     * The return type is declared explicitly so the public signature does not depend on
     * type inference through the overload being called.
     *
     * @param html the HTML markup to parse
     * @return the result of the [Document] overload applied to the parsed markup
     */
    fun extract(html: String): Collection<T> = extract(Jsoup.parse(html))
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    33
}