htmlparsers/src/test/kotlin/FeedExtractorTest.kt
author Da Risk <da_risk@geekorum.com>
Mon, 15 Sep 2025 14:00:07 -0400
changeset 1370 13e39ef920a8
parent 1305 ecb49cb4d40b
permissions -rw-r--r--
update license headers
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
137
5464f07a306c Update copyright headers for 2019
Da Risk <da_risk@geekorum.com>
parents: 0
diff changeset
     1
/*
0
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     2
 * Geekttrss is a RSS feed reader application on the Android Platform.
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     3
 *
1370
13e39ef920a8 update license headers
Da Risk <da_risk@geekorum.com>
parents: 1305
diff changeset
     4
 * Copyright (C) 2017-2025 by Frederic-Charles Barthelery.
0
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     5
 *
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     6
 * This file is part of Geekttrss.
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     7
 *
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     8
 * Geekttrss is free software: you can redistribute it and/or modify
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
     9
 * it under the terms of the GNU General Public License as published by
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    10
 * the Free Software Foundation, either version 3 of the License, or
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    11
 * (at your option) any later version.
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    12
 *
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    13
 * Geekttrss is distributed in the hope that it will be useful,
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    16
 * GNU General Public License for more details.
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    17
 *
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    18
 * You should have received a copy of the GNU General Public License
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    19
 * along with Geekttrss.  If not, see <http://www.gnu.org/licenses/>.
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    20
 */
307
f1b40d8534be extract htmlparsers into a new library module
Da Risk <da_risk@geekorum.com>
parents: 137
diff changeset
    21
package com.geekorum.ttrss.htmlparsers
0
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    22
1305
ecb49cb4d40b htmlparsers: use ksoup instead of jsoup
Da Risk <da_risk@geekorum.com>
parents: 1174
diff changeset
    23
import com.fleeksoft.ksoup.Ksoup
0
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    24
import com.google.common.truth.Truth.assertThat
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    25
import kotlin.test.BeforeTest
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    26
import kotlin.test.Test
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    27
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    28
/** Fixture: a document with no markup at all (the empty string). */
private const val emptyHtmlDoc: String = ""

/** Fixture: a bare `<html>` element that declares no `<link>` elements. */
private const val htmlDocWithoutFeeds: String = "<html></html>"
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    30
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    31
/**
 * Fixture: a page whose head holds a single `rel="alternate"` RSS link with an
 * absolute `href` and an explicit `title`.
 */
private val htmlDocWithOneRssFeed: String = """
    <html>
        <head>
            <link rel="alternate" type="application/rss+xml" title="RSS Advisory Board" href="http://feeds.rssboard.org/rssboard" />
        </head>
    </html>
    """.trimIndent()
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    38
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    39
/** Feed description the tests expect for the titled RSS Advisory Board `<link>`. */
private val rssAdvisoryBoardFeedInfo = FeedInformation(
    href = "http://feeds.rssboard.org/rssboard",
    type = "application/rss+xml",
    title = "RSS Advisory Board",
)
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    42
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    43
/**
 * Fixture: a page with a `<base href>` and one RSS `<link>` whose `href` is the
 * relative path `rssboard`; the link carries no `title` attribute.
 */
private val htmlDocWithOneRelativeRssFeed: String = """
    <html>
        <head>
            <title>RSS Advisory Board</title>
            <base href="http://feeds.rssboard.org/">
            <link rel="alternate" type="application/rss+xml" href="rssboard">
        </head>
    </html>
    """.trimIndent()
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    52
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    53
/**
 * Feed description the tests expect for the untitled, relative RSS `<link>`.
 * No `title` argument is passed, so whatever default [FeedInformation] declares applies.
 */
private val rssAdvisoryBoardFeedInfoNoTitle = FeedInformation(
    href = "http://feeds.rssboard.org/rssboard",
    type = "application/rss+xml",
)
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    56
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    57
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    58
/**
 * Fixture: a page whose head mixes a `dns-prefetch` link, a viewport `<meta>` and a single
 * `rel="alternate"` Atom link.
 *
 * The uneven indentation inside the literal is part of the fixture text and is kept as is.
 */
private val htmlDocWithOneAtomFeed: String = """
      <html>
        <head>
            <link rel="dns-prefetch" href="https://user-images.githubusercontent.com/">
            <meta name="viewport" content="width=device-width">
            <link href="https://github.com/codepath/android_guides/commits/master.atom" rel="alternate" title="Recent Commits to android_guides:master" type="application/atom+xml">
        </head>
    </html>
""".trimIndent()
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    67
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    68
/** Feed description the tests expect for the GitHub "recent commits" Atom `<link>`. */
private val githubRecentCommitsFeedInfo = FeedInformation(
    href = "https://github.com/codepath/android_guides/commits/master.atom",
    type = "application/atom+xml",
    title = "Recent Commits to android_guides:master",
)
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    72
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    73
/**
 * Fixture: a page combining every link variant used by the other fixtures: the Atom link,
 * the relative untitled RSS link, the absolute titled RSS link, plus unrelated
 * `dns-prefetch`, `<base>` and `<meta>` elements.
 *
 * The uneven indentation inside the literal is part of the fixture text and is kept as is.
 */
private val htmlDocWithCombinedFeeds: String = """
      <html>
        <head>
            <title>RSS Advisory Board</title>
            <link href="https://github.com/codepath/android_guides/commits/master.atom" rel="alternate" title="Recent Commits to android_guides:master" type="application/atom+xml">
            <link rel="dns-prefetch" href="https://user-images.githubusercontent.com/">
            <link rel="alternate" type="application/rss+xml" href="rssboard">
            <base href="http://feeds.rssboard.org/">
            <link rel="alternate" type="application/rss+xml" title="RSS Advisory Board" href="http://feeds.rssboard.org/rssboard" />
            <meta name="viewport" content="width=device-width">
        </head>
    </html>
""".trimIndent()
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    86
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    87
14443efede32 Initial commit
Da Risk <da_risk@geekorum.com>
parents:
diff changeset
    88
/**
 * Unit tests for [FeedExtractor].
 *
 * Each case hands one of the HTML fixtures declared at the top of this file to the
 * extractor and checks the returned feed descriptions with Truth assertions.
 */
class FeedExtractorTest {

    // Assigned in setUp(), which is annotated to run before each test.
    lateinit var feedExtractor: FeedExtractor

    @BeforeTest
    fun setUp() {
        feedExtractor = FeedExtractor()
    }

    /** An empty document yields no feed information. */
    @Test
    fun testThatWhenEmptyHtmlDocReturnNoFeedInformation() {
        val parsed = Ksoup.parse(emptyHtmlDoc)
        val feeds = feedExtractor.extract(parsed)

        assertThat(feeds).isEmpty()
    }

    /** A document without any `<link>` element yields no feed information. */
    @Test
    fun testThatWhenHtmlDocWithoutFeedsReturnNoFeedInformation() {
        val parsed = Ksoup.parse(htmlDocWithoutFeeds)
        val feeds = feedExtractor.extract(parsed)

        assertThat(feeds).isEmpty()
    }

    /** A single titled RSS link is reported with its href, type and title. */
    @Test
    fun testThatWhenHtmlDocWithRssFeedsReturnCorrectFeedInfo() {
        val parsed = Ksoup.parse(htmlDocWithOneRssFeed)
        val feeds = feedExtractor.extract(parsed)

        assertThat(feeds).containsExactly(rssAdvisoryBoardFeedInfo)
    }

    /** A single Atom link is reported; the non-feed link and meta elements are not. */
    @Test
    fun testThatWhenHtmlDocWithAtomFeedsReturnCorrectFeedInfo() {
        val parsed = Ksoup.parse(htmlDocWithOneAtomFeed)
        val feeds = feedExtractor.extract(parsed)

        assertThat(feeds).containsExactly(githubRecentCommitsFeedInfo)
    }

    /** A relative href is expected back as an absolute URL built from the document's base. */
    @Test
    fun testThatWhenHtmlDocWithRelativeRssFeedsReturnCorrectFeedInfo() {
        val parsed = Ksoup.parse(htmlDocWithOneRelativeRssFeed)
        val feeds = feedExtractor.extract(parsed)

        assertThat(feeds).containsExactly(rssAdvisoryBoardFeedInfoNoTitle)
    }

    /** A document mixing all link variants yields exactly the three expected feeds. */
    @Test
    fun testThatWhenHtmlDocWithCombinedFeedsReturnCorrectFeedInfo() {
        // Unlike the other cases, the raw HTML string is passed straight to extract().
        // NOTE(review): presumably this exercises a String-accepting overload; confirm
        // against FeedExtractor.
        val feeds = feedExtractor.extract(htmlDocWithCombinedFeeds)

        assertThat(feeds).containsExactly(
            rssAdvisoryBoardFeedInfoNoTitle,
            rssAdvisoryBoardFeedInfo,
            githubRecentCommitsFeedInfo
        )
    }

}