From: Alex O. <no...@gi...> - 2025-05-21 08:39:59
|
Branch: refs/heads/webarchive-commons-2.0.1 Home: https://github.com/internetarchive/heritrix3 Commit: 567e27181d4e26ce138633ab405b4785f4e19a88 https://github.com/internetarchive/heritrix3/commit/567e27181d4e26ce138633ab405b4785f4e19a88 Author: Alex Osborne <aos...@nl...> Date: 2025-05-21 (Wed, 21 May 2025) Changed paths: M CHANGELOG.md M commons/pom.xml M commons/src/main/java/org/archive/io/Arc2Warc.java M commons/src/main/java/org/archive/io/Warc2Arc.java M commons/src/main/java/org/archive/net/UURI.java M commons/src/main/java/org/archive/net/UURIFactory.java M commons/src/main/java/org/archive/surt/SURTTokenizer.java M commons/src/main/java/org/archive/util/UriUtils.java M commons/src/test/java/org/archive/surt/SURTTokenizerTest.java M contrib/src/main/java/org/archive/crawler/frontier/AMQPUrlReceiver.java M contrib/src/main/java/org/archive/modules/AMQPPublishProcessor.java M contrib/src/main/java/org/archive/modules/extractor/ExtractorYoutubeDL.java M contrib/src/main/java/org/archive/modules/extractor/KnowledgableExtractorJS.java M contrib/src/test/java/org/archive/modules/extractor/ExtractorPDFContentTest.java M contrib/src/test/java/org/archive/modules/extractor/ExtractorYoutubeFormatStreamTest.java M engine/src/main/java/org/archive/crawler/frontier/AbstractFrontier.java M engine/src/main/java/org/archive/crawler/frontier/FrontierJournal.java M engine/src/main/java/org/archive/crawler/frontier/HostnameQueueAssignmentPolicy.java M engine/src/main/java/org/archive/crawler/postprocessor/CandidatesProcessor.java M engine/src/main/java/org/archive/crawler/postprocessor/DispositionProcessor.java M engine/src/main/java/org/archive/crawler/prefetch/PreconditionEnforcer.java M engine/src/main/java/org/archive/crawler/reporting/CrawlerLoggerModule.java M engine/src/test/java/org/archive/crawler/datamodel/CrawlURITest.java M engine/src/test/java/org/archive/crawler/frontier/BdbMultipleWorkQueuesTest.java M engine/src/test/java/org/archive/crawler/frontier/FrontierJournalTest.java M engine/src/test/java/org/archive/crawler/prefetch/QuotaEnforcerTest.java M engine/src/test/java/org/archive/crawler/util/BdbUriUniqFilterTest.java M engine/src/test/java/org/archive/crawler/util/BloomUriUniqFilterTest.java M engine/src/test/java/org/archive/crawler/util/FPUriUniqFilterTest.java M engine/src/test/java/org/archive/crawler/util/TopNSetTest.java M engine/src/test/java/org/archive/modules/fetcher/FormAuthTest.java M modules/src/main/java/org/archive/modules/CrawlURI.java M modules/src/main/java/org/archive/modules/Processor.java M modules/src/main/java/org/archive/modules/credential/HtmlFormCredential.java M modules/src/main/java/org/archive/modules/deciderules/AddRedirectFromRootServerToScope.java M modules/src/main/java/org/archive/modules/deciderules/ExternalGeoLocationDecideRule.java M modules/src/main/java/org/archive/modules/deciderules/ResourceNoLongerThanDecideRule.java M modules/src/main/java/org/archive/modules/extractor/Extractor.java M modules/src/main/java/org/archive/modules/extractor/ExtractorCSS.java M modules/src/main/java/org/archive/modules/extractor/ExtractorDOC.java M modules/src/main/java/org/archive/modules/extractor/ExtractorHTML.java M modules/src/main/java/org/archive/modules/extractor/ExtractorHTTP.java M modules/src/main/java/org/archive/modules/extractor/ExtractorImpliedURI.java M modules/src/main/java/org/archive/modules/extractor/ExtractorJS.java M modules/src/main/java/org/archive/modules/extractor/ExtractorMultipleRegex.java M modules/src/main/java/org/archive/modules/extractor/ExtractorPDF.java M modules/src/main/java/org/archive/modules/extractor/ExtractorRobotsTxt.java M modules/src/main/java/org/archive/modules/extractor/ExtractorSitemap.java M modules/src/main/java/org/archive/modules/extractor/ExtractorURI.java M modules/src/main/java/org/archive/modules/extractor/ExtractorXML.java M modules/src/main/java/org/archive/modules/extractor/JerichoExtractorHTML.java M modules/src/main/java/org/archive/modules/extractor/UriErrorLoggerModule.java M modules/src/main/java/org/archive/modules/fetcher/AbstractCookieStore.java M modules/src/main/java/org/archive/modules/fetcher/FetchDNS.java M modules/src/main/java/org/archive/modules/fetcher/FetchFTP.java M modules/src/main/java/org/archive/modules/fetcher/FetchHTTP.java M modules/src/main/java/org/archive/modules/fetcher/FetchHTTP2.java M modules/src/main/java/org/archive/modules/fetcher/FetchHTTPCookieStore.java M modules/src/main/java/org/archive/modules/fetcher/FetchHTTPRequest.java M modules/src/main/java/org/archive/modules/fetcher/FetchSFTP.java M modules/src/main/java/org/archive/modules/fetcher/FetchWhois.java M modules/src/main/java/org/archive/modules/forms/FormLoginProcessor.java M modules/src/main/java/org/archive/modules/net/CrawlServer.java M modules/src/main/java/org/archive/modules/net/RobotsPolicy.java M modules/src/main/java/org/archive/modules/net/ServerCache.java M modules/src/main/java/org/archive/modules/seeds/TextSeedModule.java M modules/src/main/java/org/archive/modules/writer/ARCWriterProcessor.java M modules/src/main/java/org/archive/state/ModuleTestBase.java M modules/src/test/java/org/archive/modules/canonicalize/FixupQueryStringTest.java M modules/src/test/java/org/archive/modules/canonicalize/RegexRuleTest.java M modules/src/test/java/org/archive/modules/canonicalize/RulesCanonicalizationPolicyTest.java M modules/src/test/java/org/archive/modules/canonicalize/StripSessionCFIDsTest.java M modules/src/test/java/org/archive/modules/canonicalize/StripSessionIDsTest.java M modules/src/test/java/org/archive/modules/canonicalize/StripUserinfoRuleTest.java M modules/src/test/java/org/archive/modules/canonicalize/StripWWWNRuleTest.java M modules/src/test/java/org/archive/modules/canonicalize/StripWWWRuleTest.java M modules/src/test/java/org/archive/modules/deciderules/MatchesListRegexDecideRuleTest.java M modules/src/test/java/org/archive/modules/deciderules/MatchesStatusCodeDecideRuleTest.java M modules/src/test/java/org/archive/modules/deciderules/NotMatchesStatusCodeDecideRuleTest.java M modules/src/test/java/org/archive/modules/deciderules/ViaSurtPrefixedDecideRuleTest.java M modules/src/test/java/org/archive/modules/extractor/ExtractorHTMLTest.java M modules/src/test/java/org/archive/modules/extractor/JerichoExtractorHTMLTest.java M modules/src/test/java/org/archive/modules/extractor/UnitTestUriLoggerModule.java M modules/src/test/java/org/archive/modules/fetcher/CookieFetchHTTPIntegrationTest.java M modules/src/test/java/org/archive/modules/fetcher/FetchHTTPTest.java M modules/src/test/java/org/archive/modules/forms/FormLoginProcessorTest.java M modules/src/test/java/org/archive/modules/net/ServerCacheTest.java M modules/src/test/java/org/archive/modules/recrawl/ContentDigestHistoryTest.java Log Message: ----------- Upgrade webarchive-commons from 1.3.0 to 2.0.1 (removes httpclient 3) To unsubscribe from these emails, change your notification settings at https://github.com/internetarchive/heritrix3/settings/notifications |