47 lines
2.1 KiB
Python
47 lines
2.1 KiB
Python
|
#! /usr/bin/env python3
|
||
|
|
||
|
import datetime
|
||
|
import argparse
|
||
|
import sys
|
||
|
|
||
|
from simplemediawiki import build_user_agent
|
||
|
|
||
|
import ArchWiki
|
||
|
from ArchWiki.ArchWiki import language_names
|
||
|
|
||
|
def build_arg_parser():
    """Build the command-line parser for this script.

    ``--output-directory`` is intentionally not declared ``required`` at the
    argparse level: doing so made ``--list-langs`` unusable on its own,
    because argparse rejected the command line before the listing branch
    could run. ``main`` enforces its presence for actual download runs.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Download pages from Arch Wiki and optimize them for offline browsing")
    parser.add_argument("--output-directory", type=str, help="Path where the downloaded pages should be stored. Required unless --list-langs is given.")
    parser.add_argument("--force", action="store_true", help="Ignore timestamp, always download the page from the wiki.")
    parser.add_argument("--clean", action="store_true", help="Clean the output directory after downloading, useful for removing pages deleted/moved on the wiki. Warning: any unknown files found in the output directory will be deleted!")
    parser.add_argument("--safe-filenames", action="store_true", help="Force using ASCII file names instead of the default Unicode.")
    parser.add_argument("--langs", type=str, nargs='+', help="Download only pages of specified languages")
    parser.add_argument("--list-langs", action="store_true", help="List supported languages")
    return parser


def select_epoch(force):
    """Return the cutoff timestamp that is handed to the downloader.

    Args:
        force: when true, the current UTC time is used; otherwise a fixed
            date is returned.

    Returns:
        A naive ``datetime.datetime`` (no ``tzinfo``) in both cases.
    """
    if force:
        # Replacement for the deprecated datetime.utcnow(): take the aware
        # "now" in UTC and strip tzinfo so the value stays naive, exactly
        # like the fixed date below.
        return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    # this should be the date of the latest incompatible change
    return datetime.datetime(2016, 3, 3, 18, 0, 0)


def main(argv=None):
    """Parse the command line and run the download.

    Args:
        argv: list of argument strings to parse; ``None`` lets argparse
            read ``sys.argv[1:]``.

    Exits with argparse's usage error (status 2) when a download is
    requested without ``--output-directory``.
    """
    parser = build_arg_parser()
    args = parser.parse_args(argv)

    if args.list_langs:
        # Listing needs no output directory and performs no download.
        for lang in language_names.values():
            print(lang['subtag'], lang['english'])
        return

    if args.output_directory is None:
        # Same wording argparse uses for a missing required option.
        parser.error("the following arguments are required: --output-directory")

    epoch = select_epoch(args.force)

    user_agent = build_user_agent(__file__, ArchWiki.__version__, ArchWiki.__url__)
    aw = ArchWiki.ArchWiki(user_agent=user_agent, safe_filenames=args.safe_filenames, langs=args.langs)
    optimizer = ArchWiki.Optimizer(aw, args.output_directory)

    downloader = ArchWiki.Downloader(aw, args.output_directory, epoch, optimizer=optimizer)
    downloader.download_css()
    aw.print_namespaces()
    # Namespace ids are passed as strings; presumably the standard MediaWiki
    # numbering (main, project, help, category) -- confirm against the wiki.
    for ns in ["0", "4", "12", "14"]:
        downloader.process_namespace(ns)

    downloader.download_images()

    if args.clean:
        downloader.clean_output_directory()


if __name__ == "__main__":
    # main() returns None on success, so the exit status is 0.
    sys.exit(main())
|