Iterate through pages in dump

This commit is contained in:
Joscha 2022-09-30 00:09:49 +02:00
parent 76a4fbb6ad
commit fe1db32c0e

View file

@@ -1,2 +1,21 @@
import sys
from pathlib import Path
import mwxml # https://pythonhosted.org/mwxml/
def eprint(*args, **kwargs):
    """Print to standard error instead of standard output.

    All positional and keyword arguments are forwarded unchanged to the
    built-in ``print``; only the destination stream is fixed here.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def process_page(page):
    """Log a page's id and title, flagging pages without exactly one revision.

    Every page is reported on stderr. A second line, which also carries the
    revision count, is emitted when that count differs from one.

    Args:
        page: A page object exposing ``id`` and ``title`` and yielding its
            revisions when iterated, see
            https://pythonhosted.org/mwxml/iteration.html#mwxml.Page
    """
    eprint(f"{page.id:8} - {page.title}")

    # Count the revisions exactly once and reuse the result. mwxml appears to
    # stream revisions from the dump, so a second pass over the page would
    # presumably find the iterator exhausted and report 0 (confirm against
    # the mwxml docs). Counting lazily also avoids holding every revision
    # in memory at once.
    revision_count = sum(1 for _ in page)
    if revision_count != 1:
        eprint(f"{page.id:8} - {page.title} - {revision_count}")
def main():
    """Read a MediaWiki XML dump from stdin and process each of its pages."""
    print("Hello world")

    # Pages are handled one at a time as the dump is iterated.
    wiki_dump = mwxml.Dump.from_file(sys.stdin)
    for wiki_page in wiki_dump.pages:
        process_page(wiki_page)