Iterate through pages in dump
This commit is contained in:
parent
76a4fbb6ad
commit
fe1db32c0e
1 changed file with 20 additions and 1 deletion
21
sift/sift.py
21
sift/sift.py
|
|
@@ -1,2 +1,21 @@
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import mwxml # https://pythonhosted.org/mwxml/
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """Print to standard error instead of standard output.

    All positional and keyword arguments are forwarded unchanged to the
    built-in ``print``; only the destination stream is fixed.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
||||
|
||||
|
||||
def process_page(page):
    """Log a page's id and title to stderr, flagging unusual revision counts.

    Every page gets one log line. A second line that also carries the
    revision count is written when the page does not hold exactly one
    revision.

    Args:
        page: An object with ``id`` and ``title`` attributes that yields its
            revisions when iterated (presumably an ``mwxml.Page``, see
            https://pythonhosted.org/mwxml/iteration.html#mwxml.Page).
    """
    eprint(f"{page.id:8} - {page.title}")
    # Count the revisions exactly once: the page may be a one-shot iterator,
    # in which case a second pass would see nothing and report a count of 0.
    # Counting without building a list also avoids holding every revision
    # in memory at the same time.
    revision_count = sum(1 for _ in page)
    if revision_count != 1:
        eprint(f"{page.id:8} - {page.title} - {revision_count}")
|
||||
|
||||
|
||||
def main():
    """Read a MediaWiki XML dump from standard input and process each page."""
    print("Hello world")
    pages = mwxml.Dump.from_file(sys.stdin).pages
    for current_page in pages:
        process_page(current_page)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue