Download imgur images
This commit is contained in:
parent
006e7d4e38
commit
61e8d79777
1 changed files with 73 additions and 0 deletions
73
archive_imgur_images/download_images.py
Executable file
73
archive_imgur_images/download_images.py
Executable file
|
|
@ -0,0 +1,73 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
DESCRIPTION = """
|
||||||
|
Download imgur images that have not yet been downloaded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def log(image_ids, n, image_id, msg):
    """Print one progress line for the image currently being processed.

    The line has the form ``<image_id> (<position>/<total>): <msg>`` where
    the position is 1-based and both numbers use ``_`` as the thousands
    separator.

    Args:
        image_ids: The full collection of ids; only its length is used.
        n: Zero-based index of the current image.
        image_id: Id of the current image.
        msg: Status text to report.
    """
    position = n + 1
    total = len(image_ids)
    progress = f"{position:_}/{total:_}"
    print(f"{image_id} ({progress}): {msg}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Download every listed imgur image that is not on disk yet.

    Reads the ``image_ids`` list from the JSON file given by ``--info``,
    creates the ``--dir`` directory if necessary and fetches each missing
    image from ``https://i.imgur.com/<id>.png``. Images whose target file
    already exists are skipped silently; every attempted download prints one
    progress line via ``log``.

    Raises:
        OSError: If the info file cannot be read or the target directory
            cannot be created.
        KeyError: If the info file has no ``image_ids`` entry.
    """
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--info",
        "-i",
        type=Path,
        default=Path("imgur_images.json"),
        help="the json file with the links",
    )
    parser.add_argument(
        "--dir",
        "-d",
        type=Path,
        default=Path("imgur_images"),
        help="the directory to save the images in",
    )
    parser.add_argument(
        "--timeout",
        "-t",
        type=float,
        default=30.0,
        help="seconds to wait for the server before giving up on an image",
    )
    args = parser.parse_args()

    print("Loading image ids")
    # JSON is UTF-8 by specification; don't depend on the platform default.
    with open(args.info, encoding="utf-8") as f:
        image_ids = json.load(f)["image_ids"]

    args.dir.mkdir(parents=True, exist_ok=True)

    # One session for the whole run so the connection to i.imgur.com is
    # reused instead of being re-established for every image.
    with requests.Session() as session:
        for n, image_id in enumerate(image_ids):
            # Yes, I know not all images are pngs. Use file(1) or any other
            # tool of your choice to fix the extensions after downloading the
            # files.
            image_path = args.dir / f"{image_id}.png"
            if image_path.exists():
                continue

            # Yes, imgur ignores the file extension and doesn't even
            # redirect. It just serves the file.
            image_link = f"https://i.imgur.com/{image_id}.png"

            try:
                # Without a timeout a single stalled connection would block
                # the whole run forever.
                r = session.get(image_link, timeout=args.timeout)
                if r.status_code == 404:
                    log(image_ids, n, image_id, "Not found (404)")
                    continue
                elif r.status_code != 200:
                    log(image_ids, n, image_id, f"Weird status code: {r.status_code}")
                    continue

                # Write to a temporary name and rename afterwards. If the
                # write is interrupted, no truncated "<id>.png" is left behind
                # that the exists() check above would skip on the next run. A
                # stale ".part" file is simply overwritten on retry.
                partial_path = image_path.with_name(image_path.name + ".part")
                with open(partial_path, "wb") as f:
                    f.write(r.content)
                partial_path.replace(image_path)

                log(image_ids, n, image_id, "Downloaded")
            except Exception as e:
                # Deliberately broad: one failing image must not abort the
                # rest of the batch. The failure is reported and the image is
                # retried on the next run because its file does not exist.
                log(image_ids, n, image_id, f"Error fetching {image_link}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# Run the downloader only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue