I would like to download all images from the albumartporn community.
People tend to hate on AI but this is what it was made for.
all images part1 https://files.catbox.moe/1o0cgg.zip
all images part2 https://files.catbox.moe/t3pk4k.zip
AI-generated slop script:
import os
import requests
from urllib.parse import urlparse
from tqdm import tqdm
import logging

# Set up logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Config
COMMUNITY = "albumartporn"
LEMMA_INSTANCE = "https://lemmy.world"  # You can change this to a different instance (no trailing slash)
DEST_FOLDER = "albumartporn_images"
MAX_PAGES = 100  # Increased to download more images
SORT = "TopAll"  # Changed to get the best quality images first

os.makedirs(DEST_FOLDER, exist_ok=True)

def get_posts(page):
    """Fetch one page of posts from the community via the Lemmy API."""
    url = f"{LEMMA_INSTANCE}/api/v3/post/list"
    params = {
        "community_name": COMMUNITY,
        "sort": SORT,
        "page": page
    }
    try:
        logger.debug(f"Fetching posts from page {page}")
        resp = requests.get(url, params=params)
        resp.raise_for_status()
        posts = resp.json().get("posts", [])
        logger.debug(f"Found {len(posts)} posts on page {page}")
        return posts
    except Exception as e:
        logger.error(f"Error fetching posts from page {page}: {e}")
        return []

def download_image(url, filename):
    """Stream a single image to disk."""
    try:
        logger.debug(f"Downloading image from {url}")
        resp = requests.get(url, stream=True, timeout=10)
        resp.raise_for_status()
        file_size = int(resp.headers.get('content-length', 0))
        logger.debug(f"Image size: {file_size} bytes")
        with open(filename, "wb") as f:
            for chunk in resp.iter_content(1024):
                f.write(chunk)
        logger.debug(f"Successfully downloaded {filename}")
    except Exception as e:
        logger.error(f"Failed to download {url}: {e}")

def is_image_url(url):
    is_img = url.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp"))
    logger.debug(f"URL {url} is image: {is_img}")
    return is_img

def main():
    logger.info(f"Starting download from {COMMUNITY} community")
    logger.info(f"Sorting by: {SORT}")
    logger.info(f"Maximum pages to process: {MAX_PAGES}")
    image_count = 0
    for page in range(1, MAX_PAGES + 1):
        logger.info(f"Processing page {page}/{MAX_PAGES}")
        posts = get_posts(page)
        if not posts:
            logger.warning(f"No more posts on page {page}.")
            break
        for post in tqdm(posts, desc=f"Page {page}"):
            post_data = post.get("post", {})
            url = post_data.get("url")
            if not url:
                logger.debug("Post has no URL, skipping")
                continue
            if not is_image_url(url):
                logger.debug(f"URL is not an image: {url}")
                continue
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path)
            filepath = os.path.join(DEST_FOLDER, filename)
            if os.path.exists(filepath):
                logger.debug(f"File already exists: {filepath}")
                continue
            download_image(url, filepath)
            image_count += 1
    logger.info(f"✅ Download complete. Downloaded {image_count} images.")

if __name__ == "__main__":
    main()
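To run it you need Python 3 with the requests and tqdm packages installed (pip install requests tqdm). It saves everything into an albumartporn_images folder next to the script and skips files it has already downloaded, so it can be re-run safely.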
There isn’t a straightforward way to do it as far as I can see - most likely because instances usually don’t want tonnes of requests for tonnes of data.
If you have some programming knowledge, it would be feasible to write a script that either uses the Lemmy API to fetch the posts or scrapes the web pages directly.
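For the API route, here is a minimal sketch of pulling one page of posts through Lemmy's public /api/v3/post/list endpoint, which is what the script above builds on. The instance (lemmy.world), sort order, and page size are assumptions; adjust them for whichever instance you target.

import requests

# Fetch one page of posts for a community from a Lemmy instance.
resp = requests.get(
    "https://lemmy.world/api/v3/post/list",
    params={"community_name": "albumartporn", "sort": "TopAll", "page": 1, "limit": 50},
    timeout=10,
)
resp.raise_for_status()

# Each entry wraps the post; the attached link (often the image) is under "url".
for item in resp.json().get("posts", []):
    url = item.get("post", {}).get("url")
    if url:
        print(url)

From there it is just a matter of looping over pages and downloading whichever URLs point at image files, as the longer script does.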