rumimevlevi to Asklemmy@lemmy.ml · 2 days ago
Any way to download all images from a Lemmy community?
geneva_convenience@lemmy.ml · 1 day ago (edited)
People tend to hate on AI, but this is what it was made for.
all images part1 https://files.catbox.moe/1o0cgg.zip
all images part2 https://files.catbox.moe/t3pk4k.zip
AI-generated slop script:
import os
import logging
from urllib.parse import urlparse

import requests
from tqdm import tqdm

# Set up logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Config
COMMUNITY = "albumartporn"
LEMMA_INSTANCE = "https://lemmy.world"  # You can change this to a different instance
DEST_FOLDER = "albumartporn_images"
MAX_PAGES = 100  # Increased to download more images
SORT = "TopAll"  # Changed to get the best quality images first

os.makedirs(DEST_FOLDER, exist_ok=True)


def get_posts(page):
    url = f"{LEMMA_INSTANCE}/api/v3/post/list"
    params = {
        "community_name": COMMUNITY,
        "sort": SORT,
        "page": page
    }
    try:
        logger.debug(f"Fetching posts from page {page}")
        resp = requests.get(url, params=params, timeout=10)
        resp.raise_for_status()
        posts = resp.json().get("posts", [])
        logger.debug(f"Found {len(posts)} posts on page {page}")
        return posts
    except Exception as e:
        logger.error(f"Error fetching posts from page {page}: {e}")
        return []


def download_image(url, filename):
    try:
        logger.debug(f"Downloading image from {url}")
        resp = requests.get(url, stream=True, timeout=10)
        resp.raise_for_status()
        file_size = int(resp.headers.get('content-length', 0))
        logger.debug(f"Image size: {file_size} bytes")
        with open(filename, "wb") as f:
            for chunk in resp.iter_content(1024):
                f.write(chunk)
        logger.debug(f"Successfully downloaded {filename}")
    except Exception as e:
        logger.error(f"Failed to download {url}: {e}")


def is_image_url(url):
    is_img = url.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp"))
    logger.debug(f"URL {url} is image: {is_img}")
    return is_img


def main():
    logger.info(f"Starting download from {COMMUNITY} community")
    logger.info(f"Sorting by: {SORT}")
    logger.info(f"Maximum pages to process: {MAX_PAGES}")

    image_count = 0
    for page in range(1, MAX_PAGES + 1):
        logger.info(f"Processing page {page}/{MAX_PAGES}")
        posts = get_posts(page)
        if not posts:
            logger.warning(f"No more posts on page {page}.")
            break

        for post in tqdm(posts, desc=f"Page {page}"):
            post_data = post.get("post", {})
            url = post_data.get("url")
            if not url:
                logger.debug("Post has no URL, skipping")
                continue
            if not is_image_url(url):
                logger.debug(f"URL is not an image: {url}")
                continue

            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path)
            filepath = os.path.join(DEST_FOLDER, filename)
            if os.path.exists(filepath):
                logger.debug(f"File already exists: {filepath}")
                continue

            download_image(url, filepath)
            image_count += 1

    logger.info(f"✅ Download complete. Downloaded {image_count} images.")


if __name__ == "__main__":
    main()
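To run it you only need Python 3 plus the two third-party packages it imports (pip install requests tqdm); the files land in the albumartporn_images folder next to the script, and already-downloaded files are skipped, so you can rerun it to resume. If I remember the Lemmy API correctly, /api/v3/post/list also accepts a limit parameter (capped somewhere around 50 per page), so adding that to the params dict would cut down the number of requests; that's from memory though, the script above just uses the default page size.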