I would like to download all images from the albumartporn community

  • geneva_convenience@lemmy.ml

    People tend to hate on AI, but this is what it was made for.

    All images, part 1: https://files.catbox.moe/1o0cgg.zip

    All images, part 2: https://files.catbox.moe/t3pk4k.zip

    AI-generated slop script:
    
    import os
    import requests
    from urllib.parse import urlparse
    from tqdm import tqdm
    import logging
    
    # Set up logging
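    # (DEBUG is verbose; change the level to logging.INFO for quieter output)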
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)
    
    # Config
    COMMUNITY = "albumartporn"
    LEMMY_INSTANCE = "https://lemmy.world"  # You can change this to a different instance
    DEST_FOLDER = "albumartporn_images"
    MAX_PAGES = 100  # Maximum number of listing pages to fetch
    SORT = "TopAll"  # Fetch the highest-rated posts first
    
    os.makedirs(DEST_FOLDER, exist_ok=True)
    
    def get_posts(page):
        url = f"{LEMMY_INSTANCE}/api/v3/post/list"
        params = {
            "community_name": COMMUNITY,
            "sort": SORT,
            "page": page
        }
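        # Note: the Lemmy API also appears to accept a "limit" parameter to raise
        # the page size (assumption, not verified here); the default is used.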
        try:
            logger.debug(f"Fetching posts from page {page}")
            resp = requests.get(url, params=params, timeout=30)
            resp.raise_for_status()
            posts = resp.json().get("posts", [])
            logger.debug(f"Found {len(posts)} posts on page {page}")
            return posts
        except Exception as e:
            logger.error(f"Error fetching posts from page {page}: {e}")
            return []
    
    def download_image(url, filename):
        try:
            logger.debug(f"Downloading image from {url}")
            resp = requests.get(url, stream=True, timeout=10)
            resp.raise_for_status()
            file_size = int(resp.headers.get('content-length', 0))
            logger.debug(f"Image size: {file_size} bytes")

            with open(filename, "wb") as f:
                for chunk in resp.iter_content(1024):
                    f.write(chunk)
            logger.debug(f"Successfully downloaded {filename}")
            return True
        except Exception as e:
            logger.error(f"Failed to download {url}: {e}")
            # Remove any partially written file so a rerun can retry it
            if os.path.exists(filename):
                os.remove(filename)
            return False
    
    def is_image_url(url):
        # Check only the URL path so query strings don't hide the extension
        path = urlparse(url).path.lower()
        is_img = path.endswith((".jpg", ".jpeg", ".png", ".gif", ".webp"))
        logger.debug(f"URL {url} is image: {is_img}")
        return is_img
    
    def main():
        logger.info(f"Starting download from {COMMUNITY} community")
        logger.info(f"Sorting by: {SORT}")
        logger.info(f"Maximum pages to process: {MAX_PAGES}")
        
        image_count = 0
        for page in range(1, MAX_PAGES + 1):
            logger.info(f"Processing page {page}/{MAX_PAGES}")
            posts = get_posts(page)
            if not posts:
                logger.warning(f"No more posts on page {page}.")
                break
    
            for post in tqdm(posts, desc=f"Page {page}"):
                post_data = post.get("post", {})
                url = post_data.get("url")
    
                if not url:
                    logger.debug("Post has no URL, skipping")
                    continue
    
                if not is_image_url(url):
                    logger.debug(f"URL is not an image: {url}")
                    continue
    
                parsed_url = urlparse(url)
                filename = os.path.basename(parsed_url.path)
                filepath = os.path.join(DEST_FOLDER, filename)
                
                if os.path.exists(filepath):
                    logger.debug(f"File already exists: {filepath}")
                    continue
                    
                if download_image(url, filepath):
                    image_count += 1
    
        logger.info(f"✅ Download complete. Downloaded {image_count} images.")
    
    if __name__ == "__main__":
        main()
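
    To run it, you only need the requests and tqdm packages installed (see the imports above); point LEMMY_INSTANCE at whichever instance you prefer, and the downloads land in the albumartporn_images folder. Files that already exist are skipped, so it's safe to rerun.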