""" Wikipedia Picture of the Day (POTD) download example Baseline synchronous example for comparison: downloads metadata and images in the simple but slow synchronous way i.e. one after the other. """ import calendar import datetime import re import os import io import time import requests import argparse SAVE_DIR = 'pictures/' POTD_BASE_URL = 'http://en.wikipedia.org/wiki/Template:POTD/' class NoPictureForDate(Exception): '''No Picture of the Day found for {day}''' def build_page_url(iso_date): return POTD_BASE_URL + iso_date def fetch(url): response = requests.get(url) return response def extract_image_url(html): re_image = r'src="(//upload\..*?)"' image_url = re.search(re_image, html) return 'http:' + image_url.group(1) def format_date(year, month, day): return '{year}-{month:02d}-{day:02d}'.format(**locals()) def list_days_of_month(year, month): lastday = calendar.monthrange(year, month)[1] days = [format_date(year, month, day) for day in range(1, lastday + 1)] return days def build_save_path(iso_date, url): head, filename = os.path.split(url) return os.path.join(SAVE_DIR, iso_date+'_'+filename) def save_one(iso_date, verbose): page_url = build_page_url(iso_date) response = fetch(page_url) if response.status_code != 200: msg = NoPictureForDate.__doc__.format(day=iso_date) raise NoPictureForDate(msg) img_url = extract_image_url(response.text) response = fetch(img_url) path = build_save_path(iso_date, img_url) if verbose: print('saving: '+path) with io.open(path, 'wb') as fp: fp.write(response.content) return len(response.content) def save_month(year_month, verbose): year, month = [int(s) for s in year_month.split('-')] total_size = 0 img_count = 0 dates = list_days_of_month(year, month) for date in dates: try: total_size += save_one(date, verbose) img_count += 1 except NoPictureForDate: continue return img_count, total_size def main(save_one=save_one, save_month=save_month): """Get "Picture of The Day" from English Wikipedia for a given date or month""" parser = argparse.ArgumentParser(description=main.__doc__) parser.add_argument('date', help='year, month and (optional) day in YYYY-MM-DD format') parser.add_argument('-q', '--max_qty', type=int, help='maximum number of files to download') parser.add_argument('-v', '--verbose', action='store_true', help='display progress information') args = parser.parse_args() t0 = time.time() if len(args.date) == len('YYYY-MM-DD'): img_count = 1 total_size = save_one(args.date, args.verbose) else: img_count, total_size = save_month(args.date, args.verbose) elapsed = time.time() - t0 print("images: %3d | total size: %6.1f Kbytes | elapsed time: %3ds" % (img_count, total_size/1024.0, elapsed)) if __name__ == '__main__': main()