#!/usr/bin/env python
#
# make_inst_index.py - v0.2
#
# Copyright (c) 2008 - Rich Burridge - Sun Microsystems Inc.
# All Rights Reserved.
#
# Usage:
#
#   $ python ./make_inst_index.py > index.html
#
# Script to look at all the filenames in a specified directory that are the
# saved PDF's of various projects from the Instructables web site, then use
# the various Instructable web pages to create an index.html file that
# shows thumbnails and titles for each PDF file.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.

import os
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

# -------- START OF USER CONFIGURATION SECTION --------

# Directory containing the saved Instructables PDF's.
#
instDir = "/home/richb/Desktop/Instructables"

# Maximum number of Instructables rating pages.
#
maxPage = 971

# -------- END OF USER CONFIGURATION SECTION --------

# Base URL of the Instructables by rating. (20 items per page).
#
url = "http://www.instructables.com/tag/type:id/?sort=RATING&offset="

# The list of PDF files to search for.
#
PDF_files = []

# Whether to print out debug messages.
#
debug = True

# Number of entries looked for on each rating page; also the step used
# for the "offset=" query parameter.
#
_ITEMS_PER_PAGE = 20

# Prefix for the scraped thumbnail paths. The paths already start with
# "/files/deriv", so the prefix must not end with a slash.
#
_SITE_ROOT = "http://www.instructables.com"

# HTML written to stdout. The item template takes (pdf name, thumbnail
# URL, title) in that order.
# NOTE(review): the exact markup is a choice made here - adjust to taste.
#
_PAGE_HEADER = "<html><head><title>Instructables</title></head><body>"
_ITEM_TEMPLATE = '<p><a href="%s"><img src="%s" alt=""> %s</a></p>'
_PAGE_FOOTER = "</body></html>"


def _debug(message):
    """Write *message* to stderr when the module-level debug flag is set."""
    if debug:
        sys.stderr.write(message + "\n")


def _fetch_page(page_url):
    """Return the body of *page_url* as text, always closing the response."""
    response = urlopen(page_url)
    try:
        data = response.read()
    finally:
        response.close()
    if not isinstance(data, str):
        # Python 3 returns bytes here; Python 2 already returns a str.
        data = data.decode("utf-8", "replace")
    return data


def _parse_inst_page(html):
    """Yield (pdf_name, image_url, label) for each entry found in *html*.

    Scanning starts at the "hugeThumbs" marker and looks, per entry, for
    a "/id/<slug>/" link, a "/files/deriv..." thumbnail path and a second
    "/id/" link whose text is used as the label. At most _ITEMS_PER_PAGE
    entries are produced; scanning stops at the first token that cannot
    be found, so short or unexpected pages simply yield fewer entries.

    NOTE(review): the label is assumed to end at the link's closing
    "</a>" tag - confirm against a real listing page.
    """
    pos = html.find("hugeThumbs")
    if pos < 0:
        return

    for _ in range(_ITEMS_PER_PAGE):
        # 1. Project link: the slug becomes the PDF file name.
        token = "/id/"
        begin = html.find(token, pos)
        if begin < 0:
            return
        end = html.find('">', begin + len(token))
        if end < 0:
            return
        slug = html[begin + len(token):end]
        if slug.endswith("/"):
            slug = slug[:-1]
        pdf_name = slug + ".pdf"
        pos = end

        # 2. Thumbnail path, up to the closing quote of its attribute.
        token = "/files/deriv"
        begin = html.find(token, pos)
        if begin < 0:
            return
        end = html.find('"', begin + len(token))
        if end < 0:
            return
        image_url = _SITE_ROOT + html[begin:end]
        pos = end

        # 3. Second project link: the text between the end of its opening
        #    tag and the closing tag is the title.
        token = "/id/"
        begin = html.find(token, pos)
        if begin < 0:
            return
        end = html.find("</a>", begin + len(token))
        text_start = html.find(">", begin + len(token))
        if end < 0 or text_start < 0:
            return
        label = html[text_start + 1:end]
        pos = end

        yield pdf_name, image_url, label


def get_next_inst_page(url):
    """Fetch one rating page and print an index entry for every saved PDF.

    For each entry on the page whose "<slug>.pdf" name is present in
    PDF_files, one HTML line is printed to stdout and the name is removed
    from PDF_files. Debug messages go to stderr when debug is true.

    url -- full URL of the rating page to read.

    Raises SystemExit(0) as soon as PDF_files becomes empty, since there
    is nothing left to look for.
    """
    for pdf_name, image_url, label in _parse_inst_page(_fetch_page(url)):
        if pdf_name not in PDF_files:
            continue

        _debug("FOUND: %s ( %s )" % (label, pdf_name))
        print(_ITEM_TEMPLATE % (pdf_name, image_url, label))
        PDF_files.remove(pdf_name)
        _debug("PDF list length: %d" % len(PDF_files))

        if not PDF_files:
            sys.exit(0)


def main():
    """Collect the saved PDF names and write the index page to stdout."""
    for _dirpath, _dirnames, filenames in os.walk(instDir):
        # Only "<slug>.pdf" names can ever match a scraped entry; keeping
        # other files would stop the list from ever becoming empty.
        PDF_files.extend(name for name in filenames if name.endswith(".pdf"))

    print(_PAGE_HEADER)
    try:
        if PDF_files:
            for page in range(maxPage):
                _debug("Checking page: %d" % page)
                get_next_inst_page(url + str(page * _ITEMS_PER_PAGE))
    finally:
        # get_next_inst_page() exits via SystemExit once every PDF has
        # been found; the footer must still be written in that case.
        print(_PAGE_FOOTER)


if __name__ == "__main__":
    main()

# ChangeLog
# 21st Dec 2008 - 0.2 - richb - Adjusted FOUND debug message for a README.txt.
# 21st Dec 2008 - 0.1 - richb - Initial version.