#!/usr/bin/env python
#
# make_inst_index.py - v0.2
#
# Copyright (c) 2008 - Rich Burridge - Sun Microsystems Inc.
# All Rights Reserved.
#
# Usage:
#
# $ python ./make_inst_index.py > index.html
#
# Script to look at all the filenames in a specified directory that are the
# saved PDFs of various projects from the Instructables web site, then use
# the Instructables listing web pages to create an index.html file that
# shows a thumbnail and title for each PDF file.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
import os
import sys
import urllib
# -------- START OF USER CONFIGURATION SECTION --------
# Directory containing the saved Instructables PDFs. It is walked with
# os.walk() at startup and every file name found is added to PDF_files.
#
instDir = "/home/richb/Desktop/Instructables"
# Maximum number of Instructables rating pages.
#
maxPage = 971
# -------- END OF USER CONFIGURATION SECTION --------
# Base URL of the Instructables listing sorted by rating (20 items per page).
# The string ends in "offset=", so presumably the item offset of the wanted
# page is appended before the URL is fetched.
#
url = "http://www.instructables.com/tag/type:id/?sort=RATING&offset="
# The list of PDF file names still to be searched for. An entry is removed
# by get_next_inst_page() once its Instructable has been found, and the
# script exits when the list becomes empty.
#
PDF_files = []
# Whether to print out debug messages (they are written to stderr).
#
debug = True
def _next_inst_entry(results, start):
    """Parse one Instructable entry out of a listing page.

    Scans `results` (the page HTML) forward from index `start` for, in
    order: a project link ("/id/<name>/"), a thumbnail path starting with
    "/files/deriv", and a second project link whose anchor text is the
    project title.

    Returns a tuple (pdf_name, image_url, label, next_start), or None when
    any of the expected markers is missing (e.g. the page holds fewer
    entries than expected or the site markup differs).
    """
    # Project link: everything between "/id/" and the closing '">' of the
    # href. The "- 1" drops the character just before the quote (presumably
    # the trailing "/" of the project URL), leaving the bare project name.
    token = "/id/"
    begin = results.find(token, start)
    if begin == -1:
        return None
    end = results.find('">', begin + len(token))
    if end == -1:
        return None
    pdf_name = results[begin + len(token):end - 1] + ".pdf"
    start = end

    # Thumbnail: site-relative path up to the closing quote of the attribute.
    token = "/files/deriv"
    begin = results.find(token, start)
    if begin == -1:
        return None
    end = results.find('"', begin + len(token))
    if end == -1:
        return None
    image = "http://www.instructables.com/" + results[begin:end]
    start = end

    # Title: the text of the next project link, i.e. what lies between the
    # ">" that ends the opening tag and the closing tag.
    # NOTE(review): the end marker in the source was an empty string, which
    # made find() return its own start position and the label always come out
    # empty. "</a>" is a reconstruction -- confirm against the page markup.
    token = "/id/"
    begin = results.find(token, start)
    if begin == -1:
        return None
    end = results.find("</a>", begin + len(token))
    begin = results.find(">", begin + len(token))
    if end == -1 or begin == -1 or begin >= end:
        return None
    label = results[begin + 1:end]

    return pdf_name, image, label, end


def get_next_inst_page(url):
    """Fetch one Instructables listing page and emit index entries.

    Downloads `url`, then examines up to 20 entries following the
    "hugeThumbs" marker. For every entry whose "<name>.pdf" is present in
    the module-level PDF_files list, one HTML line (link to the PDF,
    thumbnail and title) is written to stdout and the name is removed from
    PDF_files. When `debug` is true, progress messages go to stderr.

    Returns None. Calls sys.exit(0) as soon as PDF_files becomes empty,
    i.e. once every saved PDF has been matched.
    """
    page = urllib.urlopen(url)
    try:
        results = page.read()
    finally:
        # Release the connection even if the read fails.
        page.close()

    start = results.find("hugeThumbs")
    if start == -1:
        # Not a listing page (or the markup changed): nothing to parse.
        return

    for _ in range(20):
        entry = _next_inst_entry(results, start)
        if entry is None:
            # Ran out of entries on this page.
            break
        pdf_name, image, label, start = entry

        if pdf_name not in PDF_files:
            continue

        if debug:
            sys.stderr.write("FOUND: %s ( %s )\n" % (label, pdf_name))
        # NOTE(review): the format string in the source was empty, which
        # raises TypeError when three values are applied to it. This markup
        # is a reconstruction (PDF link, thumbnail, title) -- confirm it
        # matches the layout expected by the rest of index.html. The href is
        # the bare file name, which assumes index.html sits beside the PDFs.
        sys.stdout.write('<a href="%s"><img src="%s" alt=""> %s</a><br>\n'
                         % (pdf_name, image, label))
        PDF_files.remove(pdf_name)
        if debug:
            sys.stderr.write("PDF list length: %d\n" % len(PDF_files))
        if not PDF_files:
            sys.exit(0)
if __name__ == "__main__":
for dir, _, files in os.walk(instDir):
PDF_files.extend(file for file in files)
print "