#!/usr/bin/env python
#
# auto_bookmarks.py - v0.2
#
# Copyright (c) 2008 - Rich Burridge - Sun Microsystems Inc.
# All Rights Reserved.
#
# Script to automatically generate links for a variety of topics, for a
# variety of languages, tools, areas of computer technology and many other
# subjects that I'm interested in.
# These are written to standard out in HTML format.
#
# Google AJAX Search API usage based on the example from the DCortesi blog:
# http://dcortesi.com/2008/05/28/google-ajax-search-api-example-python-code/
#
# Uses simplejson, a simple, fast, extensible JavaScript Object Notation
# (JSON) encoder/decoder for Python:
# http://pypi.python.org/pypi/simplejson
# (the standard library json module is used when simplejson is not installed).
#
# Usage:
#   $ python auto_bookmarks.py > auto_bookmarks.html
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.

import sys
import urllib

try:
    import simplejson
except ImportError:
    # simplejson is optional; the stdlib json module offers the same loads().
    import json as simplejson

try:
    # Python 3 locations.
    from html import escape as _escape
    from urllib.parse import urlencode as _urlencode
    from urllib.request import urlopen as _urlopen
except ImportError:
    # Python 2 locations.
    from cgi import escape as _escape
    _urlencode = urllib.urlencode
    _urlopen = urllib.urlopen

# List of languages, tools, areas of computer technology etc. to check.
#
subjects = [
    'ActionScript', 'Adobe Acrobat', 'Adobe AIR', 'Adobe Creative Suite',
    'Ajax', 'Algorithms', 'Amazon', 'Apache', 'Arduino',
    'Artificial Intelligence', 'ASP', 'Assembly Language', 'Astronomy',
    'Automata Theory', 'Awk', 'Backgammon', 'Bash', 'BitTorrent',
    'Blogging', 'Bluetooth', 'Board Games', 'Botnets', 'BSD', 'Chess',
    'C Language', 'C# Language', 'C++ Language', 'Collective Intelligence',
    'Computer Architecture', 'Computer Audio', 'Computer Networking',
    'Cryptography', 'CSS', 'Data Structure', 'Debugging', 'Design Patterns',
    'Digital Imaging', 'Digital Photography', 'Digital Video', 'DirectX',
    'Distributed Systems', 'Dojo', 'DSP', 'eBay', 'Electronics',
    'Electronics Circuits', 'Embedded Linux Systems', 'Firefox',
    'Firewalls', 'Flash', 'Flex Language', 'Flickr', 'Games', 'Gaming',
    'GarageBand', 'GIMP', 'Gmail', 'GNOME', 'Google', 'Google Apps',
    'Google Earth', 'Google Maps', 'Greasemonkey', 'Gtk+', 'Hacking',
    'Home Theater', 'HTML', 'iPhone', 'iPod', 'IRC', 'iTunes',
    'Java Language', 'JavaScript Language', 'Ksh', 'LaTeX', 'Linksys',
    'Linux', 'Linux Desktop', 'Linux Multimedia', 'Linux Power Tools',
    'Lisp Language', 'Mac Cocoa', 'Mac iLife', 'Mac iMovie', 'Mac OS X',
    'Mac Programming', 'Macromedia Dreamweaver', 'Mapping', 'Mathematica',
    'Maya', 'Mind Performance', 'Modeling', 'MySQL', 'Network Security',
    'Neural Networks', 'Nokia Phone', 'Numerical Recipes', 'Online Games',
    'OpenGL', 'OpenSolaris', 'Othello', 'Paint Shop Pro',
    'Pattern Recognition', 'PC', 'PC Building', 'PDF', 'Perl Language',
    'Photoshop', 'PHP', 'Podcasting', 'Processing Language', 'PSP',
    'Puzzles', 'Python Language', 'Radio', 'Rails', 'Recursion',
    'Regular Expressions', 'Retro Gaming', 'Robotics', 'RSS',
    'Ruby Language', 'Scheme Language', 'Sed', 'Shell', 'Sketchup', 'Skype',
    'Smalltalk Language', 'Smart Home', 'Solaris', 'Spidering', 'SQL',
    'Statistics', 'Tcl Language', 'TeX', 'Thunderbird', 'TiVo', 'Tk',
    'Ubuntu', 'USB', 'Vi', 'Video Blogging', 'Vim', 'VoIP', 'Web Design',
    'Wii', 'Windows XP', 'Wireless', 'XHTML', 'XML', 'XPath', 'XPointer',
    'XSLT', 'Yahoo', 'YouTube',
]

# List of topics to check for each subject.
#
topics = [
    'Home Page', 'Reference', 'FAQ', 'Tutorial', 'Programming',
    'Cookbook', 'Hacks', 'Power Tools', 'HOWTO',
]

# The Google AJAX search URL.
# NOTE(review): this endpoint has reportedly been retired by Google -
# confirm it still answers before relying on the generated page.
#
search_URL = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s'


def _write_line(text=""):
    """Write text plus a newline to standard out.

    sys.stdout.write is used rather than print so that the same source
    runs unchanged under both Python 2 and Python 3.
    """

    sys.stdout.write(text + "\n")


def pick_best_result(results, query_string):
    """Return the URL of the search result that best matches the query.

    The best result is the one whose title contains the most
    space-separated tokens of the query string (case-sensitive substring
    match). Ties go to the earliest result, so when no title matches any
    token the first result wins.

    Arguments:
    - results: sequence of dictionaries, each with 'title' and 'url' keys.
    - query_string: the search string.

    Returns the chosen URL, or None if results is empty or None.
    """

    if not results:
        return None

    # Split once; the tokens are the same for every result.
    query_tokens = query_string.split(' ')

    def token_hits(result):
        title = result['title']
        return sum(1 for token in query_tokens if token in title)

    # max() returns the first of several equal maxima, which keeps the
    # "earliest result wins a tie" rule.
    return max(results, key=token_hits)['url']


def do_search(query_string):
    """Perform a Google AJAX search. Return the best result (i.e. the one
    which has the most keywords from the query string in the URL title).

    Arguments:
    - query_string: the search string.

    Returns the URL of the best result, or None when the response carries
    no results. Network and JSON decoding errors are not caught.
    """

    url = search_URL % _urlencode({'q': query_string})
    search_results = _urlopen(url)
    try:
        raw = search_results.read()
    finally:
        search_results.close()

    # Python 3 hands back bytes; the JSON decoder wants text.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8')

    reply = simplejson.loads(raw)

    # 'responseData' may be absent or null, so treat that as "no results".
    response_data = reply.get('responseData') or {}
    return pick_best_result(response_data.get('results'), query_string)


def write_preamble():
    """Write out the initial HTML commands for the page."""

    _write_line("<html>")
    _write_line("<head>")
    _write_line("<title>Auto Bookmarks</title>")
    _write_line("</head>")
    _write_line("<body>")
    _write_line("<h1>Auto Bookmarks</h1>")


def query_language(subject, topic):
    """For this subject and this topic, get the best Google search result
    (the one that has the most subject and topic words in the title) and
    write it to standard out as an HTML hyperlink labelled with the topic.
    Nothing is written when the search yields no result.

    Arguments:
    - subject: the subject of interest.
    - topic: the topic of interest.
    """

    query_string = "%s %s" % (subject, topic)
    result = do_search(query_string)
    if result is None:
        return

    # The URL comes from an external service, so escape it (and the label)
    # before embedding it in the page.
    _write_line('<a href="%s">[%s]</a> '
                % (_escape(result, True), _escape(topic, True)))


def write_postamble():
    """Write out the final HTML commands for the page."""

    _write_line("</body>")
    _write_line("</html>")


def main():
    """Write the complete bookmarks page to standard out: one paragraph
    per subject, containing one link per topic."""

    write_preamble()

    for subject in subjects:
        _write_line("<p>%s: " % _escape(subject, True))
        for topic in topics:
            query_language(subject, topic)
        _write_line()

    write_postamble()


if __name__ == "__main__":
    main()

# ChangeLog
# 5th Sep 2008 - 0.2 - richb - Extended to many more subjects and topics.
#                              Better google search result pick algorithm.
# 1st Sep 2008 - 0.1 - richb - Initial version.