#! /usr/bin/python

# for handling the api via http
import urllib, urllib2, base64, httplib
# for xml parsing
from xml.dom.ext.reader import Sax2
# other useful system libs
import getopt
import string
import sys
import time

# globals, you can set the values here if you import deli.py,
# and override on the command-line if you run it
bookmarks = "bookmarks.html"
username = "username"
password = "password"
# this will assign the specified tag(s) to every url imported
# by the script, may be handy later
baseTag = "ExperimentalImports"
# update this if the api url changes
baseUrl = "http://del.icio.us/api/"
# debug
debug = False

# urllib2 handling code from the great tutorial at:
# http://www.voidspace.org.uk/python/articles/urllib2.shtml
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
httpresponses = {
    100: ('Continue', 'Request received, please continue'),
    101: ('Switching Protocols',
          'Switching to new protocol; obey Upgrade header'),
    200: ('OK', 'Request fulfilled, document follows'),
    201: ('Created', 'Document created, URL follows'),
    202: ('Accepted', 'Request accepted, processing continues off-line'),
    203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
    204: ('No response', 'Request fulfilled, nothing follows'),
    205: ('Reset Content', 'Clear input form for further input.'),
    206: ('Partial Content', 'Partial content follows.'),
    300: ('Multiple Choices', 'Object has several resources -- see URI list'),
    301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
    302: ('Found', 'Object moved temporarily -- see URI list'),
    303: ('See Other', 'Object moved -- see Method and URL list'),
    304: ('Not modified', 'Document has not changed since given time'),
    305: ('Use Proxy', 'You must use proxy specified in Location'
          ' to access this resource.'),
    307: ('Temporary Redirect', 'Object moved temporarily -- see URI list'),
    400: ('Bad request', 'Bad request syntax or unsupported method'),
    401: ('Unauthorized', 'No permission -- see authorization schemes'),
    402: ('Payment required', 'No payment -- see charging schemes'),
    403: ('Forbidden', 'Request forbidden -- authorization will not help'),
    404: ('Not Found', 'Nothing matches the given URI'),
    405: ('Method Not Allowed', 'Specified method is invalid for this server.'),
    406: ('Not Acceptable', 'URI not available in preferred format.'),
    407: ('Proxy Authentication Required', 'You must authenticate with '
          'this proxy before proceeding.'),
    408: ('Request Time-out', 'Request timed out; try again later.'),
    409: ('Conflict', 'Request conflict.'),
    410: ('Gone', 'URI no longer exists and has been permanently removed.'),
    411: ('Length Required', 'Client must specify Content-Length.'),
    412: ('Precondition Failed', 'Precondition in headers is false.'),
    413: ('Request Entity Too Large', 'Entity is too large.'),
    414: ('Request-URI Too Long', 'URI is too long.'),
    415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
    416: ('Requested Range Not Satisfiable', 'Cannot satisfy request range.'),
    417: ('Expectation Failed', 'Expect condition could not be satisfied.'),
    500: ('Internal error', 'Server got itself in trouble'),
    501: ('Not Implemented', 'Server does not support this operation'),
    502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
    503: ('Service temporarily overloaded', 'The server cannot '
          'process the request due to a high load'),
    504: ('Gateway timeout',
          'The gateway server did not receive a timely response'),
    505: ('HTTP Version not supported', 'Cannot fulfill request.'),
}
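# A minimal sketch of the rate-limiting improvement described in api()
# below: latch the time of the last api call and sleep only for the
# remainder of the 1-second interval the del.icio.us api asks for.
# This helper is illustrative only and is not wired into api(); the
# names _lastApiCall and throttle are assumptions, not part of the
# original script.
_lastApiCall = [0.0]

def throttle():
    # sleep just long enough that successive calls are >= 1 second apart
    elapsed = time.time() - _lastApiCall[0]
    if elapsed < 1.0:
        time.sleep(1.0 - elapsed)
    _lastApiCall[0] = time.time()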
# TODO: handle 503's, or retries for other reasons ...
def api(method, args):
    maxRetries = 5
    while maxRetries > 0:
        # delicious api asks for a 1 second delay between calls: drag!
        # impose it here in hardcoded fashion; an improvement would
        # be to latch the time of last api call and sleep to the next 1s
        # interval. whatever.
        time.sleep(1)
        url = baseUrl + method
        request = urllib2.Request(url, args)
        b64 = base64.encodestring('%s:%s' % (username, password))[:-1]
        request.add_header('Authorization', 'Basic %s' % b64)
        try:
            data = urllib2.urlopen(request)
            return True, data
        except IOError, e:
            # only HTTPError instances carry a code attribute
            code = getattr(e, "code", None)
            if code in httpresponses:
                print "%d: %s, %s" % \
                    (code, httpresponses[code][0], httpresponses[code][1])
            else:
                print "unknown code: %s" % e
            # the api asks clients to back off when throttled
            if code == 503:
                time.sleep(1)
            maxRetries -= 1
        except httplib.BadStatusLine, line:
            # occasionally urllib2 gets a response that it chokes on;
            # haven't caught one live yet to debug further ...
            print "httplib.BadStatusLine"
            print line
            maxRetries -= 1
    return False, None

def getPosts():
    posts = []
    success, data = api("posts/all", None)
    if not success:
        return posts
    # create Reader object
    reader = Sax2.Reader()
    # parse the document
    doc = reader.fromStream(data)
    for post in doc.getElementsByTagName("post"):
        posts.append(post.getAttribute("href"))
    return posts

def getTags():
    tags = []
    success, data = api("tags/get", None)
    if not success:
        return tags
    # create Reader object
    reader = Sax2.Reader()
    # parse the document
    doc = reader.fromStream(data)
    for tag in doc.getElementsByTagName("tag"):
        tags.append(tag.getAttribute("tag"))
    return tags

def renameTag(old, new):
    args = {"old": old, "new": new}
    urlArgs = urllib.urlencode(args)
    success, data = api("tags/rename?", urlArgs)
    if not success:
        return False
    # create Reader object
    reader = Sax2.Reader()
    # parse the document
    doc = reader.fromStream(data)
    # XML response is always:
    #   <result>done</result>
    #
    # In DOM the "result" tag is a NodeList with 1 Element Node,
    # said Element Node has 0 attributes and 1 childNode, whose
    # nodeValue is "done"
    result = doc.getElementsByTagName("result")
    if len(result) != 1:
        print "unexpected xml response:"
        print data
        return False
    if hasattr(result[0], "childNodes") and \
            result[0].childNodes[0].nodeValue == "done":
        print "renamed"
        return True
    print "unexpected xml data"
    print data
    return False

def post(url, desc, tags):
    tagsString = string.join(tags)
    args = {
        "url": url,
        "description": desc,
        "tags": tagsString,
        "replace": "no"  # docs say no is the default, but play it safe
    }
    urlArgs = urllib.urlencode(args)
    success, data = api("posts/add?", urlArgs)
    if not success:
        return False
    # create Reader object
    reader = Sax2.Reader()
    # parse the document
    doc = reader.fromStream(data)
    # XML response if the post was successful:
    #   <result code="done" />
    # XML response if the post failed:
    #   <result code="something went wrong" />
    #
    # In DOM the "result" tag is a NodeList with 1 Element Node,
    # said Element Node has 1 attribute, "code", and 0 children
    result = doc.getElementsByTagName("result")
    if len(result) != 1:
        print "unexpected xml response: length != 1"
        print "post failed for:", desc
        print "\ttags were:", tagsString
        print data
        return False
    code = result[0].getAttribute("code")
    if code == "done":
        print "successfully posted:", desc
        print "\ttags were:", tagsString
        return True
    print "unexpected xml response: %s" % code
    print "post failed for:", desc
    print "\ttags were:", tagsString
    return False

#################################################
#
# HTML parser for Firefox bookmarks.html format
#
# NB: This was derived by examining the format of my bookmarks.html,
# nothing more rigorous than that.
#
# I haven't looked for a specification of the format, or even
# looked at any other samples of Firefox bookmark output, so there
# are certain to be holes in this logic.
#
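# For reference, an illustrative (hand-written, not captured from
# Firefox) fragment of the structure the parser below assumes:
#
#   <DL><p>
#       <DT><H3>Folder Name</H3>
#       <DL><p>
#           <DT><A HREF="http://example.com/">Bookmark title</A>
#       </DL><p>
#   </DL><p>
#
# each <h3> becomes a tag, each <dl> opens/closes a folder scope,
# and each <a> is a bookmark to post.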
from HTMLParser import HTMLParser

class MyHTMLParser(HTMLParser):

    def __init__(self):
        # this is python: explicitly invoke the base class constructor
        HTMLParser.__init__(self)
        self.inH3 = False
        self.inA = False
        self.tagCount = 0
        self.tags = []
        self.currentTag = ""
        self.href = ""
        self.description = ""
        self.ignore = ""

    def setBaseTag(self, baseTag):
        self.tags.append(baseTag)

    def setIgnoreUrls(self, ignore):
        self.ignore = ignore

    # remove white space
    # remove apostrophes, backquotes, double-quotes, colons, commas
    def normalizeText(self, text):
        text = text.replace('\'', '')
        text = text.replace('"', '')
        text = text.replace('`', '')
        text = text.replace(':', '')
        text = text.replace(',', '')
        text = text.replace(' ', '')
        return text

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self.inA = True
            for attr in attrs:
                if attr[0] == "href":
                    self.href = attr[1]
        if tag == "h3":
            self.inH3 = True
            self.tagCount += 1
        if tag == "dl":
            pass
            #print "Entering folder list; tags are", self.tags

    def handle_endtag(self, tag):
        if tag == "h3":
            self.tags.append(self.currentTag)
            self.currentTag = ""
            self.inH3 = False
        if tag == "a":
            if debug == True:
                print
                print "href =", self.href
                print "description =", self.description
                print "tags =", self.tags
            # validate href
            validHref = True
            if len(self.href) == 0:
                validHref = False
            if not self.href.split(":")[0] in ["http", "https", "news", "ftp"]:
                validHref = False
            if self.href in self.ignore:
                validHref = False
            # actually post here, make sure there's a url to post
            if validHref:
                post(self.href, self.description, self.tags)
            self.href = ""
            self.description = ""
            self.inA = False
        # exiting a dl means end of a bookmarks folder, pop the last tag off
        if tag == "dl":
            self.tags = self.tags[:-1]

    # handle any data: note that this will miss the "escaped" stuff;
    # fix this by adding handle_charref, etc. methods
    def handle_data(self, data):
        if self.inH3:
            self.currentTag += self.normalizeText(data)
        if self.inA:
            self.description += data

def doit():
    # retrieve the full list of posts to avoid requests for dup's
    print "Retrieving your existing posts to speed the upload ..."
    posts = getPosts()
    print "Got them: you have %d posts on del.icio.us now" % len(posts)
    # construct and configure the parser
    parser = MyHTMLParser()
    if baseTag and len(baseTag) > 0:
        parser.setBaseTag(baseTag)
    parser.setIgnoreUrls(posts)
    # initiate the parse; this will submit requests to delicious
    parser.feed(open(bookmarks).read())
    # cleanup
    parser.close()

def usage():
    print "Usage: deli.py --bookmarks=<file> --username=<name> --password=<password> [--tags=<tags>]"
    print "  bookmarks, username, password should be self-explanatory"
    print "  tags is a white-space separated list of tags to apply to all bookmarks"

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "b:u:p:t:",
                                   ["bookmarks=", "username=", "password=", "tags="])
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)
    # use the globals
    global bookmarks
    global username
    global password
    global baseTag
    for o, a in opts:
        if o in ("-b", "--bookmarks"):
            bookmarks = a
        if o in ("-u", "--username"):
            username = a
        if o in ("-p", "--password"):
            password = a
        if o in ("-t", "--tags"):
            baseTag = a
    if not bookmarks or not username or not password:
        usage()
        sys.exit(1)
    # go forth
    doit()

# only run when executed directly, so the globals above can be
# overridden by scripts that import deli.py
if __name__ == "__main__":
    main()
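# Example invocation (all values are placeholders, assuming a Firefox
# bookmarks.html export in the current directory):
#
#   python deli.py --bookmarks=bookmarks.html --username=alice \
#       --password=secret --tags=ExperimentalImports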