The track information stored in iTunes is pretty interesting from a visualization point of view, as it contains dates, durations, categories, groupings — all the sorts of things that make for complex, interesting data to look at. The only issue is… it’s in iTunes, and I’d like to get a CSV version of it so I can use it in a bunch of tools.
So, here is the result: a couple of Python scripts that use only the standard library to read the XML file exported by iTunes and convert it to CSV. It’s not general or robust code, just a script that worked for me and should be pretty easy for you to modify. I’m not a Pythonista (I mostly write Java), so apologies for any non-idiomatic usage. Feel free to post corrections or suggestions in the comments, as this is also a learning exercise for me.
''' MUSIC.PY

Helpers for pulling track values out of the XML property list exported by
iTunes, working on the DOM produced by xml.dom.minidom.
'''
import datetime
import math

EPOCH = datetime.datetime(1970, 1, 1)


def remove_whitespace(node):
    ''' Recursively delete whitespace-only text nodes found below node.

    After this runs, each <key> element is directly followed by its value
    element, which is what the positional lookups in getItems and get_item
    rely on.
    '''
    to_kill = []
    # Collect first and delete afterwards, so childNodes is not modified
    # while it is being iterated.
    for child in node.childNodes:
        if child.nodeType == node.TEXT_NODE and not child.data.strip():
            to_kill.append(child)
        elif child.hasChildNodes():
            remove_whitespace(child)
    # Remove the items and unlink to save memory
    for dead in to_kill:
        dead.parentNode.removeChild(dead)
        dead.unlink()


def _isType(node, name):
    ''' Return True when node is a <key> element whose text equals name.

    The nodeName check matters: without it a *value* whose text happens to
    equal a field name (for example a track called "Artist") would be
    mistaken for the key of that field.
    '''
    return (node.nodeName == 'key'
            and node.hasChildNodes()
            and node.childNodes[0].nodeValue == name)


def getItems(node, name):
    ''' Return every <dict> element below the value stored under key name.

    node is the element whose children alternate between <key> elements
    and value elements. Raises IndexError when no key called name exists.
    '''
    ch = node.childNodes
    items = [value for key, value in zip(ch, ch[1:]) if _isType(key, name)]
    if not items:
        raise IndexError("no key named %r found" % (name,))
    return items[0].getElementsByTagName('dict')


def get_item(node, target):
    ''' Return the native value stored under key target in a track <dict>.

    A missing key gives None, except for 'Play Count' which gives 0.
    'Total Time' is divided by 1000 and rounded down. A key whose value
    element is empty is reported on stdout and gives None.
    '''
    c = node.childNodes
    item = [value for key, value in zip(c, c[1:]) if _isType(key, target)]
    if not item:
        if target == 'Play Count':
            return 0
        return None
    result = _get_item_value(item[0])
    # Check for an empty value before doing arithmetic on it; dividing
    # None by 1000 would raise TypeError.
    if result is None:
        print("No children for", target, " ... \n", item[0])
        return None
    if target == 'Total Time':
        return math.floor(result / 1000)
    return result


def _get_item_value(item):
    ''' Convert the DOM item to a native type.

    <string>  -> text with double quotes replaced by single quotes
    <integer> -> int
    <date>    -> whole days between EPOCH and the date
    Any other element with content is returned as its raw text, and an
    element without children gives None.
    '''
    if not item.hasChildNodes():
        return None
    data = item.childNodes[0].nodeValue
    kind = item.nodeName
    if kind == 'string':
        return data.replace('"', "'")
    if kind == 'integer':
        return int(data)
    if kind == 'date':
        d = datetime.datetime.strptime(data, '%Y-%m-%dT%H:%M:%SZ')
        return (d - EPOCH).days
    return data


''' MAIN.PY '''
import csv
from xml.dom import minidom

SRC = "/Users/graham/Documents/data/music/Library.xml"
TRACKS = "/Users/graham/Documents/data/music/LibraryTracks.csv"
TRACK_INFO = [
    "Track ID",
    "Name",
    "Artist",
    "Genre",
    "Total Time",
    "Year",
    "Date Added",
    "Play Count",
    "Rating",
]


def main(src=SRC, dest=TRACKS, fields=TRACK_INFO):
    ''' Read the library XML at src and write one CSV row per track to dest.

    The first row written is the list of field names; each following row
    holds get_item(track, field) for every field, in order.
    '''
    # Imported here so the helpers are only needed when the export runs.
    from music import get_item, getItems, remove_whitespace

    # Binary mode lets the XML parser honour the encoding declared in the file.
    with open(src, "rb") as srcfile:
        dom = minidom.parse(srcfile)
    remove_whitespace(dom)

    root = dom.getElementsByTagName('plist')[0].childNodes[0]
    tracks = getItems(root, 'Tracks')

    # Write out the values to a csv file. newline="" is what the csv module
    # asks for, and UTF-8 keeps non-ASCII track names intact.
    with open(dest, "w", newline="", encoding="utf-8") as out:
        writer = csv.writer(out)
        writer.writerow(fields)
        for track in tracks:
            writer.writerow([get_item(track, field) for field in fields])


if __name__ == "__main__":
    main()