jystewart.net : reading, writing, web development

services | portfolio | code | blog | about | contact

Code

FeedMarshal

Language: Python
Date: September 2004

This is an inelegant piece of code that forms a very important part of my Python Atom server implementation. The server utilises Mark Pilgrim's excellent Universal Feed Parser module to convert XML feeds into Python classes and dictionaries for easier manipulation. Unfortunately, considerable searching did not turn up any implementation of code to convert those new data structures back into XML once they had been modified. This code was written to fill that hole.

At present it uses some iteration that is a legacy of initial explorations of the data structures and a variety of other features that could be 'cleaner'. If time allows I hope to compare it more closely with the atom syndication format specifications to make sure it will handle feeds other than those I am using it for, and to improve the implementation so that the code is more elegant, faster and more robust. But for now there are other projects!

The Code:

#!/usr/bin/python
"""FeedMarshal

This is a rough and ready class that takes Universal Feed Parser objects and returns xml dom
objects (atom 0.3). It was written to scratch a very particular itch. There are plans to 
update it to remove the iteration and to ensure full compliance with the atom spec, but for
now it is what it is.

Recommended: Python 2.3 or later

Please send any suggestions to james@jystewart.net

By James Stewart - 23rd September 2004
"""

__version__ = "0.1"
__license__ = "Python"
__copyright__ = "Copyright 2004 James Stewart"
__author__ = "James Stewart <http://jystewart.net>"

import time
from xml.dom import minidom

import feedparser

class FeedMarshal:
    """Serialise a Universal Feed Parser result into a minidom document.

    The document root is a ``<feed version="0.3">`` element.  ``run()``
    walks the parsed feed handed to the constructor and appends child
    elements to that root.

    Values are recognised with ``isinstance`` (and, for parser
    dictionaries, by class name) instead of comparing ``str(type(...))``
    with strings such as ``"<type 'instance'>"``, so the checks do not
    depend on how a particular interpreter prints type names.
    """

    # Keys for which iterate() emits nothing at all.
    _SKIPPED_KEYS = ('title_detail', 'author', 'modified_parsed', 'bozo',
                     'encoding')

    # String types accepted as dictionary keys and as text values.
    try:
        _TEXT_TYPES = (basestring,)
    except NameError:  # Python 3 has no basestring
        _TEXT_TYPES = (str,)

    def __init__(self, feed):
        """Create the empty output document for *feed*.

        feed -- the parsed feed to serialise; run() walks it and
                parse_entries() reads feed['entries'].
        """
        self.xml = minidom.parseString("<feed></feed>")
        self.xml.documentElement.setAttribute('version', '0.3')
        self.feed = feed

    def run(self):
        """Populate the document from self.feed and return it.

        Elements are appended to the single document created in
        __init__, so calling run() again appends everything a second time.
        """
        self.iterate(self.feed)
        return self.xml

    def iterate(self, feedvar, parent=None):
        """Dispatch on the kind of *feedvar* and extend the document.

        feedvar -- a parser dictionary, a key of *parent*, a text value or
                   a time.struct_time.
        parent  -- the dictionary *feedvar* is a key of, if any.

        Behaviour by kind:
          * parser dictionary: every key is processed against that
            dictionary; returns None.
          * string that is a key of *parent*: the matching element(s) are
            appended to the root element; returns None.
          * any other string: returns a new text node holding it.
          * time.struct_time: a <modified> element is appended to the root
            element; returns None.
          * anything else: ignored; returns None.
        """
        if self._is_feed_dict(feedvar):
            for key in feedvar:
                self.iterate(key, feedvar)
            return None
        if isinstance(feedvar, self._TEXT_TYPES):
            if parent is not None and feedvar in parent:
                self._append_key(feedvar, parent)
                return None
            return self.xml.createTextNode(feedvar)
        if isinstance(feedvar, time.struct_time):
            self._feed_element().appendChild(
                self.parse_time(feedvar, 'modified'))
        return None

    def _feed_element(self):
        """Return the root <feed> element of the output document."""
        return self.xml.documentElement

    def _is_feed_dict(self, value):
        """Tell whether *value* is an instance of a class named FeedParserDict.

        The class is matched by name only, so the result does not depend
        on which module defines it or on the repr of its type.
        """
        return value.__class__.__name__ == 'FeedParserDict'

    def _lookup(self, mapping, key):
        """Return mapping[key], or None when the lookup raises KeyError."""
        try:
            return mapping[key]
        except KeyError:
            return None

    def _append_key(self, key, parent):
        """Append the element(s) for parent[key] to the root element."""
        root = self._feed_element()
        if key == 'links':
            for link in parent[key]:
                root.appendChild(self.parse_entry_generic(link, 'link'))
        elif key == 'author_detail':
            root.appendChild(self.parse_author(parent[key]))
        elif key == 'entries':
            root.appendChild(self.parse_entries())
        elif key not in self._SKIPPED_KEYS:
            element = self.xml.createElement(key)
            # iterate() returns a text node for string values.  For a
            # nested dictionary or a struct_time it appends to the root
            # itself and returns None, leaving this element empty.
            child = self.iterate(parent[key])
            if isinstance(child, minidom.Node):
                element.appendChild(child)
            root.appendChild(element)

    def parse_entries(self):
        """Return an <entries> element with one <entry> per feed entry.

        Each <entry> receives, in this order, its <content> elements,
        <description>, <link> elements, <title>, <modified>, <issued> and
        <id>.  A field that is missing from an entry (or is None) is
        skipped instead of raising KeyError.
        """
        entries = self.xml.createElement('entries')
        for source in self._lookup(self.feed, 'entries') or ():
            entry = self.xml.createElement('entry')
            for content in self._lookup(source, 'content') or ():
                entry.appendChild(
                    self.parse_entry_generic(content, 'content'))
            description = self._lookup(source, 'description')
            if description is not None:
                entry.appendChild(self.parse_entry_description(description))
            for link in self._lookup(source, 'links') or ():
                entry.appendChild(self.parse_entry_generic(link, 'link'))
            title = self._lookup(source, 'title_detail')
            if title is not None:
                entry.appendChild(self.parse_entry_generic(title, 'title'))
            for key, tag in (('modified_parsed', 'modified'),
                             ('issued_parsed', 'issued')):
                stamp = self._lookup(source, key)
                if stamp is not None:
                    entry.appendChild(self.parse_time(stamp, tag))
            entry_id = self._lookup(source, 'id')
            if entry_id is not None:
                id_element = self.xml.createElement('id')
                id_element.appendChild(self.xml.createTextNode(entry_id))
                entry.appendChild(id_element)
            entries.appendChild(entry)
        return entries

    def parse_entry_generic(self, content, type):
        """Return an element named *type* built from the dictionary *content*.

        The 'value' item becomes the element's text; every other item with
        a true value becomes an attribute of the element.
        """
        story = self.xml.createElement(type)
        for part in content:
            if part == 'value':
                story.appendChild(self.xml.createTextNode(content['value']))
            elif content[part]:
                story.setAttribute(part, content[part])
        return story

    def parse_entry_description(self, content):
        """Return a <description> element whose text is *content*."""
        description = self.xml.createElement('description')
        description.appendChild(self.xml.createTextNode(content))
        return description

    def parse_author(self, authorclass):
        """Return an <author> element with one child per item of *authorclass*.

        Each key becomes a child element whose text is the matching value.
        Items whose value is None are skipped, because a text node cannot
        be created from None.
        """
        author = self.xml.createElement('author')
        for element in authorclass:
            value = authorclass[element]
            if value is None:
                continue
            field = self.xml.createElement(element)
            field.appendChild(self.xml.createTextNode(value))
            author.appendChild(field)
        return author

    def parse_time(self, time, version):
        """Return an element named *version* holding *time* as a timestamp.

        time    -- a sequence whose first six items are year, month, day,
                   hour, minute and second (for example a time.struct_time).
        version -- the tag name of the element, e.g. 'modified' or 'issued'.

        The text has the form YYYY-MM-DDTHH:MM:SSZ with zero-padded fields.
        """
        timing = self.xml.createElement(version)
        stamp = "%04d-%02d-%02dT%02d:%02d:%02dZ" % tuple(time[:6])
        timing.appendChild(self.xml.createTextNode(stamp))
        return timing