FeedMarshal
Language: Python
Date: September 2004
This is an inelegant piece of code that forms a very important part of my Python Atom server implementation. The server utilises Mark Pilgrim's excellent Universal Feed Parser module to convert XML feeds into Python classes and dictionaries for easier manipulation. Unfortunately, considerable searching did not turn up any implementation of code to convert those new data structures back into XML once they had been modified. This code was written to fill that hole.
At present it uses some iteration that is a legacy of initial explorations of the data structures and a variety of other features that could be 'cleaner'. If time allows I hope to compare it more closely with the atom syndication format specifications to make sure it will handle feeds other than those I am using it for, and to improve the implementation so that the code is more elegant, faster and more robust. But for now there are other projects!
The Code:
#!/usr/bin/python
"""FeedMarshal

This is a rough and ready class that takes Universal Feed Parser objects and
returns XML DOM objects (Atom 0.3). It was written to scratch a very
particular itch. There are plans to update it to remove the iteration and to
ensure full compliance with the Atom spec, but for now it is what it is.

Recommended: Python 2.3 or later

Please send any suggestions to james@jystewart.net

By James Stewart - 23rd September 2004
"""
# Module metadata.
__version__ = "0.1"
__license__ = "Python"
__copyright__ = "Copyright 2004 James Stewart"
__author__ = "James Stewart <http://jystewart.net>"
import time
from xml.dom import minidom

import feedparser
class FeedMarshal:
    """Turn a parsed Universal Feed Parser result back into an XML DOM.

    The constructor receives the parsed feed (a mapping) and prepares an
    empty ``<feed version="0.3">`` document.  ``run()`` walks the mapping,
    fills the document and returns it as an ``xml.dom.minidom.Document``.

    Type checks are done with ``isinstance`` / duck typing rather than by
    comparing ``str(type(x))`` to interpreter-specific strings, so the class
    behaves the same on Python 2 and Python 3 and also accepts plain dicts.
    """

    # Keys that never become elements of their own when met during the walk.
    _SKIPPED_KEYS = ('title_detail', 'author', 'modified_parsed', 'bozo',
                     'encoding')

    # Text type of the running interpreter: ``unicode`` on Python 2 and
    # ``str`` on Python 3.  Only values of this type become text nodes.
    _TEXT_TYPE = type(u'')

    def __init__(self, feed):
        """Create the empty Atom 0.3 document and remember *feed*."""
        self.xml = minidom.parseString("<feed></feed>")
        self._root().setAttribute('version', '0.3')
        self.feed = feed

    def run(self):
        """Marshal ``self.feed`` into the document and return the document."""
        self.iterate(self.feed)
        return self.xml

    def iterate(self, feedvar, parent=None):
        """Walk one item of the parsed feed.

        feedvar -- a mapping, a key of *parent*, a ``time.struct_time`` or a
                   text value.
        parent  -- the mapping *feedvar* is a key of; ``None`` when
                   *feedvar* is a value rather than a key.

        Mappings are walked key by key, keys are dispatched to the matching
        handler and a ``struct_time`` is appended to the root element as
        ``<modified>``; all of these return ``None``.  A text value is
        returned as a DOM text node and is not attached anywhere.  Anything
        else is ignored and yields ``None``.
        """
        if self._is_mapping(feedvar):
            for key in feedvar:
                self.iterate(key, feedvar)
            return None
        if isinstance(feedvar, time.struct_time):
            self._root().appendChild(self.parse_time(feedvar, 'modified'))
            return None
        if isinstance(feedvar, (str, self._TEXT_TYPE)):
            # A string is a key only when it was handed over together with
            # the mapping that contains it; otherwise it is a value.
            if parent is not None and feedvar in parent:
                self._marshal_key(feedvar, parent)
                return None
            if isinstance(feedvar, self._TEXT_TYPE):
                return self.xml.createTextNode(feedvar)
        return None

    def parse_entries(self):
        """Build an ``<entries>`` element with one ``<entry>`` per feed entry.

        The entries are always read from ``self.feed['entries']``.  Each
        entry contributes, in this order and only when present: its
        ``content`` items, ``description``, ``links``, ``title_detail``,
        ``modified_parsed``, ``issued_parsed`` and ``id``.  Missing or
        ``None`` parts are skipped instead of raising ``KeyError``.
        """
        # NOTE(review): the <entries> wrapper and the <description> element
        # do not look like Atom 0.3 names; kept because existing consumers
        # may rely on this output -- confirm against the Atom 0.3 spec.
        entries = self.xml.createElement('entries')
        for source in self._lookup(self.feed, 'entries') or ():
            entry = self.xml.createElement('entry')

            for item in self._lookup(source, 'content') or ():
                entry.appendChild(self.parse_entry_generic(item, 'content'))

            summary = self._lookup(source, 'description')
            if summary is not None:
                entry.appendChild(self.parse_entry_description(summary))

            for link in self._lookup(source, 'links') or ():
                entry.appendChild(self.parse_entry_generic(link, 'link'))

            title = self._lookup(source, 'title_detail')
            if title is not None:
                entry.appendChild(self.parse_entry_generic(title, 'title'))

            for key, tag in (('modified_parsed', 'modified'),
                             ('issued_parsed', 'issued')):
                stamp = self._lookup(source, key)
                if stamp is not None:
                    entry.appendChild(self.parse_time(stamp, tag))

            entry_id = self._lookup(source, 'id')
            if entry_id is not None:
                id_node = self.xml.createElement('id')
                id_node.appendChild(self.xml.createTextNode(entry_id))
                entry.appendChild(id_node)

            entries.appendChild(entry)
        return entries

    def parse_entry_generic(self, content, type):
        """Return an element named *type* built from the mapping *content*.

        The ``'value'`` item becomes the element's text; every other item
        with a truthy value becomes an attribute of the element.
        """
        story = self.xml.createElement(type)
        for part in content:
            value = content[part]
            if part == 'value':
                if value is not None:
                    story.appendChild(self.xml.createTextNode(value))
            elif value:
                story.setAttribute(part, value)
        return story

    def parse_entry_description(self, content):
        """Return a ``<description>`` element whose text is *content*."""
        description = self.xml.createElement('description')
        description.appendChild(self.xml.createTextNode(content))
        return description

    def parse_author(self, authorclass):
        """Return an ``<author>`` element built from the mapping *authorclass*.

        Every item becomes a child element named after its key with the
        value as text.  Items whose value is ``None`` are left out because a
        text node holding ``None`` cannot be serialised.
        """
        author = self.xml.createElement('author')
        for element in authorclass:
            value = authorclass[element]
            if value is None:
                continue
            child = self.xml.createElement(element)
            child.appendChild(self.xml.createTextNode(value))
            author.appendChild(child)
        return author

    def parse_time(self, time, version):
        """Return an element named *version* holding *time* as a timestamp.

        *time* is a sequence whose first six items are year, month, day,
        hour, minute and second.  The text has the form
        ``YYYY-MM-DDTHH:MM:SSZ``; ``%02d`` zero-pads each field.
        """
        timing = self.xml.createElement(version)
        stamp = "%04d-%02d-%02dT%02d:%02d:%02dZ" % tuple(time[:6])
        timing.appendChild(self.xml.createTextNode(stamp))
        return timing

    def _root(self):
        """Return the document's root ``<feed>`` element."""
        return self.xml.documentElement

    def _is_mapping(self, candidate):
        """Tell whether *candidate* looks like a dictionary."""
        return hasattr(candidate, 'keys') and hasattr(candidate, '__getitem__')

    def _lookup(self, mapping, key):
        """Return ``mapping[key]``, or ``None`` when the key is missing."""
        try:
            return mapping[key]
        except KeyError:
            return None

    def _marshal_key(self, key, parent):
        """Append the element(s) for ``parent[key]`` to the root element."""
        root = self._root()
        if key == 'links':
            for link in parent[key]:
                root.appendChild(self.parse_entry_generic(link, 'link'))
        elif key == 'author_detail':
            root.appendChild(self.parse_author(parent[key]))
        elif key == 'entries':
            root.appendChild(self.parse_entries())
        elif key in self._SKIPPED_KEYS:
            pass
        else:
            element = self.xml.createElement(key)
            child = self.iterate(parent[key])
            # Only a text value comes back as a node.  Nested mappings and
            # time values attach their own output to the root, so an element
            # without text is not added.
            if isinstance(child, minidom.Node):
                element.appendChild(child)
                root.appendChild(element)
