Quantcast
Channel: Another Android Blog » Android Development
Viewing all articles
Browse latest Browse all 10

RSS XML Document Parsing/Parser

$
0
0

I’ve seen a lot of examples for parsing XML using SAX. As far as I can tell, that is the most efficient method for doing so. Honestly though, I’m not really sure how much of a drag using the DOM would be on speed. I thought, as a learning experience, I would break from this model and sharpen my skills with org.w3c.dom.Document. Seeing as I didn’t find any decent examples of this online, here’s a little addition to the collective knowledge pool.

I handled the XML parsing work in what I dubbed an RSSDocument class and described its functions in a DocumentHandler interface. Also, I created a couple of POJO (plain old Java object) classes for RSS channels and items. I am going to break from my normal tutorial-based method and simply list the code. If I find time later, I will come back and present an honest RSS tutorial.

This code is from a project I may or may not finish. The back end is done, but other projects call and I may not be able to get to the front end. A word of note, I wrote this code to work with RSS 2.0 specifications. I’ve never even looked at older RSS specifications. Finally, I don’t know how much speed would be gained using SAX versus this method, comments welcome on the subject. Now here’s that code…

POJO RSS Item

public class Item implements ItemHandler
{
	//----------------------------OBJECTS AND FIELDS---------------------------//
 
	protected long id, channelID;
	protected String title, description, pubDate, enclosureUrl,
				enclosureLength, enclosureType;
	protected boolean isNew;
 
	//-------------------------------CONSTRUCTOR-------------------------------//
 
	public Item ()
	{
		id = -1;
		channelID = -1;
		title = null;
		description = null;
		pubDate = null;
		enclosureUrl = null;
		enclosureLength = null;
		enclosureType = null;
	}
 
	//---------------------------GETTERS AND SETTERS---------------------------//
 
	public long getID() { return id; }
	public String getTitle() { return title; }
	public long getChannelID() { return channelID; }
	public String getDescription() { return description; }
	public String getPubDate() { return pubDate; }
	public String getEnclosureUrl() { return enclosureUrl; }
	public String getEnclosureLength() { return enclosureLength; }
	public String getEnclosureType() { return enclosureType; }
	public Boolean getIsNew() { return isNew; }
 
	// TODO verify the data passed in the the setters.
	public void setID(long id) { this.id = id; }
	public void setTitle(String title) { this.title = title; }
	public void setChannelID(long channelID) { this.channelID = channelID; }
	public void setDescription(String description) { this.description = description; }
	public void setPubDate(String pubDate) { this.pubDate = pubDate; }
	public void setEnclosureUrl(String enclosureUrl) { this.enclosureUrl = enclosureUrl; }
	public void setEnclosureLength(String enclosureLength) { this.enclosureLength = enclosureLength; }
	public void setEnclosureType(String enclosureType) { this.enclosureType = enclosureType; }
	public void setIsNew(Boolean isNew) { this.isNew = isNew; }
 
	@Override
	public String toString()
	{
		String s = "";
 
		s += id + "\n";
		s += title + "\n";
		s += channelID + "\n";
		s += description + "\n";
		s += pubDate + "\n";
		s += enclosureUrl + "\n";
		s += enclosureLength + "\n";
		s += enclosureType + "\n";
		s += isNew + "\n";
 
		return s;
	}
}

POJO RSS Channel

package com.BrilliantBot.BrilliantRSS.rss;
 
public class Channel implements ChannelHandler
{
	//----------------------------OBJECTS AND FIELDS---------------------------//
 
	long id;
	String url, link, title, description, imageUrl, imageUri;
 
	//-------------------------------CONSTRUCTOR-------------------------------//
 
	public Channel()
	{
		id = -1;
		url = null;
		link = null;
		title = null;
		description = null;
		imageUrl = null;
		imageUri = null;
 
	}
 
	//---------------------------GETTERS AND SETTERS---------------------------//
 
	public long getID() { return id; }
	public String getURL() { return url; }
	public String getLink() { return link; }
	public String getTitle() { return title; }
	public String getDescription() { return description; }
	public String getImageUri() { return imageUri; }
	public String getImageUrl() { return imageUrl; }
 
	// TODO verify that the setters are getting the correct data.
	public void setID(long id) { this.id = id; }
	public void setURL(String url) { this.url = url; }
	public void setLink(String link) { this.link = link; }
	public void setTitle(String title) { this.title = title; }
	public void setDescription(String description) { this.description = description; }
	public void setImageUri(String uri) { this.imageUri = uri; }
	public void setImageUrl(String url) { this.imageUrl = url; }
 
	@Override
	public String toString()
	{
		String s = "";
 
		s += id + "\n";
		s += url + "\n";
		s += link + "\n";
		s += title + "\n";
		s += description + "\n";
		s += imageUrl + "\n";
		s += imageUri + "\n";
 
		return s;
	}
}

DocumentHandler Interface

package com.BrilliantBot.BrilliantRSS.controllers.interfaces;
 
import java.util.ArrayList;
 
import org.w3c.dom.Document;
import com.BrilliantBot.BrilliantRSS.rss.Channel;
import com.BrilliantBot.BrilliantRSS.rss.Item;
 
/**
 * Contract for objects that extract a {@link Channel} and its {@link Item}s
 * from a DOM {@link Document}.
 *
 * The intended call order is {@link #processDocument(Document)} first, then
 * {@link #getChannel()} and {@link #getItems()} to read the results.  What the
 * getters return before any document has been processed is left to the
 * implementation.
 *
 * @author Randall Mitchell | BrilliantBot.com | AnotherAndroidBlog.com
 */
public interface DocumentHandler
{
	/**
	 * Hands a document to the handler for processing.  The extracted data is
	 * not returned here; it is read back through the two getters.
	 *
	 * @param document the document to be processed.
	 */
	void processDocument(Document document);
 
	/**
	 * Returns the channel extracted from the processed document.
	 *
	 * @return the processed channel.
	 */
	Channel getChannel();
 
	/**
	 * Returns the items extracted from the processed document.
	 *
	 * @return the processed items as an ArrayList.
	 */
	ArrayList<Item> getItems();
}

RSSDocument Class that implements DocumentHandler

This class takes a Document object and hands back the channel and an ArrayList of items on request. Internally, it first locates the channel node inside the document’s rss element. It then walks the channel node’s children, handling each recognized element (title, link, description, image) individually. When it finds a node named item, it passes the node to a separate method that processes the children of that item.

package com.BrilliantBot.BrilliantRSS.controllers;
 
import java.util.ArrayList;
 
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
 
import com.BrilliantBot.BrilliantRSS.controllers.interfaces.DocumentHandler;
import com.BrilliantBot.BrilliantRSS.rss.Channel;
import com.BrilliantBot.BrilliantRSS.rss.Item;
 
public class RSSDocument implements DocumentHandler
{
	//---------------------------KEY OBJECTS---------------------------//
 
	Channel channel;
	ArrayList<Item> items;
	int processedNodes;
 
	//----------------------------CONSTANTS----------------------------//
 
	protected final String RSS = "rss";
 
	protected final String CHANNEL = "channel";
	protected final String CHANNEL_TITLE = "title";
	protected final String CHANNEL_LINK = "link";
	protected final String CHANNEL_DESCRIPTION = "description";
	protected final String CHANNEL_IMAGE = "image";
	protected final String CHANNEL_IMAGE_URL = "url";
 
	protected final String ITEM = "item";
	protected final String ITEM_TITLE = "title";
	protected final String ITEM_DESCRIPTION = "description";
	protected final String ITEM_PUB_DATE = "pubDate";
	protected final String ITEM_ENCLOSURE = "enclosure";
	protected final String ITEM_ENCLOSURE_URL = "url";
	protected final String ITEM_ENCLOSURE_LENGTH = "length";
	protected final String ITEM_ENCLOSURE_TYPE = "type";
 
	//---------------------------CONSTRUCTOR---------------------------//
 
	public RSSDocument()
	{
		channel = new Channel();
		items = new ArrayList<Item>();
		processedNodes = 0;
	}
 
	//-------------------------PROCESS METHODS-------------------------//
 
	@Override
	public void processDocument(Document document)
	{
		Node root = getRoot(document);
		// we are going to move through the root document's nodes.  each node will be
		// checked against possible types of nodes and process if the types match.
		for (int i=0; i < root.getChildNodes().getLength(); i++)
		{
			// get the node at 'i'.
			Node node = ((NodeList)root.getChildNodes()).item(i);
 
			String nodeName = node.getNodeName().toLowerCase();
 
			// check the node against each type.
			// if it matches, handle the node accordingly.
			if      ( nodeName.equals(ITEM) )
			{ processItemNode(node); processedNodes++; }
 
			else if ( nodeName.equals(CHANNEL_IMAGE) )
			{ processChannelImage(node); processedNodes++; }
 
			else if ( nodeName.equals(CHANNEL_TITLE) )
			{
				channel.setTitle( node.getFirstChild().getNodeValue() );
				processedNodes++;
			}
 
			else if ( nodeName.equals(CHANNEL_LINK) )
			{
				channel.setLink( node.getFirstChild().getNodeValue() );
				processedNodes++;
			}
 
			else if ( nodeName.equals(CHANNEL_DESCRIPTION) )
			{
				channel.setDescription( node.getFirstChild().getNodeValue() );
				processedNodes++;
			}
		}
	}
 
	protected Node getRoot(Document document)
	{
		// prepare the document for processing.
		Node root = null;
 
		for (int i=1; i < document.getChildNodes().getLength(); i++)
		{
			Node node = ((NodeList)document.getChildNodes()).item(i);
			if (node.getNodeName().toLowerCase().equals(RSS))
			{
				// node is the RSS node, find the channel root node inside.
				for (int j=0; j < node.getChildNodes().getLength(); j++)
				{
					Node subnode = ((NodeList)node.getChildNodes()).item(j);
					if (subnode.getNodeName().toLowerCase().equals(CHANNEL))
					{
						return subnode;
					}
				}
			}
		}
		return root;
 
	}
	/**
	 * handles the image node of the channel.  the information
	 * is saved in the channel object rather than a separate node.
	 *
	 * @param imageNode the document node representing the channel image.
	 */
	protected void processChannelImage(Node imageNode)
	{
		// find and set the image URL
		for (int i=0; i < imageNode.getChildNodes().getLength(); i++)
		{
			// get the subnode of imageNode at position 'i'.
			Node subnode = ((NodeList)imageNode.getChildNodes()).item(i);
 
 
			// if it's the image URL, set the channel's image URL.
			if ( subnode.getNodeName().toLowerCase().equals(CHANNEL_IMAGE_URL) )
			{ channel.setImageUrl( subnode.getFirstChild().getNodeValue() ); }
		}
	}
 
	/**
	 * handles individual channel items.  the information is saved in item
	 * objects inside of an ArrayList.
	 * @param itemNode a document node representing a channel item.
	 */
	protected void processItemNode(Node itemNode)
	{
		// item to be created from item node.
		Item item = new Item();
 
		// compare each subnode of item to constants,
		// process when matches occur.
		for (int i=0; i < itemNode.getChildNodes().getLength(); i++)
		{
			// get the subnode of itemNode located at 'i'.
			Node subnode = ((NodeList)itemNode.getChildNodes()).item(i);
 
			// compare the subnode name to possible names,
			// process when there is a match.
			if      ( subnode.getNodeName().toLowerCase().equals(ITEM_TITLE) )
			{ item.setTitle( subnode.getFirstChild().getNodeValue() ); }
 
			else if ( subnode.getNodeName().toLowerCase().equals(ITEM_DESCRIPTION) )
			{ item.setDescription( subnode.getFirstChild().getNodeValue() ); }
 
			else if ( subnode.getNodeName().toLowerCase().equals(ITEM_PUB_DATE) )
			{ item.setPubDate( subnode.getFirstChild().getNodeValue() ); }
 
			// the enclose has subnodes that need to be processed.
			else if ( subnode.getNodeName().toLowerCase().equals(ITEM_ENCLOSURE) )
			{
				// compare each subnode of enclosure to constants,
				// process when matches occur.
				for (int j=0; j < subnode.getChildNodes().getLength(); j++)
				{
					Node encSubNode = ((NodeList)subnode.getChildNodes()).item(j);
 
					// compare the enclosure subnode name to possible
					// names and process when there is a match.
					if ( encSubNode.getNodeName().toLowerCase().equals(ITEM_ENCLOSURE_URL) )
					{ item.setEnclosureUrl( encSubNode.getFirstChild().getNodeValue() ); }
 
					if ( encSubNode.getNodeName().toLowerCase().equals(ITEM_ENCLOSURE_LENGTH) )
					{ item.setEnclosureLength( encSubNode.getFirstChild().getNodeValue() ); }
 
					if ( encSubNode.getNodeName().toLowerCase().equals(ITEM_ENCLOSURE_TYPE) )
					{ item.setEnclosureType( encSubNode.getFirstChild().getNodeValue() ); }
				}
			}
		}
 
		// add the item to the items list.
		items.add(item);
	}
 
	//------------------------GETTER METHODS------------------------//
 
	@Override
	public Channel getChannel()
	{
		return channel;
	}
 
	@Override
	public ArrayList<Item> getItems()
	{
		return items;
	}
 
	@Override
	public String toString()
	{
		String string = "";
 
		string += "Number of child nodes to root: " + processedNodes + "\n";
 
		string += channel.toString() + "\n";
 
		for (int i = 0; i < items.size(); i++)
		{
			string += ((Item)items.get(i)).toString(); 
		}
		return string;
	}
}

Viewing all articles
Browse latest Browse all 10

Trending Articles