Find a pragmatic way to process smaller XML documents and fragments using the following code examples:
- String concatenation and string parsing
- XML DOM with XmlDocument
- Reading XML with XmlReader
- Linq to XML with XElement
- Element tree with F# Discriminated Unions
RSS Test Fragment
<item>
<title>Space Exploration</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>
Sky watchers in Europe, Asia, and parts of Alaska and Canada
will experience a partial eclipse of the Sun on Saturday, May 31.
</description>
</item>
XML Writing Examples
String concatenation
// Build the RSS <item> fragment by joining literal tags with the raw values.
// The values are inserted as-is: no XML escaping is applied.
[ "<item>\r\n"
  "\t<title>"; title; "</title>\r\n"
  "\t<link>"; link; "</link>\r\n"
  "\t<description>"; description; "</description>\r\n"
  "</item>\r\n" ]
|> String.concat ""
XmlWriter
open System.Xml
// Write the <item> element and its three children through an XmlWriter
// that targets an in-memory StringBuilder.
let buffer = StringBuilder()
use xmlWriter = XmlWriter.Create(buffer)
xmlWriter.WriteStartElement("item")
for (name, value) in [ "title", title; "link", link; "description", description ] do
    xmlWriter.WriteElementString(name, value)
xmlWriter.WriteEndElement()
// Close before reading the buffer so everything written has reached it.
xmlWriter.Close()
buffer.ToString()
XmlDocument
// Build the <item> element as a DOM tree and return its serialized markup.
let document = new XmlDocument()
let root = document.AppendChild(document.CreateElement("item"))
// Each (name, text) pair becomes one child element of <item>, in list order.
[ "title", title; "link", link; "description", description ]
|> List.iter (fun (name, text) ->
    let node = document.CreateElement(name)
    node.InnerText <- text
    root.AppendChild(node) |> ignore)
document.OuterXml
XElement
/// Thin subclass of System.Xml.Linq.XElement whose constructor accepts a plain
/// string name, so callers do not have to convert to XName themselves.
type XElement(name: string, [<ParamArray>] values: obj[]) =
    inherit System.Xml.Linq.XElement(System.Xml.Linq.XName.op_Implicit name, values)
// Compose the <item> element from three text-only children and render it.
let item =
    let titleNode = XElement("title", title)
    let linkNode = XElement("link", link)
    let descriptionNode = XElement("description", description)
    XElement("item", titleNode, linkNode, descriptionNode)
item.ToString()
F# Tree and XmlWriter
/// Minimal XML element tree: every node carries a name, a text value and
/// a sequence of child nodes.
type Xml =
    | Element of string * string * Xml seq

    /// Writes this element, its text and all descendants to `writer`.
    member this.WriteContentTo(writer: XmlWriter) =
        let rec emit (Element (tag, text, kids)) =
            writer.WriteStartElement(tag)
            writer.WriteString(text)
            for kid in kids do
                emit kid
            writer.WriteEndElement()
        emit this

    /// Renders the tree as XML text using an indenting XmlTextWriter.
    override this.ToString() =
        let buffer = StringBuilder()
        // The writer is scoped to this helper so it is disposed before the
        // buffer is read back.
        let render () =
            use xmlWriter =
                new XmlTextWriter(new StringWriter(buffer), Formatting = Formatting.Indented)
            this.WriteContentTo(xmlWriter)
        render ()
        buffer.ToString()
// Build the <item> tree from three childless text elements and render it.
let item =
    let leaf name text = Element(name, text, [])
    Element("item", "",
        [ leaf "title" title
          leaf "link" link
          leaf "description" description ])
item.ToString()
XML Writing Comparison Summary Table (faster times are better)
| Technique     | Time | Remarks                                   |
|---------------|------|-------------------------------------------|
| string concat | 11   | Concise, fastest, but strings not escaped |
| XmlWriter     | 31   | Verbose                                   |
| XmlDocument   | 39   | Verbose                                   |
| XElement      | 44   | Concise                                   |
| F# Tree       | 24   | Concise                                   |
XML Reading Examples
Event based string parser
/// Example event based XML parser (a bit like SAX).
/// Parse walks the tags of a string in document order and reports each one
/// to a callback as an XmlEvent.
type XmlEvent =
// Start of an element: the text between '<' and '>' plus the trimmed text that follows the tag.
| Element of string * string
// End of an element, carrying the tag text without its leading or trailing "/".
| EndElement of string
/// Scans `xml` for '<' and '>' characters and calls `f` once per event.
/// The whole text between '<' and '>' is used as the element name; attributes
/// are not separated from it.
/// NOTE(review): tags are paired purely by position, so this appears to assume
/// '<' and '>' occur only as tag delimiters — confirm before using on other input.
static member Parse (xml:string) f =
// Collect the index of every '<' and '>'; both lists are built by consing,
// so they end up in reverse document order.
let opens, closes =
[0..(xml.Length-1)] |> Seq.fold (fun (xs,ys) i ->
match xml.Chars(i) with
| '<' -> (i::xs,ys)
| '>' -> (xs,i::ys)
| _ -> (xs,ys)
) ([],[])
// Span from the last '>' (head of the reversed list) to the end of the input.
// NOTE(review): List.hd is applied without an emptiness check — input with no '>' presumably fails here; confirm.
let lastValue = (List.hd closes, xml.Length)
// (start, end) index pairs for each tag, restored to document order.
let tags = Seq.zip (opens |> List.rev) (closes |> List.rev)
// For each tag, the span between its '>' and the next tag's '<';
// the final tag gets lastValue.
let values =
Seq.append
(Seq.pairwise tags
|> Seq.map (fun ((_,end1),(start2,_)) -> (end1,start2)))
[lastValue]
Seq.zip tags values
|> Seq.iter (fun ((tagStart,tagEnd),(valStart,valEnd)) ->
// Tag text ending in "/" (self-closing form); yields the text without the slash.
let (|EmptyTag|_|) (tag:string) =
if tag.EndsWith("/") then
Some(tag.Substring(0,tag.Length-1)) else None
// Tag text starting with "/" (closing tag); yields the text after the slash.
let (|EndTag|_|) (tag:string) =
if tag.StartsWith("/") then
Some(tag.Substring(1,tag.Length-1)) else None
// Tag text wrapped in "?" ... "?"; yields the text between the question marks.
let (|ProcessingInstruction|_|) (tag:string) =
if tag.StartsWith("?") && tag.EndsWith("?") then
Some(tag.Substring(1, tag.Length-2)) else None
// Text strictly between '<' and '>'.
let tag = xml.Substring(tagStart+1, tagEnd-(tagStart+1))
// Text strictly between this tag's '>' and the end of its value span.
let value = xml.Substring(valStart+1, valEnd-(valStart+1))
match tag with
// A self-closing tag is reported as a start with an empty value, then an end.
| EmptyTag name -> f (Element(name,"")); f(EndElement(name))
| EndTag name -> f (EndElement(name))
// Processing instructions produce no event.
| ProcessingInstruction _ -> ()
| _ -> f (Element(tag,value.Trim()))
)
/// Holder for the three fields read from an RSS <item>. Every field is
/// mutable and optional so it can be filled in as elements are encountered.
type RssItem =
    { mutable Title: String option
      mutable Link: String option
      mutable Description: String option }

    /// Yields a record with every field set to None.
    static member Empty =
        { Title = None
          Link = None
          Description = None }
// Fill an RssItem from the parser's events, keeping a stack of open tags so
// that every end tag can be checked against the most recent start tag.
let item = RssItem.Empty
let tags = Stack<string>()
let handle = function
    | Element (name, value) ->
        tags.Push(name)
        // Only the three known child elements are stored; others are ignored.
        match name with
        | "title" -> item.Title <- Some(value)
        | "link" -> item.Link <- Some(value)
        | "description" -> item.Description <- Some(value)
        | _ -> ()
    | EndElement name ->
        let opened = tags.Pop()
        // Double parentheses keep the comparison from being read as a named argument.
        Debug.Assert((opened = name))
XmlEvent.Parse xml handle
item
XmlReader
// Read the <item> fragment with a forward-only XmlReader.
use reader = XmlReader.Create(new StringReader(xml))
// Position the reader on the root element. MoveToContent skips non-content
// nodes; MoveToElement only acts when the reader is on an attribute, so it
// did nothing at this point.
reader.MoveToContent() |> ignore
// Consume the <item> start tag.
reader.ReadStartElement("item")
// NOTE(review): this presumably relies on the children appearing in the
// order title, link, description, matching the reads below — confirm for other input.
{
    Title = Some(reader.ReadElementString("title"))
    Link = Some(reader.ReadElementString("link"))
    Description = Some(reader.ReadElementString("description"))
}
XmlDocument
// Load the fragment into a DOM and pull the text of the three named children
// of the document element.
let document = XmlDocument()
document.LoadXml(xml)
let root = document.DocumentElement
let textOf (name: string) = Some(root.[name].InnerText)
{
    Title = textOf "title"
    Link = textOf "link"
    Description = textOf "description"
}
XElement
open System.Xml.Linq
// Parse the fragment with LINQ to XML and read the value of each named child.
let root = XElement.Parse(xml)
// XName has no automatic string conversion here, hence the explicit op_Implicit.
let valueOf (name: string) = Some(root.Element(XName.op_Implicit(name)).Value)
{
    Title = valueOf "title"
    Link = valueOf "link"
    Description = valueOf "description"
}
F# Tree and XmlReader
/// Element tree type: a node is a parent of child elements, an element
/// holding a text value, or an empty element. Each case carries the element name.
type ElementTree =
| ParentElement of string * ElementTree seq
| ValueElement of string * string
| EmptyElement of string
/// Builds a tree from `reader`: moves to the first content node, then parses
/// the element found there together with everything nested inside it.
static member Parse (reader:XmlReader) =
// Parses the element the reader is currently on. `depth` is the reader depth
// of that element's content; the loop stops once the reader rises above it.
let rec ParseElement depth =
let name = reader.Name
// Text and children are accumulated lazily: each stays None until first needed.
let mutable value = None
let mutable children = None
while reader.Read() && reader.Depth >= depth do
match reader.NodeType with
| XmlNodeType.Element ->
// Create the child list on the first child element, then reuse it.
let collection =
match children with
| Some xs -> xs
| None ->
let xs = new ResizeArray<ElementTree>()
children <- Some xs
xs
// NOTE(review): indentation was lost in this listing; the `|> collection.Add`
// below presumably applies to the result of the whole match — confirm against the original source.
match reader.IsEmptyElement with
| true -> EmptyElement reader.Name
| false -> ParseElement (reader.Depth+1)
|> collection.Add |> ignore
| XmlNodeType.Text ->
// Create the text builder on the first text node, then append to it.
let builder =
match value with
| Some x -> x
| None -> let x = StringBuilder() in value <- Some x; x
builder.Append reader.Value |> ignore
// Every other node type is skipped.
| _ -> ()
done
// Classify the element; when children exist, any collected text is discarded.
match children, value with
| None, None -> EmptyElement(name)
| None, Some value -> ValueElement(name, value.ToString())
| Some children, _ -> ParentElement(name, children)
reader.MoveToContent () |> ignore
ParseElement (reader.Depth+1)
/// Writes this element and its descendants to `writer`, then closes the writer.
member element.WriteContentTo (writer:XmlWriter) =
let rec WriteElement el =
match el with
| ParentElement (name,children) ->
writer.WriteStartElement(name)
children |> Seq.iter (fun child -> WriteElement child)
writer.WriteEndElement()
| ValueElement (name,value) ->
writer.WriteElementString(name,value)
| EmptyElement name ->
writer.WriteStartElement(name);
writer.WriteEndElement()
// NOTE(review): with indentation lost it is unclear whether this newline is
// written only for EmptyElement or after every element — confirm against the original source.
writer.WriteWhitespace(Environment.NewLine)
WriteElement element
writer.Close()
/// Serializes this element to a string through an XmlWriter created with Indent=true.
member element.ToXml () =
let output = StringBuilder()
let settings = XmlWriterSettings(Indent=true)
using (XmlWriter.Create(output, settings))
element.WriteContentTo
output.ToString()
/// The element name, whichever case this node is.
member element.Name =
match element with
| ParentElement (name,_) -> name
| ValueElement (name,_) -> name
| EmptyElement (name) -> name
/// The text of a ValueElement; String.Empty for the other two cases.
member element.Value =
match element with
| ParentElement (_,children) -> String.Empty
| ValueElement (_,value) -> value
| EmptyElement (_) -> String.Empty
/// The children of a ParentElement; an empty sequence for the other two cases.
member element.Children =
match element with
| ParentElement (_, children) -> children
| ValueElement (_,_) -> Seq.empty
| EmptyElement (_) -> Seq.empty
/// Returns the first direct child whose Name equals `name`.
/// NOTE(review): Seq.find is used, so a missing child presumably raises rather than returning a default — confirm.
member element.FindElement name =
element.Children |> Seq.find (fun child -> child.Name = name)
// Parse the fragment into an ElementTree and read the three named children.
// The reader is bound with `use` so it is disposed when this scope ends.
use reader = XmlReader.Create(new StringReader(xml))
let root = ElementTree.Parse(reader)
{
    Title = Some(root.FindElement("title").Value)
    Link = Some(root.FindElement("link").Value)
    Description = Some(root.FindElement("description").Value)
}
XML Reading Comparison Summary Table (faster times are better)
| Technique   | Time | Remarks                                    |
|-------------|------|--------------------------------------------|
| Event based | 1383 | Verbose, error prone and ridiculously slow |
| XmlReader   | 73   | Concise                                    |
| XmlDocument | 65   | Concise                                    |
| XElement    | 64   | Concise - shame about implicit conversions |
| F# Tree     | 68   | Concise                                    |
Note
For larger XML docs try other techniques like XPath and reflection based serialization e.g.: