Phillip Trelford's Array

POKE 36879,255

F# Rope

The ICFP 2009 programming contest starts on Friday 26th June.

Solving the ICFP 2007 contest task required a scalable string implementation, for which a Rope is a good fit, and the SGI C++ library provides as rope<T,alloc>. A Rope is represented as a binary tree with substrings as leaves.

Rope operations:

  • Appending a string involves creating a new branch with the left as the existing rope and the right as the string to add.
  • Truncation involves creating a new leaf substring with a reduced length.   
  • Insertion involves creating a new branch containing a branch with the left side to the insertion index and the string to insert, plus the right side from the insertion index.
  • Deletion involves creating a new branch with a left substring with reduced length and a right substring with an increased index. 
  • Iterating over the Rope’s characters involves recursively visiting the tree’s substrings.
    For performance, the character length of a branch may be stored, and on branch creation a check made for empty nodes. To optimize small appends branches may be reduced to new strings.
    Ropes are useful for manipulation of large strings, for example with a text editor.
    The following is a naive F# Rope, implemented for fun:
type Rope =
    | Sub of string * int * int
    | Concat of Rope * Rope * int    
    static member Empty = 
        Sub(String.Empty,0,0)
    member this.Length = 
        match this with
        | Sub(_,_,l) -> l
        | Concat(_,_,l) -> l        
    static member Create(s:string) = 
        Rope.Create(s, 0, s.Length)
    static member Create(s, i, l) = 
        Sub(s, i, l)
    static member private Create(lhs:Rope,rhs:Rope) =
        match lhs.Length, rhs.Length with        
        | 0,0 -> Rope.Empty
        | 0,n -> rhs
        | n,0 -> lhs
        | n1,n2 -> Concat(lhs,rhs,n1+n2)   
    member this.to_seq() =
        let rec to_seq = function
            | Sub(s,i,l) -> 
                seq { for n=0 to (l-1) do yield (s.Chars(i+n)) done }
            | Concat(lhs,rhs,_) -> 
                Seq.append (to_seq lhs) (to_seq rhs)
        to_seq this    
    override this.ToString() =
        let builder = System.Text.StringBuilder(this.Length)
        let rec toString = function
            | Sub(s,i,l) -> builder.Append(s, i, l) |> ignore
            | Concat(lhs,rhs,_) -> toString lhs; toString rhs
        toString this
        builder.ToString()
    member this.Append (s:string) = 
        Concat(this, Rope.Create s, this.Length + s.Length)    
    [<OverloadID("Insert(index,Rope)")>]
    member this.Insert(index,value:Rope) =
        let rec insert pos (rope:Rope) =            
            match rope.Length, rope with
            | n, _ when (pos+n) <= index || (pos) > index ->
                rope            
            | n, Sub(s,i,l) ->
                let lhs = Sub(s,i,max 0 (index-pos))
                let rhs = Sub(s,min (i+l) (i+(index-pos)),
                            max 0 (l-(index-pos)))
                Rope.Create(Rope.Create(lhs,value), rhs)
            | n, Concat(l,r,_) ->
                let n = l.Length
                Rope.Create (insert pos l, insert (pos+n) r)
        insert 0 this
    [<OverloadID("Insert(index,string)")>]
    member this.Insert(index,s:string) = 
        this.Insert(index,Sub(s,0,s.Length))      
    member this.Remove(index,length) =
        let rec remove pos (rope:Rope) =
            match rope.Length, rope with
            | n, _ when (pos + n) <= index || pos >= (index + length) ->
                rope            
            | n, Sub(s,i,l) when pos = index -> 
                Sub(s,i+length, max 0 (l-length))
            | n, Sub(s,i,l) when (pos+l) = (index+length) -> 
                Sub(s,i,max 0 (index-pos))
            | n, Sub(s,i,l) ->
                Rope.Create( 
                    Sub(s,i,max 0 (index-pos)), 
                    Sub(s,min (i+l) (i+(index-pos)+length),
                        max 0 (l-(index-pos)-length)))
            | n, Concat(lhs,rhs,_) -> 
                Rope.Create (
                    remove pos lhs, 
                    remove (pos+lhs.Length) rhs)
        remove 0 this                   
    member this.Item        
        with get (index:int) =
            let rec get pos = function
                | Sub(s,i,l) -> s.[i+index-pos]                   
                | Concat(lhs,rhs,_) -> 
                    if index >= (pos + lhs.Length) then 
                        get (pos+lhs.Length) rhs
                    else 
                        get pos lhs 
            get 0 this                     
    member this.Substring(index:int,length:int) =
        let rec sub pos (rope:Rope) = 
            match rope.Length, rope with            
            | n, Sub(s,i,l) ->                                                    
                let offset = index - pos
                if offset < 0 then     
                    Sub(s, i, min (length+offset) l)    
                else
                    Sub(s, i + offset, min length (l-offset))                                    
            | n, Concat(lhs,rhs,_) ->
                if index >= (pos + lhs.Length) then sub (pos+lhs.Length) rhs
                else
                    if (index+length) < (pos+lhs.Length) then
                        sub pos lhs
                    else
                        Rope.Create (sub pos lhs, sub (pos+lhs.Length) rhs)
        sub 0 this

F# XML Comparison (XElement vs XmlDocument vs XmlReader/XmlWriter vs Discriminated Unions)

    Find a pragmatic way to process smaller XML documents and fragments from the following code examples of:
  • string concatenation and string parsing
  • XML DOM with XmlDocument
  • Reading XML with XmlReader
  • Linq to XML with XElement
  • Element tree with F# Discriminated Unions

    RSS Test Fragment

     

    <item>
      <title>Space Exploration</title>
      <link>http://liftoff.msfc.nasa.gov/</link>
      <description>
        Sky watchers in Europe, Asia, and parts of Alaska and Canada
        will experience a partial eclipse of the Sun on Saturday, May 31.
      </description>
    </item>
    

     

    XML Writing Examples

String concatenation

"<item>\r\n" +
"\t<title>" + title + "</title>\r\n" +
"\t<link>" + link + "</link>\r\n" +
"\t<description>" + description + "</description>\r\n" + 
"</item>\r\n" 

 

XmlWriter

open System.Xml
let output = StringBuilder()
use writer = XmlWriter.Create(output)
writer.WriteStartElement("item")
writer.WriteElementString("title", title)
writer.WriteElementString("link", link)
writer.WriteElementString("description", description)
writer.WriteEndElement()
writer.Close()
output.ToString()

 

XmlDocument

let doc = new XmlDocument()        
let item = doc.AppendChild(doc.CreateElement("item"))
let Append name value =
    let child = doc.CreateElement name
    child.InnerText <- value
    item.AppendChild child |> ignore
Append "title" title   
Append "link" link
Append "description" description 
doc.OuterXml

 

XElement

type XElement (name:string, [<ParamArray>] values:obj []) = 
    inherit System.Xml.Linq.XElement
        (System.Xml.Linq.XName.op_Implicit(name), values)     

 

let item = 
    XElement("item", 
        XElement("title", title),
        XElement("link", link),
        XElement("description", description))
item.ToString()
  

 

F# Tree and XmlWriter 

/// F# Element Tree
type Xml = 
    | Element of string * string * Xml seq    
    member this.WriteContentTo(writer:XmlWriter) =
        let rec Write element =
            match element with
            | Element (name, value, children) -> 
                writer.WriteStartElement(name)
                writer.WriteString(value)
                children |> Seq.iter (fun child -> Write child)
                writer.WriteEndElement()
        Write this                
    override this.ToString() =
        let output = StringBuilder()             
        using (new XmlTextWriter(new StringWriter(output), 
                Formatting=Formatting.Indented))
            this.WriteContentTo        
        output.ToString()
let item = 
    Element("item","",
        [ 
        Element("title",title,[])
        Element("link",link,[])
        Element("description",description,[])
        ])
item.ToString()

 

XML Writing Comparison Summary Table (faster times are better)

Technique Time Remarks
string concat 11 Concise, fastest, but strings not escaped
XmlWriter 31 Verbose
XmlDocument 39 Verbose
XElement 44 Concise
F# Tree 24 Concise

 

XML Reading Examples

Event based string parser

/// Example event based XML parser (a bit like SAX)
type XmlEvent =
    | Element of string * string
    | EndElement of string              
    static member Parse (xml:string) f =                       
        let opens, closes =                
            [0..(xml.Length-1)] |> Seq.fold (fun (xs,ys) i ->              
                match xml.Chars(i) with
                | '<' -> (i::xs,ys)
                | '>' -> (xs,i::ys)
                | _ -> (xs,ys)
            ) ([],[])        
        let lastValue = (List.hd closes, xml.Length)
        let tags = Seq.zip (opens |> List.rev) (closes |> List.rev)
        let values =
            Seq.append              
                (Seq.pairwise tags 
                    |> Seq.map (fun ((_,end1),(start2,_)) -> (end1,start2)))
                [lastValue]                     
        Seq.zip tags values
        |> Seq.iter (fun ((tagStart,tagEnd),(valStart,valEnd)) ->           
            let (|EmptyTag|_|) (tag:string) = 
                if tag.EndsWith("/") then 
                    Some(tag.Substring(0,tag.Length-1)) else None            
            let (|EndTag|_|) (tag:string) =
                if tag.StartsWith("/") then 
                    Some(tag.Substring(1,tag.Length-1)) else None
            let (|ProcessingInstruction|_|) (tag:string) =
                if tag.StartsWith("?") && tag.EndsWith("?") then 
                    Some(tag.Substring(1, tag.Length-2)) else None   
            let tag = xml.Substring(tagStart+1, tagEnd-(tagStart+1)) 
            let value = xml.Substring(valStart+1, valEnd-(valStart+1))                  
            match tag with
            | EmptyTag name -> f (Element(name,"")); f(EndElement(name))
            | EndTag name -> f (EndElement(name))             
            | ProcessingInstruction _ -> ()                        
            | _ -> f (Element(tag,value.Trim()))            
        )        
type RssItem =
    { 
        mutable Title : String option
        mutable Link : String option
        mutable Description : String option        
    }
    static member Empty =
        { Title=None; Link=None; Description=None }
let item = RssItem.Empty
let MatchElement (name,value) =        
    match name with
    | "title" -> item.Title <- Some(value)
    | "link" -> item.Link <- Some(value)
    | "description" -> item.Description <- Some(value)
    | _ -> ()    
let tags = Stack<string>()
XmlEvent.Parse xml (fun event -> 
    match event with
    | Element (name,value) -> tags.Push(name); MatchElement(name,value); 
    | EndElement name -> let tag = tags.Pop() in Debug.Assert((tag = name))
)
item

 

XmlReader

use reader = XmlReader.Create(new StringReader(xml))
reader.MoveToElement() |> ignore
reader.ReadStartElement("item")
{ 
    Title = Some(reader.ReadElementString("title"))
    Link = Some(reader.ReadElementString("link"))
    Description = Some(reader.ReadElementString("description"))
}

 

XmlDocument

let doc = XmlDocument()
doc.LoadXml(xml)
let item = doc.DocumentElement
{
    Title = Some(item.["title"].InnerText)
    Link = Some(item.["link"].InnerText)
    Description = Some(item.["description"].InnerText)
}

 

XElement

open System.Xml.Linq
let e = XElement.Parse(xml)
{   
    Title = Some(e.Element(XName.op_Implicit("title")).Value);
    Link = Some(e.Element(XName.op_Implicit("link")).Value)
    Description = Some(e.Element(XName.op_Implicit("description")).Value)
}

F# Tree and XmlWriter

/// Element tree type
type ElementTree =     
    | ParentElement of string * ElementTree seq
    | ValueElement of string * string        
    | EmptyElement of string
    static member Parse (reader:XmlReader) =
        let rec ParseElement depth =                   
            let name = reader.Name                                                     
            let mutable value = None
            let mutable children = None            
            while reader.Read() && reader.Depth >= depth do
                match reader.NodeType with                
                | XmlNodeType.Element ->                    
                    let collection = 
                        match children with 
                        | Some xs -> xs 
                        | None -> 
                            let xs = new ResizeArray<ElementTree>() 
                            children <- Some xs
                            xs                                                                                            
                    match reader.IsEmptyElement with
                    | true -> EmptyElement reader.Name
                    | false -> ParseElement (reader.Depth+1)
                    |> collection.Add |> ignore                           
                | XmlNodeType.Text -> 
                    let builder =
                        match value with
                        | Some x -> x
                        | None -> let x = StringBuilder() in value <- Some x; x
                    builder.Append reader.Value |> ignore
                | _ -> ()                                                                             
            done                
            match children, value with
            | None, None -> EmptyElement(name) 
            | None, Some value -> ValueElement(name, value.ToString())
            | Some children, _ -> ParentElement(name, children)        
        reader.MoveToContent () |> ignore     
        ParseElement (reader.Depth+1)
    member element.WriteContentTo (writer:XmlWriter) =
        let rec WriteElement el =            
            match el with
            | ParentElement (name,children) ->
                writer.WriteStartElement(name)
                children |> Seq.iter (fun child -> WriteElement child)
                writer.WriteEndElement()
            | ValueElement (name,value) -> 
                writer.WriteElementString(name,value)
            | EmptyElement name -> 
                writer.WriteStartElement(name); 
                writer.WriteEndElement()
            writer.WriteWhitespace(Environment.NewLine) 
        WriteElement element
        writer.Close()             
    member element.ToXml () =
        let output = StringBuilder()
        let settings = XmlWriterSettings(Indent=true)
        using (XmlWriter.Create(output, settings))                        
            element.WriteContentTo    
        output.ToString()         
    member element.Name = 
        match element with         
        | ParentElement (name,_) -> name                
        | ValueElement (name,_) -> name
        | EmptyElement (name) -> name
    member element.Value = 
        match element with         
        | ParentElement (_,children) -> String.Empty
        | ValueElement (_,value) -> value
        | EmptyElement (_) -> String.Empty    
    member element.Children = 
        match element with
        | ParentElement (_, children) -> children
        | ValueElement (_,_) -> Seq.empty
        | EmptyElement (_) -> Seq.empty                 
    member element.FindElement name = 
        element.Children |> Seq.find (fun child -> child.Name = name)                

 

let reader = XmlReader.Create(new StringReader(xml))
let root = ElementTree.Parse(reader)
{
    Title = Some(root.FindElement("title").Value)
    Link = Some(root.FindElement("link").Value)
    Description = Some(root.FindElement("description").Value)
}    

 

XML Reading Comparison Summary Table (faster times are better)

Technique Time Remarks
Event based 1383 Verbose, error prone and ridiculously slow
XmlReader 73 Concise
XmlDocument 65 Concise
XElement 64 Concise - shame about implicit conversions
F# Tree 68 Concise

Note

For larger XML docs try other techniques like XPath and reflection based serialization e.g.: 

Skills Matter: F# Intro Talk Material - Slides & Code Samples

Thanks to everyone who made it down to Skills Matter on Tuesday for Scott Cowan's Lucene.Net talk and my F# Introduction talk. There was a great turnout, with more chairs needing to be brought in, plus some excellent questions, and also a good group for the pub after. My slides are attached to this post and a video/podcast should be available soon here:

http://skillsmatter.com/podcast/open-source-dot-net/phil-trelford-f-introduction 

I have also put the code samples from my F# Intro talk up on the F# Wiki:

  • fsweet sample WFP Twitter client script (<200 lines)
  • Mastermind sample WPF mini-game (<300 lines)

Below a screen shot of the fsweet script, showing a Twitter friends timeline, used to make a live Twitter update during the talk. 

screenshot of fsweet F# Twitter client

FSharpIntroduction.pptx (376.59 kb)

fsweet.fsx (8.58 kb)