Anonymous
Domino 2.0 Rich Internet Applications with IBM Lotus Notes/Domino
You are here: Today » Notes Rich Text: now doing Semantic XHTML
« Update on IBM Blog converting images from Notes Rich Text in MIME
Notes Rich Text to XHTML: the code »

Notes Rich Text: now doing Semantic XHTML

A reader of my blog, Jan Schulz, has written his own htmlTidy routine to use while converting Notes RichText to XHTML. It looks for font sizes to output headers and paragraphs, making the result much nicer, Semantic XHTML. Thanks, Jan!

The code

Function htmlTidy(Byval html As String) As String
    ' Cleans up HTML text (as produced from a Notes Rich Text field) so that it is
    ' closer to semantic XHTML.
    '
    ' Parameter:
    '   html - the HTML text to tidy; lines are expected to be separated by CR+LF.
    ' Returns:
    '   the tidied text. Steps performed, in order:
    '     1. <u> is dropped, <i> becomes <em>, <b> becomes <strong>.
    '     2. Lines starting with "<br>" become <p> paragraphs, or <h2>/<h1> headlines
    '        when the whole line is wrapped in a <font> tag containing size=3/size=4.
    '     3. All remaining <font ...> and </font> tags are removed.
    '     4. Attributes are stripped from <table>, <tr>, <td> and <li> tags, and the
    '        missing </tr>, </td> and </li> end tags are inserted.
    '     5. Unquoted <a href=...> values are wrapped in double quotes.
    '     6. Line breaks are inserted around list, table and div end tags, and
    '        align=center/right divs are converted to inline text-align styles.
    '
    ' All string positions are held in Long variables: Instr on a document larger
    ' than 32767 characters would overflow an Integer.
    Dim crlf As String, tmp As String, tmpx As String
    Dim headTag As String
    Dim p As Variant, t As Variant
    Dim x As Long, u As Long, gt As Long, tEnd As Long
    
    tmp = html
    crlf = Chr$(13) & Chr$(10)
'    tmp=Replace(tmp, "<br>", "<br />")
    tmp = Replace(tmp, "<u>", "")
    tmp = Replace(tmp, "</u>", "")
    tmp = Replace(tmp, "<i>", "<em>")
    tmp = Replace(tmp, "</i>", "</em>")
    tmp = Replace(tmp, "<b>", "<strong>")
    tmp = Replace(tmp, "</b>", "</strong>")
    
    ' --- Paragraphs and headlines, line by line ---
    ' NOTE(review): the loop starts at index 1, so the first line (index 0) is never
    ' converted. Kept as in the original; confirm whether that is intended.
    p = Split(tmp, crlf)
    For x = 1 To Ubound(p)
        If Left$(p(x), 4) = "<br>" Then
            headTag = ""
            ' To be a headline, the line must start with a <font ...> tag and the
            ' next </font> must sit at the end of the line.
            If Left$(p(x), 9) = "<br><font" Then
                If Not (Instr(5, p(x), "</font>") < Len(p(x)) - 7) Then
                    ' Candidate headline, but only if the font size is right.
                    tEnd = Instr(5, p(x), ">")
                    tmpx = Mid$(p(x), 1, tEnd)   ' "<br><font ...>" prefix
                    If Instr(1, tmpx, "size=3") <> 0 Then
                        headTag = "h2"
                    Elseif Instr(1, tmpx, "size=4") <> 0 Then
                        headTag = "h1"
                    End If
                End If
            End If
            
            If headTag <> "" Then
                ' Replace the "<br><font ...>" prefix with the opening headline tag
                ' and the trailing </font> (7 characters) with the closing one.
                p(x) = "<" + headTag + ">" + Mid$(p(x), tEnd + 1, Len(p(x)) - 7 - Len(tmpx)) + "</" + headTag + ">"
                ' Headlines carry no inline emphasis.
                p(x) = Replace(p(x), "<em>", "")
                p(x) = Replace(p(x), "</em>", "")
                p(x) = Replace(p(x), "<strong>", "")
                p(x) = Replace(p(x), "</strong>", "")
            Else
                ' Just a paragraph: drop the leading "<br>" and wrap the rest.
                p(x) = "<p>" + Mid$(p(x), 5) + "</p>"
            End If
        End If
    Next
    tmp = Join(p, crlf)
    
    ' --- Remove every remaining <font ...> tag ---
    x = Instr(1, tmp, "<font")
    While x <> 0
        gt = Instr(x, tmp, ">")
        If gt = 0 Then
            ' Unterminated tag: nothing sensible to remove, stop scanning.
            x = 0
        Else
            tmpx = Mid$(tmp, x, gt - x + 1)
            tmp = Replace(tmp, tmpx, "")
            x = Instr(x, tmp, "<font")
        End If
    Wend
    tmp = Replace(tmp, "</font>", "")
    
    ' --- Strip attributes from <table ...> ---
    x = Instr(1, tmp, "<table ")
    While x <> 0
        gt = Instr(x, tmp, ">")
        If gt = 0 Then
            x = 0
        Else
            tmpx = Mid$(tmp, x, gt - x + 1)
            tmp = Replace(tmp, tmpx, "<table>")
            x = Instr(1, tmp, "<table ")
        End If
    Wend
    
    ' --- Close each <tr> (before </table> if present in the fragment) and strip attributes ---
    t = Split(tmp, "<tr")
    For x = 1 To Ubound(t)
        u = Instr(t(x), "</table>")
        If u = 0 Then
            t(x) = t(x) + "</tr>"
        Else
            t(x) = Left$(t(x), u - 1) + "</tr>" + Mid$(t(x), u)
        End If
        gt = Instr(t(x), ">")
        If gt > 0 Then
            t(x) = "<tr" + Mid$(t(x), gt)
        Else
            t(x) = "<tr" + t(x)
        End If
    Next
    tmp = Join(t, "")
    
    ' --- Close each <td> (before </tr> if present in the fragment) and strip attributes ---
    t = Split(tmp, "<td")
    For x = 1 To Ubound(t)
        u = Instr(t(x), "</tr>")
        If u = 0 Then
            t(x) = t(x) + "</td>"
        Else
            t(x) = Left$(t(x), u - 1) + "</td>" + Mid$(t(x), u)
        End If
        gt = Instr(t(x), ">")
        If gt > 0 Then
            t(x) = "<td" + Mid$(t(x), gt)
        Else
            t(x) = "<td" + t(x)
        End If
    Next
    tmp = Join(t, "")
    
    ' --- Close each <li> (before </ul> or </ol> if present in the fragment) and strip attributes ---
    t = Split(tmp, "<li")
    For x = 1 To Ubound(t)
        u = Instr(t(x), "</ul>")
        If u = 0 Then
            u = Instr(t(x), "</ol>")
        End If
        If u = 0 Then
            t(x) = t(x) + "</li>"
        Else
            t(x) = Left$(t(x), u - 1) + "</li>" + Mid$(t(x), u)
        End If
        gt = Instr(t(x), ">")
        If gt > 0 Then
            t(x) = "<li" + Mid$(t(x), gt)
        Else
            t(x) = "<li" + t(x)
        End If
    Next
    tmp = Join(t, "")
    
    ' --- Quote unquoted href values ---
    t = Split(tmp, "<a href=")
    For x = 1 To Ubound(t)
        u = Instr(t(x), ">")
        If u = 0 Or Left$(t(x), 1) = |"| Then
            ' No closing ">" or value already quoted: restore the prefix untouched.
            t(x) = "<a href=" + t(x)
        Else
            t(x) = |<a href="| + Left$(t(x), u - 1) + |"| + Mid$(t(x), u)
        End If
    Next
    tmp = Join(t, "")
    
    ' --- Final layout and alignment clean-up ---
    tmp = Replace(tmp, "</ul>", crlf + "</ul>" + crlf)
    tmp = Replace(tmp, "</ol>", crlf + "</ol>" + crlf)
    tmp = Replace(tmp, crlf + "</li>", "</li>" + crlf)
    tmp = Replace(tmp, "</tr><tr>", "</tr>" + crlf + "<tr>")
    tmp = Replace(tmp, crlf + "</td>", "</td>" + crlf)
    tmp = Replace(tmp, "</tr></table>", crlf + "</tr>" + crlf + "</table>")
    tmp = Replace(tmp, |<div align=center>|, |<div style="text-align:center">|)
    tmp = Replace(tmp, |<div align=right>|, |<div style="text-align:right">|)
    tmp = Replace(tmp, "</div>", crlf + "</div>" + crlf)
    
    htmlTidy = tmp
End Function

Validating XHTML

We have to test the result to see if it always outputs valid XHTML. To make it valid as XHTML, we have to make it 100% dummy-proof, e.g. always rendering properly nested tags, not allowing block elements in inline ones etc. Also: there should only ever be one h1 on the page, containing the main subject. So headers in the Rich Text field would start with h2.

Maybe in a future post I will build a little tool for comparing the original Rich Text rendered by Domino as MIME with the result of the htmlTidy function, offering a preview, a view of the source, and a direct link to validate the tags with the W3C Markup Validation Service. This would make testing and improving the code more fun. Also, the entire code should eventually go into a class, making it possible to convert more than one Rich Text field at once without saving the document for every field.

And there is still a third path to cover: a Rich Text editor in the browser. There are a lot of them on the internet already, but they are all fairly heavy and most of them don't output semantically valid XHTML. Another mountain to climb.

Star rating

0%

Comments

To add a comment, log in or register as new user. It's free and safe.