HTML代码转换到RTF格式 procedure HTMLtoRTF(html: string; var rtf: TRichedit); var i, dummy, row: Integer; cfont: TFont; { Standard sschrift } Tag, tagparams: string; params: TStringList; function GetTag(s: string; var i: Integer; var Tag, tagparams: string): Boolean; var a_tag: Boolean; begin GetTag := False; Tag := ''; tagparams := ''; a_tag := False; while i <= Length(s) do begin Inc(i); // es wird nochein tag geöffnet --> das erste war kein tag; if s[i] = '<' then begin GetTag := False; Exit; end; if s[i] = '>' then begin GetTag := True; Exit; end; if not a_tag then begin if s[i] = ' ' then begin if Tag <> '' then a_tag := True; end else Tag := Tag + s[i]; end else tagparams := tagparams + s[i]; end; end; procedure GetTagParams(tagparams: string; var params: TStringList); var i: Integer; s: string; gleich: Boolean; // kontrolliert ob nach dem zeichen bis zum nächsten zeichen ausser // leerzeichen ein Ist-Gleich-Zeichen kommt function notGleich(s: string; i: Integer): Boolean; begin notGleich := True; while i <= Length(s) do begin Inc(i); if s[i] = '=' then begin notGleich := False; Exit; end else if s[i] <> ' ' then Exit; end; end; begin Params.Clear; s := ''; for i := 1 to Length(tagparams) do begin if (tagparams[i] <> ' ') then begin if tagparams[i] <> '=' then gleich := False; if (tagparams[i] <> '''') and (tagparams[i] <> '"') then s := s + tagparams[i] end else begin if (notGleich(tagparams, i)) and (not Gleich) then begin params.Add(s); s := ''; end else Gleich := True; end; end; params.Add(s); end; function HtmlToColor(Color: string): TColor; begin Result := StringToColor('$' + Copy(Color, 6, 2) + Copy(Color, 4, 2) + Copy(Color, 2, 2)); end; procedure TransformSpecialChars(var s: string; i: Integer); var c: string; z, z2: Byte; i2: Integer; const nchars = 9; chars: array[1..nchars, 1..2] of string = (('Ö', 'Ö'), ('ö', 'ö'), ('Ä', 'Ä'), ('ä', 'ä'), ('Ü', 'Ü'), ('¨¹', '¨¹'), ('ß', 'ß'), ('<', '<'), ('>', '>')); begin // Maximal die nächsten 7 zeichen auf sonderzeichen ¨¹berpr¨¹fen c := ''; i2 := i; for z := 1 to 7 do begin c := c + s[i2]; for z2 := 1 to nchars do begin if chars[z2, 1] = c then begin Delete(s, i, Length(c)); Insert(chars[z2, 2], s, i); Exit; end; end; Inc(i2); end; end; // HtmlTag Schriftgröße in pdf größe umwandeln function CalculateRTFSize(pt: Integer): Integer; begin case pt of 1: Result := 6; 2: Result := 9; 3: Result := 12; 4: Result := 15; 5: Result := 18; 6: Result := 22; else Result := 30; end; end; // Die Font-Stack Funktionen type fontstack = record Font: array[1..100] of tfont; Pos: Byte; end; procedure CreateFontStack(var s: fontstack); begin s.Pos := 0; end; procedure PushFontStack(var s: Fontstack; fnt: TFont); begin Inc(s.Pos); s.Font[s.Pos] := TFont.Create; s.Font[s.Pos].Assign(fnt); end; procedure PopFontStack(var s: Fontstack; var fnt: TFont); begin if (s.Font[s.Pos] <> nil) and (s.Pos > 0) then begin fnt.Assign(s.Font[s.Pos]); // vom stack nehmen s.Font[s.Pos].Free; Dec(s.Pos); end; end;
procedure FreeFontStack(var s: Fontstack); begin while s.Pos > 0 do begin s.Font[s.Pos].Free; Dec(s.Pos); end; end; var fo_cnt: array[1..1000] of tfont; fo_liste: array[1..1000] of Boolean; fo_pos: TStringList; fo_stk: FontStack; wordwrap, liste: Boolean; begin CreateFontStack(fo_Stk); fo_Pos := TStringList.Create; rtf.Lines.BeginUpdate; rtf.Lines.Clear; // Das wordwrap vom richedit merken wordwrap := rtf.wordwrap; rtf.WordWrap := False; // erste Zeile hinzuf¨¹gen rtf.Lines.Add(''); Params := TStringList.Create; cfont := TFont.Create; cfont.Assign(rtf.Font); i := 1; row := 0; Liste := False; // Den eigentlichen Text holen und die Formatiorung merken rtf.selstart := 0; if Length(html) = 0 then Exit; repeat; if html[i] = '<' then begin dummy := i; GetTag(html, i, Tag, tagparams); GetTagParams(tagparams, params); // Das Font-Tag if Uppercase(Tag) = 'FONT' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); if params.Values['size'] <> '' then cfont.Size := CalculateRTFSize(StrToInt(params.Values['size'])); if params.Values['color'] <> '' then cfont.Color := htmltocolor(params.Values['color']); end else if Uppercase(Tag) = '/FONT' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H1' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 6; end else if Uppercase(Tag) = '/H1' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H2' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 9; end else if Uppercase(Tag) = '/H2' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H3' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 12; end else if Uppercase(Tag) = '/H3' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H4' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 15; end else if Uppercase(Tag) = '/H4' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H5' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 18; end else if Uppercase(Tag) = '/H5' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H6' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 22; end else if Uppercase(Tag) = '/H6' then popFontstack(fo_stk, cfont) else // Die H-Tags-Überschriften if Uppercase(Tag) = 'H7' then begin // Schrift auf fontstack sichern pushFontstack(fo_stk, cfont); cfont.Size := 27; end else if Uppercase(Tag) = '/H7' then popFontstack(fo_stk, cfont) else // Bold-Tag if Uppercase(Tag) = 'B' then cfont.Style := cfont.Style + [fsbold] else if Uppercase(Tag) = '/B' then cfont.Style := cfont.Style - [fsbold] else // Italic-Tag if Uppercase(Tag) = 'I' then cfont.Style := cfont.Style + [fsitalic] else if Uppercase(Tag) = '/I' then cfont.Style := cfont.Style - [fsitalic] else // underline-Tag if Uppercase(Tag) = 'U' then cfont.Style := cfont.Style + [fsunderline] else if Uppercase(Tag) = '/U' then cfont.Style := cfont.Style - [fsunderline] else // underline-Tag if Uppercase(Tag) = 'UL' then liste := True else if Uppercase(Tag) = '/UL' then begin liste := False; rtf.Lines.Add(''); Inc(row); rtf.Lines.Add(''); Inc(row); end else // BR - Breakrow tag if (Uppercase(Tag) = 'BR') or (Uppercase(Tag) = 'LI') then begin rtf.Lines.Add(''); Inc(row); end; // unbekanntes tag als text ausgeben // else rtf.Lines[row]:=RTF.lines[row]+'<'+tag+' '+tagparams+'>'; fo_pos.Add(IntToStr(rtf.selstart)); fo_cnt[fo_pos.Count] := TFont.Create; fo_cnt[fo_pos.Count].Assign(cfont); fo_liste[fo_pos.Count] := liste; end else begin // Spezialzeichen ¨¹bersetzen if html[i] = '&' then Transformspecialchars(html, i); if (Ord(html[i]) <> 13) and (Ord(html[i]) <> 10) then rtf.Lines[row] := RTF.Lines[row] + html[i]; end; Inc(i); until i >= Length(html); // dummy eintragen fo_pos.Add('999999'); // Den fertigen Text formatieren for i := 0 to fo_pos.Count - 2 do begin rtf.SelStart := StrToInt(fo_pos[i]); rtf.SelLength := StrToInt(fo_pos[i + 1]) - rtf.SelStart; rtf.SelAttributes.Style := fo_cnt[i + 1].Style; rtf.SelAttributes.Size := fo_cnt[i + 1].Size; rtf.SelAttributes.Color := fo_cnt[i + 1].Color; // die font wieder freigeben; fo_cnt[i + 1].Free; end; // die Paragraphen also Listen setzen i := 0; while i <= fo_pos.Count - 2 do begin if fo_liste[i + 1] then begin rtf.SelStart := StrToInt(fo_pos[i + 1]); while fo_liste[i + 1] do Inc(i); rtf.SelLength := StrToInt(fo_pos[i - 1]) - rtf.SelStart; rtf.Paragraph.Numbering := nsBullet; end; Inc(i); end; rtf.Lines.EndUpdate; Params.Free; cfont.Free; rtf.WordWrap := wordwrap; FreeFontStack(fo_stk); end;
谢谢 fei19790920(饭桶的马甲(抵制日货)) 不过我需要的最好还是有一个控件
你去www.2ccc.com,这里边有很多控件
{$D-,L-,Y-} unit rtf2html;(* --------------------------------------------------------------------------------Revision history: Nb. Date: Author What was done. 003 ? ? ? 002 21-aug-1997 TSE Minor (very minor) cleanup before release 001 20-aug-1997 TSE Unit created - RtfToHTML function designed and written.Author list: TSE Troels Skovmand Eriksen [email protected] [email protected] ? ? ?Supported features: rev. 001 Indents, Bullets, Left-, Centered & Rightjustified text, Text styles (bold, italics and underline), Fonts (face, size, color). rev. 002 - do - rev. 3 ?-------------------------------------------------------------------------------- This unit and all procedures and functions herein is released as freeware. Any components or units created using this unit or portions hereoff must be released as freeware (This does not cover applications - they may be shareware/commercial as needed). Part of the function RtfToHTML may be covered by some obscure Microsoft copyright since it reads the RTF format - check it out yourself, if you do something worthwhile. Please let this preface stay if you publish a changed/updated version of this unit and write all changes the "Revision history" section above. Who-Dun-it information should be inserted in the "Author list" and the "Supported features" section should be updated. This makes it easier to pass the blame :-) I'm finished with this unit for now - but please email a copy of any changes you make to me - Troels S Eriksen.-------------------------------------------------------------------------------- The following should be fixed - if anybody want to do it? * Should be rewritten into a conversion class - could be tricky, since it seems like a stream only contains 4 Kb at a time ... * Code should be cleaned up - this below is not that fast ... * The indents (\li###) should be translated to <BLOCKQUOTE> or just a <UL> with no <LI> elements. * The hanging paragraphs should be translated to definitionlists ( the <DL COMPACT> <DT> term <DD> definition </DL> structure ) * Tabs (\tab) should be fixed somehow ( heck, I DO want a <TAB> code ! ) * Embedded objects / pictures should be converted to .gif's - I know it's possible * Some nice way to handle links ( the way .rtf-sources for helpfiles do ? ) * A even more nice way of handling tables - could fix the indent / tab-problems as well-------------------------------------------------------------------------------- The idea and logic behind this weirdo function : Well, the idea was to write a pascal RTF-2-HTML converter which doesn't just do some "search and replace" - but actually converts the dammed stuff. Since there's a difference between HTML and RTF in the code-sequencing, I decided to try storing all paragraph and textformatting information in two records (PARFMT and TXTFMT) and only write the contents of these to the output "stream" when needed. This first attempt is successfull - not highly, but it'll convert the contents of a TRichEdit control and most other .rtf documents to HTML and keep the general layout.Enjoy it Troels S Eriksen, Den--------------------------------------------------------------------------------*) {$LONGSTRINGS ON}interface function RtfToHtml(const rtf:string):string;implementation uses Classes, SysUtils;function RtfToHtml(const rtf:string):string;type TState = record FntTbl : boolean; ColTbl : boolean; FntLst, ColLst : TStringList; end; TPARFMT = record Alignment : TAlignment; { h鴍re, venstre, centreret tekst } Bullets : integer; { Skriv bulletliste <UL> = 1 Skriv element <LI> = 2 Skriv element slut </LI> = 3 Skriv liste slut </UL> = 4 } Written : boolean; { true hvis skrevet til streng } end; TTXTFMT = record ChangeF : boolean; DefFont : integer; Font : integer; Fontsize : integer; Color : integer; Bold : integer; Italics : integer; Underline : integer; Written : boolean; end;var indx : integer; // index i rtf-streng ParFmt : TParFmt; TxtFmt : TTxtFmt; State : TState; Group : integer; Col : string[10]; Fnt : string[63]; procedure WriteChar(c:Char); var S : string; begin s:=''; // First - get ready to write paragraph formatting With PARFMT do if not Written then begin // TextAttr's must be off before starting a new paragraph { add "uses forms" to the implementation or interface statement, then call application.processmessages here - this would allow you to work the application interface will saving a large file. } With TXTFMT do begin if bold>1 then begin s:=s+'</B>'; if bold=3 then bold:=0; end; if italics>1 then begin s:=s+'</I>'; if italics=3 then Italics:=0; end; if underline>1 then begin s:=s+'</U>'; if underline=3 then Underline:=0; end; end; { Write either bulletlist or left-, center, rightjustified paragraph (doing it this way makes bulletlists leftjustified no matter what) } case Bullets of 0 : case Alignment of taLeftJustify : s:=s+#13#10'<P>'; taRightJustify: s:=s+#13#10'<P ALIGN=RIGHT>'; taCenter : s:=s+#13#10'<P ALIGN=CENTER>'; end; 1 : s:=s+#13#10'<UL>'; 2 : s:=s+#13#10'<LI>'; 3 : s:=s+'</LI>'; 4 : begin s:=s+#13#10'</UL>'; Bullets:=0; end; 5 : begin s:=s+'<BR>'#13#10#160#32#160#32#160; Bullets:=0; end; end;
// If any textattr's was on before - they are re-enabled With TXTFMT do begin If Bold=2 then s:=s+'<B>'; If Italics=2 then s:=s+'<I>'; If Underline=2 then s:=s+'<U>'; end; Written:=TRUE; end; { PARFMT } // Second - Write any textattr's With TXTFMT do if not written then begin // If font has changed - write it If changeF then begin s:=s+'<FONT FACE="'+state.fntlst.strings[Font]+ '" COLOR="'+state.collst.strings[Color]+ '" SIZE="'+IntToStr(FontSize)+'">'; ChangeF:=FALSE; end; // If any textattr's should be written - do it case Bold of 1 : begin s:=s+'<B>'; bold:=2; end; 3 : begin s:=s+'</B>'; Bold:=0; end; end; case Italics of 1 : begin s:=s+'<I>'; Italics:=2; end; 3 : begin s:=s+'</I>'; Italics:=0; end; end; case Underline of 1 : begin s:=s+'<U>'; Underline:=2; end; 3 : begin s:=s+'</U>'; Underline:=0; end; end; Written:=TRUE; end; // At last - write the character it self case c of #0 : result:=result+s; // Writes pending codes only #9 : result:=result+s+#9; // Writes tab char '>' : result:=result+s+'>'; // Writes "greater than" '<' : result:=result+s+'<'; // Writes "less than" else result:=result+s+c; // Writes a character end; end; { WriteChar } function Resolve(c:char):integer; { Convert char to integer value - used to decode \'## to an ansi-value } begin case byte(c) of 48..57 : Result:=byte(c)-48; 65..70 : Result:=byte(c)-55; else Result:=0; end; end; { resolve } function CollectCode(i:integer):integer; var Value, Keyword : string; a : integer; begin KeyWord:=''; // First - check if keyword is any "special" keyword or is a normal one ... case rtf[i+1] of '*' : begin // Ignorre to end of group a:=group; repeat case rtf[i] of '{' : inc(group); '}' : dec(group); end; inc(i); until (group+1)=a; result:=i-1; end; #39 : begin // Decode hex value WriteChar(char(resolve(upcase(rtf[i+2]))*16+resolve(upcase(rtf[i+3])))); Inc(i,3); result:=i; end; '\','{','}' : begin // Return special character WriteChar(rtf[i+1]); inc(i); result:=i; end; else begin // First - get keyword ... repeat keyword:=keyword+rtf[i]; inc(i); until (rtf[i] in ['{','\','}',' ',';','-','0'..'9']); // Second - get any value following ... Value :=''; While (rtf[i] in ['a'..'z','-','0'..'9']) do begin value:=value+rtf[i]; inc(i); end; if rtf[i]=' ' then inc(i); while (rtf[i] in ['{','}',';']) do inc(i); result:=i-1; { Check which keyword and what to do - NB: Test shows that using IF THEN ELSE .. is approx. 10% more efficient than calling EXIT } if keyword='\par' then with PARFMT do begin // New paragraph or bullet item if Bullets=2 then Bullets:=3; Written:=FALSE; end else if keyword='\f' then case state.fnttbl of true : begin // Make fontlist fnt:=''; While rtf[i]<>' ' do inc(i); // Ignore fontfamily info etc inc(i); While rtf[i]<>';' do begin // Read font name Fnt:=Fnt+rtf[i]; inc(i); end; dec(group); // Stop group result:=i+1; // Move one beyond group end State.FntLst.Add(Fnt); // Add fontname to fontlist end; { true } false: With TXTFMT do begin // Use fontlist a:=StrToIntDef(value,0); if font<>a then begin // Change Textattr's to new font ChangeF:=TRUE; Written:=FALSE; FONT :=a; end; end; { false } end else if keyword='\plain' then with TXTFMT do begin // Zero textattr's If bold=2 then Bold:=3; If Italics=2 then Italics:=3; If Underline=2 then Underline:=3; if (bold=3) or (italics=3) or (underline=3) or (Color<>0) then begin color:=0; Written:=FALSE; WriteChar(#0); end; end else if keyword='\fs' then with TXTFMT do begin // Change fontsize case StrToIntDef(value,11) div 2 of 1.. 5 : a:=1; 6.. 9 : a:=2; 10..11 : a:=3; 12..13 : a:=4; 14..15 : a:=5; else a:=6; end;
if a<>Fontsize then begin Written:=False; Fontsize:=a; ChangeF:=TRUE; end; end else if keyword='\tab' then begin WriteChar(#9); end else if keyword='\ul' then with TXTFMT do begin // Set underline Written:=FALSE; if underline=0 then Underline:=1; end else if keyword='\b' then with TXTFMT do begin // Set bold Written:=FALSE; if bold=0 then Bold:=1; end else if keyword='\i' then with TXTFMT do begin // Set italics Written:=FALSE; if italics=0 then Italics:=1; end else if keyword='\cf' then with TXTFMT do begin // Change fontcolor a:=StrToIntDef(value,0); If Color<>a then begin Written:=FALSE; ChangeF:=TRUE; Color:=a; end; end else if keyword='\qc' then begin // Set paragraphformat (center) PARFMT.Alignment:=taCenter; PARFMT.Written:=FALSE; end else if keyword='\qr' then begin // Set paragraphformat (right) PARFMT.Alignment:=taRightJustify; PARFMT.Written:=FALSE; end else if keyword='\pntext' then with PARFMT do begin // Start bullet list item Written :=FALSE; Bullets :=2; a:=group; repeat case rtf[i] of '{' : inc(group); '}' : dec(group); end; inc(i); until (group+1)=a; result:=i-1; end else if keyword='\fi' then with PARFMT do begin // Start bullet list Written :=FALSE; Bullets :=1; WriteChar(#0); end else if keyword='\pard' then with PARFMT do begin // Stop paragraph / Bulletlist Alignment:=taLeftJustify; If Bullets>0 then Bullets:=4; Written:=FALSE; end else if keyword='\red' then begin col:='#'+IntToHex(StrToIntDef(value,255),2); // Get Red color end else if keyword='\green' then begin col:=col+IntToHex(StrToIntDef(value,255),2); // Get Green color end else if keyword='\blue' then begin col:=col+IntToHex(StrToIntDef(value,255),2); // Get blue color State.ColLst.Add(col); // Add RGB in colorlist end else if keyword='\deff' then with TXTFMT do begin DefFont:=StrToIntDef(value,0); // Default font end else if keyword='\fonttbl' then begin state.fnttbl:=true; // Create font-list end else if keyword='\colortbl' then begin state.coltbl:=true; // Create color-list end else if keyword='\deflang' then begin state.fnttbl:=False; // Update is finished With PARFMT do begin // Setup paragraphformat Alignment:=taLeftJustify; Written:=false; Bullets:=0; end; With TXTFMT do begin // Setup font-format Font :=DefFont; Fontsize :=3; Color :=0; Bold :=0; Italics :=0; Underline :=0; Written :=false; end; state.coltbl:=True; // Update is finished end; { last if then } end; { case else } end; end; { collectcode } function CleanUp(s:string):string; // This could be done without, but - hey - it's nice var a : integer; begin // Nice up any empty <P>aragraph statements While pos(#13#10'<P>'#13#10'<P',s)>0 do begin a:=pos(#13#10'<P>'#13#10'<P',s); system.delete(s,a,6); system.insert('</P>',s,a); end; result:=s; end; { cleanup }var crsr : integer;begin try State.FntLst:=TstringList.Create; // Create fontlist State.ColLst:=TstringList.Create; // Create colorlist indx:=0; result:=''; repeat inc(indx); case rtf[indx] of #0..#31 : ; // Ascii ctrl-char - ignorre '{' : Inc(group); '}' : Dec(group); '\' : indx:=collectcode(indx); // Code found - the fun starts ... else begin WriteChar(rtf[indx]); // Write char and any pending html-codes ... Inc(indx); // Speedwrite normal chars till next special one while (indx<length(rtf)) and not (rtf[indx] in ['{','}','\','<','>',#00..#31]) do begin result:=result+rtf[indx]; inc(indx); end; dec(indx); end; end; until indx=length(rtf); finally result:=cleanup(result); // Return the HTML document State.FntLst.free; State.ColLst.free; end; end;end.
procedure HTMLtoRTF(html: string; var rtf: TRichedit);
var
i, dummy, row: Integer;
cfont: TFont; { Standard sschrift }
Tag, tagparams: string;
params: TStringList; function GetTag(s: string; var i: Integer; var Tag, tagparams: string): Boolean;
var
a_tag: Boolean;
begin
GetTag := False;
Tag := '';
tagparams := '';
a_tag := False; while i <= Length(s) do
begin
Inc(i);
// es wird nochein tag geöffnet --> das erste war kein tag;
if s[i] = '<' then
begin
GetTag := False;
Exit;
end; if s[i] = '>' then
begin
GetTag := True;
Exit;
end; if not a_tag then
begin
if s[i] = ' ' then
begin
if Tag <> '' then a_tag := True;
end
else
Tag := Tag + s[i];
end
else
tagparams := tagparams + s[i];
end;
end; procedure GetTagParams(tagparams: string; var params: TStringList);
var
i: Integer;
s: string;
gleich: Boolean; // kontrolliert ob nach dem zeichen bis zum nächsten zeichen ausser
// leerzeichen ein Ist-Gleich-Zeichen kommt
function notGleich(s: string; i: Integer): Boolean;
begin
notGleich := True;
while i <= Length(s) do
begin
Inc(i);
if s[i] = '=' then
begin
notGleich := False;
Exit;
end
else if s[i] <> ' ' then Exit;
end;
end;
begin
Params.Clear;
s := '';
for i := 1 to Length(tagparams) do
begin
if (tagparams[i] <> ' ') then
begin
if tagparams[i] <> '=' then gleich := False;
if (tagparams[i] <> '''') and (tagparams[i] <> '"') then s := s + tagparams[i]
end
else
begin
if (notGleich(tagparams, i)) and (not Gleich) then
begin
params.Add(s);
s := '';
end
else
Gleich := True;
end;
end;
params.Add(s);
end; function HtmlToColor(Color: string): TColor;
begin
Result := StringToColor('$' + Copy(Color, 6, 2) + Copy(Color, 4,
2) + Copy(Color, 2, 2));
end; procedure TransformSpecialChars(var s: string; i: Integer);
var
c: string;
z, z2: Byte;
i2: Integer;
const
nchars = 9;
chars: array[1..nchars, 1..2] of string =
(('Ö', 'Ö'), ('ö', 'ö'), ('Ä', 'Ä'), ('ä', 'ä'),
('Ü', 'Ü'), ('¨¹', '¨¹'), ('ß', 'ß'), ('<', '<'),
('>', '>'));
begin
// Maximal die nächsten 7 zeichen auf sonderzeichen ¨¹berpr¨¹fen
c := '';
i2 := i;
for z := 1 to 7 do
begin
c := c + s[i2];
for z2 := 1 to nchars do
begin
if chars[z2, 1] = c then
begin
Delete(s, i, Length(c));
Insert(chars[z2, 2], s, i);
Exit;
end;
end;
Inc(i2);
end;
end; // HtmlTag Schriftgröße in pdf größe umwandeln
function CalculateRTFSize(pt: Integer): Integer;
begin
case pt of
1: Result := 6;
2: Result := 9;
3: Result := 12;
4: Result := 15;
5: Result := 18;
6: Result := 22;
else
Result := 30;
end;
end; // Die Font-Stack Funktionen
type
fontstack = record
Font: array[1..100] of tfont;
Pos: Byte;
end; procedure CreateFontStack(var s: fontstack);
begin
s.Pos := 0;
end; procedure PushFontStack(var s: Fontstack; fnt: TFont);
begin
Inc(s.Pos);
s.Font[s.Pos] := TFont.Create;
s.Font[s.Pos].Assign(fnt);
end; procedure PopFontStack(var s: Fontstack; var fnt: TFont);
begin
if (s.Font[s.Pos] <> nil) and (s.Pos > 0) then
begin
fnt.Assign(s.Font[s.Pos]);
// vom stack nehmen
s.Font[s.Pos].Free;
Dec(s.Pos);
end;
end;
begin
while s.Pos > 0 do
begin
s.Font[s.Pos].Free;
Dec(s.Pos);
end;
end;
var
fo_cnt: array[1..1000] of tfont;
fo_liste: array[1..1000] of Boolean;
fo_pos: TStringList;
fo_stk: FontStack;
wordwrap, liste: Boolean;
begin
CreateFontStack(fo_Stk); fo_Pos := TStringList.Create; rtf.Lines.BeginUpdate;
rtf.Lines.Clear;
// Das wordwrap vom richedit merken
wordwrap := rtf.wordwrap;
rtf.WordWrap := False; // erste Zeile hinzuf¨¹gen
rtf.Lines.Add('');
Params := TStringList.Create; cfont := TFont.Create;
cfont.Assign(rtf.Font); i := 1;
row := 0;
Liste := False;
// Den eigentlichen Text holen und die Formatiorung merken
rtf.selstart := 0;
if Length(html) = 0 then Exit;
repeat; if html[i] = '<' then
begin
dummy := i;
GetTag(html, i, Tag, tagparams);
GetTagParams(tagparams, params); // Das Font-Tag
if Uppercase(Tag) = 'FONT' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
if params.Values['size'] <> '' then
cfont.Size := CalculateRTFSize(StrToInt(params.Values['size'])); if params.Values['color'] <> '' then cfont.Color :=
htmltocolor(params.Values['color']);
end
else if Uppercase(Tag) = '/FONT' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H1' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 6;
end
else if Uppercase(Tag) = '/H1' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H2' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 9;
end
else if Uppercase(Tag) = '/H2' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H3' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 12;
end
else if Uppercase(Tag) = '/H3' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H4' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 15;
end
else if Uppercase(Tag) = '/H4' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H5' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 18;
end
else if Uppercase(Tag) = '/H5' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H6' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 22;
end
else if Uppercase(Tag) = '/H6' then popFontstack(fo_stk, cfont)
else // Die H-Tags-Überschriften
if Uppercase(Tag) = 'H7' then
begin
// Schrift auf fontstack sichern
pushFontstack(fo_stk, cfont);
cfont.Size := 27;
end
else if Uppercase(Tag) = '/H7' then popFontstack(fo_stk, cfont)
else // Bold-Tag if Uppercase(Tag) = 'B' then cfont.Style := cfont.Style + [fsbold]
else if Uppercase(Tag) = '/B' then cfont.Style := cfont.Style - [fsbold]
else // Italic-Tag if Uppercase(Tag) = 'I' then cfont.Style := cfont.Style + [fsitalic]
else if Uppercase(Tag) = '/I' then cfont.Style := cfont.Style - [fsitalic]
else // underline-Tag if Uppercase(Tag) = 'U' then cfont.Style := cfont.Style + [fsunderline]
else if Uppercase(Tag) = '/U' then cfont.Style := cfont.Style - [fsunderline]
else // underline-Tag if Uppercase(Tag) = 'UL' then liste := True
else if Uppercase(Tag) = '/UL' then
begin
liste := False;
rtf.Lines.Add('');
Inc(row);
rtf.Lines.Add('');
Inc(row);
end
else // BR - Breakrow tag if (Uppercase(Tag) = 'BR') or (Uppercase(Tag) = 'LI') then
begin
rtf.Lines.Add('');
Inc(row);
end; // unbekanntes tag als text ausgeben
// else rtf.Lines[row]:=RTF.lines[row]+'<'+tag+' '+tagparams+'>'; fo_pos.Add(IntToStr(rtf.selstart));
fo_cnt[fo_pos.Count] := TFont.Create;
fo_cnt[fo_pos.Count].Assign(cfont);
fo_liste[fo_pos.Count] := liste;
end
else
begin
// Spezialzeichen ¨¹bersetzen
if html[i] = '&' then Transformspecialchars(html, i); if (Ord(html[i]) <> 13) and (Ord(html[i]) <> 10) then
rtf.Lines[row] := RTF.Lines[row] + html[i];
end; Inc(i); until i >= Length(html);
// dummy eintragen
fo_pos.Add('999999'); // Den fertigen Text formatieren
for i := 0 to fo_pos.Count - 2 do
begin
rtf.SelStart := StrToInt(fo_pos[i]);
rtf.SelLength := StrToInt(fo_pos[i + 1]) - rtf.SelStart;
rtf.SelAttributes.Style := fo_cnt[i + 1].Style;
rtf.SelAttributes.Size := fo_cnt[i + 1].Size;
rtf.SelAttributes.Color := fo_cnt[i + 1].Color; // die font wieder freigeben;
fo_cnt[i + 1].Free;
end; // die Paragraphen also Listen setzen
i := 0;
while i <= fo_pos.Count - 2 do
begin
if fo_liste[i + 1] then
begin
rtf.SelStart := StrToInt(fo_pos[i + 1]);
while fo_liste[i + 1] do Inc(i);
rtf.SelLength := StrToInt(fo_pos[i - 1]) - rtf.SelStart;
rtf.Paragraph.Numbering := nsBullet;
end;
Inc(i);
end;
rtf.Lines.EndUpdate;
Params.Free;
cfont.Free;
rtf.WordWrap := wordwrap;
FreeFontStack(fo_stk);
end;
unit rtf2html;(*
--------------------------------------------------------------------------------Revision history:
Nb. Date: Author What was done.
003 ? ? ?
002 21-aug-1997 TSE Minor (very minor) cleanup before release
001 20-aug-1997 TSE Unit created - RtfToHTML function
designed and written.Author list:
TSE Troels Skovmand Eriksen [email protected]
[email protected]
? ? ?Supported features:
rev. 001 Indents, Bullets, Left-, Centered & Rightjustified text,
Text styles (bold, italics and underline),
Fonts (face, size, color).
rev. 002 - do -
rev. 3 ?-------------------------------------------------------------------------------- This unit and all procedures and functions herein is released as
freeware. Any components or units created using this unit or
portions hereoff must be released as freeware (This does not
cover applications - they may be shareware/commercial as needed). Part of the function RtfToHTML may be covered by some obscure
Microsoft copyright since it reads the RTF format - check it out
yourself, if you do something worthwhile. Please let this preface stay if you publish a changed/updated
version of this unit and write all changes the "Revision history"
section above. Who-Dun-it information should be inserted in the
"Author list" and the "Supported features" section should be updated.
This makes it easier to pass the blame :-) I'm finished with this unit for now - but please email a copy of
any changes you make to me - Troels S Eriksen.-------------------------------------------------------------------------------- The following should be fixed - if anybody want to do it? * Should be rewritten into a conversion class - could be tricky, since
it seems like a stream only contains 4 Kb at a time ...
* Code should be cleaned up - this below is not that fast ...
* The indents (\li###) should be translated to <BLOCKQUOTE> or
just a <UL> with no <LI> elements.
* The hanging paragraphs should be translated to definitionlists ( the
<DL COMPACT> <DT> term <DD> definition </DL> structure )
* Tabs (\tab) should be fixed somehow ( heck, I DO want
a <TAB> code ! )
* Embedded objects / pictures should be converted to .gif's
- I know it's possible
* Some nice way to handle links ( the way .rtf-sources for
helpfiles do ? )
* A even more nice way of handling tables - could fix the
indent / tab-problems as well-------------------------------------------------------------------------------- The idea and logic behind this weirdo function : Well, the idea was to write a pascal RTF-2-HTML converter which
doesn't just do some "search and replace" - but actually converts
the dammed stuff. Since there's a difference between HTML and RTF in the
code-sequencing, I decided to try storing all paragraph and
textformatting information in two records (PARFMT and
TXTFMT) and only write the contents of these to the output
"stream" when needed. This first attempt is successfull - not highly, but it'll convert
the contents of a TRichEdit control and most other .rtf documents
to HTML and keep the general layout.Enjoy it
Troels S Eriksen, Den--------------------------------------------------------------------------------*)
{$LONGSTRINGS ON}interface function RtfToHtml(const rtf:string):string;implementation
uses
Classes, SysUtils;function RtfToHtml(const rtf:string):string;type
TState = record
FntTbl : boolean;
ColTbl : boolean;
FntLst,
ColLst : TStringList;
end; TPARFMT = record
Alignment : TAlignment; { h鴍re, venstre, centreret tekst }
Bullets : integer; { Skriv bulletliste <UL> = 1
Skriv element <LI> = 2
Skriv element slut </LI> = 3
Skriv liste slut </UL> = 4 }
Written : boolean; { true hvis skrevet til streng }
end; TTXTFMT = record
ChangeF : boolean;
DefFont : integer;
Font : integer;
Fontsize : integer;
Color : integer;
Bold : integer;
Italics : integer;
Underline : integer;
Written : boolean;
end;var
indx : integer; // index i rtf-streng
ParFmt : TParFmt;
TxtFmt : TTxtFmt;
State : TState; Group : integer;
Col : string[10];
Fnt : string[63]; procedure WriteChar(c:Char);
var
S : string;
begin
s:='';
// First - get ready to write paragraph formatting
With PARFMT do if not Written then begin
// TextAttr's must be off before starting a new paragraph
{
add "uses forms" to the implementation or interface statement,
then call application.processmessages here - this would allow
you to work the application interface will saving a large file.
}
With TXTFMT do begin
if bold>1 then begin
s:=s+'</B>';
if bold=3 then bold:=0;
end;
if italics>1 then begin
s:=s+'</I>';
if italics=3 then Italics:=0;
end;
if underline>1 then begin
s:=s+'</U>';
if underline=3 then Underline:=0;
end;
end;
{ Write either bulletlist or left-, center, rightjustified paragraph
(doing it this way makes bulletlists leftjustified no matter what) }
case Bullets of
0 : case Alignment of
taLeftJustify : s:=s+#13#10'<P>';
taRightJustify: s:=s+#13#10'<P ALIGN=RIGHT>';
taCenter : s:=s+#13#10'<P ALIGN=CENTER>';
end;
1 : s:=s+#13#10'<UL>';
2 : s:=s+#13#10'<LI>';
3 : s:=s+'</LI>';
4 : begin
s:=s+#13#10'</UL>';
Bullets:=0;
end;
5 : begin
s:=s+'<BR>'#13#10#160#32#160#32#160;
Bullets:=0;
end;
end;
With TXTFMT do begin
If Bold=2 then s:=s+'<B>';
If Italics=2 then s:=s+'<I>';
If Underline=2 then s:=s+'<U>';
end;
Written:=TRUE;
end; { PARFMT }
// Second - Write any textattr's
With TXTFMT do if not written then begin
// If font has changed - write it
If changeF then begin
s:=s+'<FONT FACE="'+state.fntlst.strings[Font]+
'" COLOR="'+state.collst.strings[Color]+
'" SIZE="'+IntToStr(FontSize)+'">';
ChangeF:=FALSE;
end;
// If any textattr's should be written - do it
case Bold of
1 : begin
s:=s+'<B>';
bold:=2;
end;
3 : begin
s:=s+'</B>';
Bold:=0;
end;
end;
case Italics of
1 : begin
s:=s+'<I>';
Italics:=2;
end;
3 : begin
s:=s+'</I>';
Italics:=0;
end;
end;
case Underline of
1 : begin
s:=s+'<U>';
Underline:=2;
end;
3 : begin
s:=s+'</U>';
Underline:=0;
end;
end;
Written:=TRUE;
end;
// At last - write the character it self
case c of
#0 : result:=result+s; // Writes pending codes only
#9 : result:=result+s+#9; // Writes tab char
'>' : result:=result+s+'>'; // Writes "greater than"
'<' : result:=result+s+'<'; // Writes "less than"
else result:=result+s+c; // Writes a character
end;
end; { WriteChar } function Resolve(c:char):integer;
{ Convert char to integer value - used to decode \'## to an ansi-value }
begin
case byte(c) of
48..57 : Result:=byte(c)-48;
65..70 : Result:=byte(c)-55;
else Result:=0;
end;
end; { resolve } function CollectCode(i:integer):integer;
var
Value,
Keyword : string;
a : integer;
begin
KeyWord:='';
// First - check if keyword is any "special" keyword or is a normal one ...
case rtf[i+1] of
'*' : begin // Ignorre to end of group
a:=group;
repeat
case rtf[i] of
'{' : inc(group);
'}' : dec(group);
end;
inc(i);
until (group+1)=a;
result:=i-1;
end;
#39 : begin // Decode hex value
WriteChar(char(resolve(upcase(rtf[i+2]))*16+resolve(upcase(rtf[i+3]))));
Inc(i,3);
result:=i;
end;
'\','{','}' : begin // Return special character
WriteChar(rtf[i+1]);
inc(i);
result:=i;
end;
else begin
// First - get keyword ...
repeat
keyword:=keyword+rtf[i];
inc(i);
until (rtf[i] in ['{','\','}',' ',';','-','0'..'9']);
// Second - get any value following ...
Value :='';
While (rtf[i] in ['a'..'z','-','0'..'9']) do begin
value:=value+rtf[i];
inc(i);
end;
if rtf[i]=' ' then inc(i);
while (rtf[i] in ['{','}',';']) do inc(i);
result:=i-1;
{ Check which keyword and what to do - NB: Test shows that using
IF THEN ELSE .. is approx. 10% more efficient than calling EXIT }
if keyword='\par' then with PARFMT do begin
// New paragraph or bullet item
if Bullets=2 then Bullets:=3;
Written:=FALSE;
end else if keyword='\f' then case state.fnttbl of
true : begin // Make fontlist
fnt:='';
While rtf[i]<>' ' do inc(i); // Ignore fontfamily info etc
inc(i);
While rtf[i]<>';' do begin // Read font name
Fnt:=Fnt+rtf[i];
inc(i);
end;
dec(group); // Stop group
result:=i+1; // Move one beyond group end
State.FntLst.Add(Fnt); // Add fontname to fontlist
end; { true }
false: With TXTFMT do begin // Use fontlist
a:=StrToIntDef(value,0);
if font<>a then begin // Change Textattr's to new font
ChangeF:=TRUE;
Written:=FALSE;
FONT :=a;
end;
end; { false }
end else if keyword='\plain' then
with TXTFMT do begin // Zero textattr's
If bold=2 then Bold:=3;
If Italics=2 then Italics:=3;
If Underline=2 then Underline:=3;
if (bold=3) or (italics=3) or (underline=3) or (Color<>0) then begin
color:=0;
Written:=FALSE;
WriteChar(#0);
end;
end else if keyword='\fs' then with TXTFMT do begin // Change fontsize
case StrToIntDef(value,11) div 2 of
1.. 5 : a:=1;
6.. 9 : a:=2;
10..11 : a:=3;
12..13 : a:=4;
14..15 : a:=5;
else a:=6;
end;
Written:=False;
Fontsize:=a;
ChangeF:=TRUE;
end;
end else if keyword='\tab' then begin
WriteChar(#9);
end else if keyword='\ul' then with TXTFMT do begin // Set underline
Written:=FALSE;
if underline=0 then Underline:=1;
end else if keyword='\b' then with TXTFMT do begin // Set bold
Written:=FALSE;
if bold=0 then Bold:=1;
end else if keyword='\i' then with TXTFMT do begin // Set italics
Written:=FALSE;
if italics=0 then Italics:=1;
end else if keyword='\cf' then with TXTFMT do begin // Change fontcolor
a:=StrToIntDef(value,0);
If Color<>a then begin
Written:=FALSE;
ChangeF:=TRUE;
Color:=a;
end;
end else if keyword='\qc' then begin // Set paragraphformat (center)
PARFMT.Alignment:=taCenter;
PARFMT.Written:=FALSE;
end else if keyword='\qr' then begin // Set paragraphformat (right)
PARFMT.Alignment:=taRightJustify;
PARFMT.Written:=FALSE;
end else if keyword='\pntext' then
with PARFMT do begin // Start bullet list item
Written :=FALSE;
Bullets :=2;
a:=group;
repeat
case rtf[i] of
'{' : inc(group);
'}' : dec(group);
end;
inc(i);
until (group+1)=a;
result:=i-1;
end else if keyword='\fi' then with PARFMT do begin // Start bullet list
Written :=FALSE;
Bullets :=1;
WriteChar(#0);
end else if keyword='\pard' then
with PARFMT do begin // Stop paragraph / Bulletlist
Alignment:=taLeftJustify;
If Bullets>0 then
Bullets:=4;
Written:=FALSE;
end else if keyword='\red' then begin
col:='#'+IntToHex(StrToIntDef(value,255),2); // Get Red color
end else if keyword='\green' then begin
col:=col+IntToHex(StrToIntDef(value,255),2); // Get Green color
end else if keyword='\blue' then begin
col:=col+IntToHex(StrToIntDef(value,255),2); // Get blue color
State.ColLst.Add(col); // Add RGB in colorlist
end else if keyword='\deff' then with TXTFMT do begin
DefFont:=StrToIntDef(value,0); // Default font
end else if keyword='\fonttbl' then begin
state.fnttbl:=true; // Create font-list
end else if keyword='\colortbl' then begin
state.coltbl:=true; // Create color-list
end else if keyword='\deflang' then begin
state.fnttbl:=False; // Update is finished
With PARFMT do begin // Setup paragraphformat
Alignment:=taLeftJustify;
Written:=false;
Bullets:=0;
end;
With TXTFMT do begin // Setup font-format
Font :=DefFont;
Fontsize :=3;
Color :=0;
Bold :=0;
Italics :=0;
Underline :=0;
Written :=false;
end;
state.coltbl:=True; // Update is finished
end; { last if then }
end; { case else }
end;
end; { collectcode } function CleanUp(s:string):string;
// This could be done without, but - hey - it's nice
var
a : integer;
begin
// Nice up any empty <P>aragraph statements
While pos(#13#10'<P>'#13#10'<P',s)>0 do begin
a:=pos(#13#10'<P>'#13#10'<P',s);
system.delete(s,a,6);
system.insert('</P>',s,a);
end;
result:=s;
end; { cleanup }var
crsr : integer;begin
try
State.FntLst:=TstringList.Create; // Create fontlist
State.ColLst:=TstringList.Create; // Create colorlist
indx:=0;
result:='';
repeat
inc(indx);
case rtf[indx] of
#0..#31 : ; // Ascii ctrl-char - ignorre
'{' : Inc(group);
'}' : Dec(group);
'\' : indx:=collectcode(indx); // Code found - the fun starts ...
else begin
WriteChar(rtf[indx]); // Write char and any pending html-codes ...
Inc(indx); // Speedwrite normal chars till next special one
while (indx<length(rtf)) and
not (rtf[indx] in ['{','}','\','<','>',#00..#31]) do begin
result:=result+rtf[indx];
inc(indx);
end;
dec(indx);
end; end;
until indx=length(rtf);
finally
result:=cleanup(result); // Return the HTML document
State.FntLst.free;
State.ColLst.free;
end;
end;end.