using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Text; using System.Windows.Forms; using System.Net; using System.Drawing.Printing; using System.IO; using System.Text.RegularExpressions; using System.Web;namespace HtmlPrint { public partial class Form1 : Form { private Font printFont; private StreamReader streamToPrint; static string filePath; public Form1() { InitializeComponent(); filePath = "e:\\aa.html"; } // 字符过滤方法 public string NoHTML(string Htmlstring) {
http://community.csdn.net/
谢谢。。
// Fragment of a method body (the signature is not part of this snippet):
// extracts plain text from HTML by loading it into a WinForms WebBrowser.
{
// Create a WebBrowser control that is never added to a form; it is used only as a parser.
System.Windows.Forms.WebBrowser wb = new System.Windows.Forms.WebBrowser();
// Load an empty page first; presumably this is what makes wb.Document available
// for writing - TODO confirm Document is non-null as soon as Navigate returns.
wb.Navigate("about:blank");
// Write the markup held in HtmlStr (declared outside this fragment) into the blank document.
wb.Document.Write(HtmlStr);
// Hand back the text content of the <body> element.
// NOTE(review): wb is never disposed in this fragment.
return wb.Document.Body.InnerText;
}
/**
 * Strips HTML tags from a string and returns the remaining text.
 *
 * Everything from a "<" up to the next ">" is treated as a tag and removed.
 * This is a purely lexical filter, not an HTML parser: character entities
 * such as "&amp;" are left untouched.
 *
 * @param {string} htmlString Markup to filter; null or undefined yields "".
 * @returns {string} The input with every tag removed.
 */
function filterHtmlTag(htmlString) {
    // Guard: calling .replace on null/undefined used to throw a TypeError.
    if (htmlString === null || htmlString === undefined) {
        return "";
    }
    // "<[^>]*>" already matches closing tags such as "</p>", so the former
    // second alternative could never be reached; the "m" flag was dropped
    // because the pattern contains no "^" or "$" anchors.
    var tagPattern = /<[^>]*>/g;
    return String(htmlString).replace(tagPattern, "");
}
我不想用脚本实现，需要的是 C# 源码。
写带Using System.IO;
***
**
写个整文件可以不。。
这种事情我第一次弄。。
很不明白的地方。。谢谢。。
无误。。
/// <summary>
/// Reduces an HTML fragment to plain text: removes script blocks, tags and
/// comment remnants, then decodes a small set of character entities.
/// </summary>
/// <param name="Htmlstring">The HTML text to clean; may be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Drop or normalise a few typographic symbols.
    // NOTE(review): these literals may originally have been entity names that the
    // forum page rendered as characters - confirm against the original source.
    Htmlstring = Htmlstring.Replace("§", "");
    Htmlstring = Htmlstring.Replace("—", "-");
    Htmlstring = Htmlstring.Replace("·", ".");

    // Remove script blocks. Singleline lets "." cross line breaks, so scripts
    // that span several lines are removed as well.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove tags, line breaks followed by whitespace, and comment remnants.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Strip stray angle brackets and CRLF pairs. string.Replace returns a new
    // string, so the result has to be assigned (it used to be discarded).
    // This runs before entity decoding so that text decoded from "&lt;" and
    // "&gt;" is not stripped again.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // Decode entities in a single pass: sequential passes turned "&amp;lt;"
    // into "<" because the output of one pass was decoded again by the next.
    Htmlstring = Regex.Replace(
        Htmlstring,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(DecodeHtmlEntity),
        RegexOptions.IgnoreCase);

    return Htmlstring;
}

// Maps one matched entity to its replacement text; numeric references that
// are not listed here are removed.
private static string DecodeHtmlEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            return string.Empty;
    }
}
// Remove every "<...>" span (at least one character between the brackets) from
// items, which is declared outside this fragment, then display the remaining text.
string itemhtml = System.Text.RegularExpressions.Regex.Replace(items, "<[^>]+>", "");
MessageBox.Show(itemhtml);
// Fragment of a method body (no signature in this snippet): uses a WinForms
// WebBrowser to turn the markup in HtmlStr into plain text.
{
// The control is created in code only and is not attached to any form here.
System.Windows.Forms.WebBrowser wb = new System.Windows.Forms.WebBrowser();
// Navigate to a blank page before writing; presumably required so that
// wb.Document exists - TODO confirm it is available right after this call.
wb.Navigate("about:blank");
// HtmlStr is declared outside this fragment; the trailing note on the last
// line of the snippet says it holds the source of the whole page.
wb.Document.Write(HtmlStr);
// Return the text of the document's <body> element.
return wb.Document.Body.InnerText;
}HtmlStr为整个页面的源文件
// Remove every "<...>" span from Htmlstring. "(?>...)" is an atomic group, so
// the regex engine does not backtrack into the run of non-'>' characters.
// IgnoreCase has no effect here because the pattern contains no letters.
Htmlstring = Regex.Replace(Htmlstring, @"<(?>[^>]*)>", string.Empty, RegexOptions.IgnoreCase);
如何获取 <body> 和 </body> 之间的内容？
谢谢
你好。。
请问一个 string 可以存放一整个文件的源码吗？
如果能放，该怎么放？
谢谢
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.Drawing.Printing;
using System.IO;
using System.Text.RegularExpressions;
using System.Web;namespace HtmlPrint
{
/// <summary>
/// Main form of the HtmlPrint tool. It can print the file at
/// <c>filePath</c> line by line, and export an HTML file to plain text by
/// passing its content through <see cref="NoHTML"/>.
/// </summary>
public partial class Form1 : Form
{
    // Font used for every printed line; only valid while Printing() is running.
    private Font printFont;

    // Reader over the file being printed; shared between Printing() and pd_PrintPage.
    private StreamReader streamToPrint;

    // Path of the file that Printing() sends to the printer.
    static string filePath;

    /// <summary>
    /// Builds the form and sets the path of the file to print.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
        filePath = "e:\\aa.html";
    }

    /// <summary>
    /// Character filter: strips the head section and all tags from HTML text
    /// and turns the "%66" line markers into CRLF line breaks.
    /// </summary>
    /// <param name="Htmlstring">
    /// HTML text whose line breaks were replaced by the marker "%66"
    /// (this is how button2_Click builds its buffer); may be null or empty.
    /// </param>
    /// <returns>
    /// The filtered text, or an empty string when <paramref name="Htmlstring"/> is null or empty.
    /// </returns>
    public string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        // Drop or normalise a few typographic symbols.
        // NOTE(review): these literals may originally have been entity names that
        // the forum page rendered as characters - confirm against the real source.
        Htmlstring = Htmlstring.Replace("§", "");
        Htmlstring = Htmlstring.Replace("—", "-");
        Htmlstring = Htmlstring.Replace("·", ".");

        // Remove the head section. Singleline makes this work even when the
        // input still contains real line breaks instead of "%66" markers.
        Htmlstring = Regex.Replace(Htmlstring, @"<head>.*</head>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove every remaining tag.
        Htmlstring = Regex.Replace(Htmlstring, @"<[^>]+>", "", RegexOptions.IgnoreCase);

        // Remove a space together with the marker that follows it, then all
        // remaining spaces. These patterns are literal text, so plain string
        // replacement is equivalent to the previous regex calls.
        Htmlstring = Htmlstring.Replace(" %66", "");
        Htmlstring = Htmlstring.Replace(" ", "");

        // Three passes, each turning a triple marker into a single one, to
        // thin out long runs of empty lines.
        for (int pass = 0; pass < 3; pass++)
        {
            Htmlstring = Htmlstring.Replace("%66%66%66", "%66");
        }

        // Finally turn each marker into a real line break.
        Htmlstring = Htmlstring.Replace("%66", "\r\n");
        return Htmlstring;
    }

    /// <summary>
    /// Click handler of button1: starts printing.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        this.Printing();
    }

    /// <summary>
    /// Prints the file at <c>filePath</c> in Arial 10; any failure is shown
    /// in a message box instead of being thrown.
    /// </summary>
    public void Printing()
    {
        try
        {
            // The using blocks release the reader, the font and the print
            // document even when printing fails.
            using (StreamReader reader = new StreamReader(filePath))
            using (Font font = new Font("Arial", 10))
            using (PrintDocument pd = new PrintDocument())
            {
                streamToPrint = reader;
                printFont = font;
                pd.PrintPage += new PrintPageEventHandler(pd_PrintPage);
                // Print the document.
                pd.Print();
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
        finally
        {
            // Do not keep references to disposed objects.
            streamToPrint = null;
            printFont = null;
        }
    }

    /// <summary>
    /// Raised for each page to be printed: draws as many lines from
    /// <c>streamToPrint</c> as fit between the page margins.
    /// </summary>
    private void pd_PrintPage(object sender, PrintPageEventArgs ev)
    {
        float lineHeight = printFont.GetHeight(ev.Graphics);
        // Number of lines that fit on one page.
        float linesPerPage = ev.MarginBounds.Height / lineHeight;
        float leftMargin = ev.MarginBounds.Left;
        float topMargin = ev.MarginBounds.Top;
        int count = 0;
        string line;

        // Iterate over the file, printing each line.
        while (count < linesPerPage && (line = streamToPrint.ReadLine()) != null)
        {
            float yPos = topMargin + (count * lineHeight);
            ev.Graphics.DrawString(line, printFont, Brushes.Black, leftMargin, yPos);
            count++;
        }

        // Ask for another page only if unread text remains. Testing the last
        // line read produced an extra blank page whenever the file ended
        // exactly at a page boundary.
        ev.HasMorePages = !streamToPrint.EndOfStream;
    }

    /// <summary>
    /// Click handler of button2: reads the HTML file, joins its lines with the
    /// "%66" marker, filters the result through <see cref="NoHTML"/> and
    /// writes the text to e:\ss.txt.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        box.Text = "文件正在导出!!!";
        try
        {
            // StringBuilder avoids re-copying the whole buffer for every line.
            StringBuilder buff = new StringBuilder();
            using (StreamReader sr = new StreamReader("e:\\co_a001aft211.html"))
            {
                string str_r;
                while ((str_r = sr.ReadLine()) != null)
                {
                    buff.Append(str_r);
                    buff.Append("%66");
                }
            }

            string raw = buff.ToString();

            // The output file is created only after the input was read
            // successfully, and is closed even if writing fails.
            using (StreamWriter sw = new StreamWriter("e:\\ss.txt"))
            {
                sw.Write(this.NoHTML(raw));
            }

            // Shows the unfiltered, marker-joined source, as before.
            MessageBox.Show(raw);
            box.Text = "文件导出成功!!!";
        }
        catch (Exception str_e)
        {
            MessageBox.Show(str_e.Message);
            // Do not report success after a failure.
            box.Text = "文件导出失败!!!";
        }
    }

    /// <summary>
    /// Click handler of button3: shows an open-file dialog; the selection is not used.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        using (OpenFileDialog op = new OpenFileDialog())
        {
            op.ShowDialog();
        }
    }
}
}
看好 html 源码 自己编写吧。。
他们都不知道具体结构没办法帮你。。