using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.IO;

namespace ConsoleApplication5
{
    /// <summary>
    /// Console benchmark that counts the files below a directory in two ways:
    /// a hand-written recursion over <c>Directory.GetFiles(String)</c>, and a single
    /// <c>Directory.GetFiles(String, String, SearchOption)</c> call.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads the root directory from the first input line. Every following line
        /// triggers one measurement: "a" uses <see cref="all"/>, anything else uses
        /// <see cref="loop"/>. The count and the elapsed milliseconds are printed.
        /// The program ends when the input ends.
        /// </summary>
        static void Main(string[] args)
        {
            var path = Console.ReadLine();
            if (path == null)
            {
                // No input at all: nothing to measure.
                return;
            }

            while (true)
            {
                Console.WriteLine("");
                var s = Console.ReadLine();
                if (s == null)
                {
                    // Console.ReadLine returns null at end of input; without this
                    // check the loop would re-run the scan forever.
                    break;
                }

                // Stopwatch is unaffected by wall-clock adjustments.
                var sw = Stopwatch.StartNew();
                var x = s == "a" ? all(path) : loop(path);
                sw.Stop();
                Console.Write(x + "个文件 - 耗时" + sw.Elapsed.TotalMilliseconds + "ms");
            }
        }

        /// <summary>
        /// Counts the files under <paramref name="p"/> by recursing into every
        /// sub-directory. Exceptions thrown by the Directory calls are not caught.
        /// </summary>
        static int loop(string p)
        {
            var x = Directory.GetFiles(p).Length;
            foreach (string d in Directory.GetDirectories(p))
            {
                x += loop(d);
            }
            return x;
        }

        /// <summary>
        /// Counts the files under <paramref name="p"/> with one
        /// SearchOption.AllDirectories call.
        /// </summary>
        static int all(string p)
        {
            return Directory.GetFiles(p, "*", SearchOption.AllDirectories).Length;
        }
    }
}
直接使用Directory.GetFiles (String, String, SearchOption) 方法获取所有文件确实比以 Directory.GetFiles (String)方法递归更快:
多谢诸位的热心帮助,在我刚看到你们的贴子时我的后台线程终于返回结果,时间大概是:18:20,第一个帖子的时间是16:53,用时1个小时多。现将调试贴图贴上供大家参考:贴图上有些地方剪掉了,请诸位谅解。不管怎么说问题解决了,时间太长了,处理这么多文件等待也值了。 部分代码如下:
#Region " - 线程处理"
    ' Worker thread that performs the directory listing; Nothing until Start is clicked.
    Private _t As Thread

    ' Raised from the worker thread with the files that were listed.
    Private Event event_thread_end(ByVal file_info_list() As FileInfo)

    ' Lists the files of one directory on the worker thread and raises
    ' event_thread_end with the result, or event_thread_exception with the
    ' error message when the listing fails.
    ' NOTE(review): event_thread_exception is not declared in this excerpt;
    ' presumably it lives in the omitted part. Confirm.
    Private Sub background_process(ByVal directory_info As DirectoryInfo)
        Dim di As DirectoryInfo = directory_info
        Dim fi_list() As FileInfo

        If di Is Nothing Then
            ' No directory supplied (the start handler calls Start() without an
            ' argument): fall back to the folder of the hard-coded sample file.
            Dim fi As New FileInfo("E:\madaming\Download\m00000001.jpg")
            di = New DirectoryInfo(fi.DirectoryName)
        End If

        Try
            fi_list = di.GetFiles()
            RaiseEvent event_thread_end(fi_list)
        Catch ex As Exception
            RaiseEvent event_thread_exception(ex.Message)
        End Try
    End Sub

    ' Starts the listing thread unless one is already running.
    Private Sub Button_start_thread_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button_start_thread.Click
        If _t IsNot Nothing AndAlso _t.IsAlive Then
            Return
        End If

        _t = New Thread(AddressOf background_process)
        ' A background thread does not keep the process alive after the UI closes.
        _t.IsBackground = True
        _t.Start()
    End Sub

    ''''''''
    ' ... (code omitted in the original post)

    ' Aborts the listing thread; does nothing when no thread is running.
    Private Sub Button_StopThread_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button_StopThread.Click
        If _t IsNot Nothing AndAlso _t.IsAlive Then
            _t.Abort()
        End If
    End Sub
#End Region
/// <summary>
/// Starts a background scan of the folder in txtPath for files with the extension
/// in txtExt. Clicks are ignored while a previous scan is still running.
/// </summary>
private void btnProcess_Click(object sender, EventArgs e)
{
    // A second worker would interleave its results with the first one's in
    // rtxtResult and m_filesCount, so only one scan runs at a time.
    if (m_bgworker != null && m_bgworker.IsBusy)
    {
        return;
    }

    //string[] files = System.IO.Directory.GetFiles(txtPath.Text, "*.xls", SearchOption.AllDirectories);
    rtxtResult.Text = "";
    m_path = txtPath.Text;
    m_ext = txtExt.Text;
    m_filesCount = 0;

    m_bgworker = new BackgroundWorker();
    m_bgworker.WorkerReportsProgress = true;
    m_bgworker.WorkerSupportsCancellation = true;
    m_bgworker.DoWork += new DoWorkEventHandler(FindFiles);
    m_bgworker.ProgressChanged += new ProgressChangedEventHandler(ReportProgress);
    m_bgworker.RunWorkerAsync(m_path);
}

/// <summary>
/// DoWork handler: scans the path passed to RunWorkerAsync, filtering by m_ext.
/// </summary>
private void FindFiles(object sender, DoWorkEventArgs dwea)
{
    // Direct casts fail loudly here instead of deferring a null dereference.
    BackgroundWorker worker = (BackgroundWorker)sender;
    string root = (string)dwea.Argument;
    ErgodicFilesInFolder(worker, root, m_ext);
}

/// <summary>
/// ProgressChanged handler: appends the reported file name (e.UserState) to
/// rtxtResult, scrolls to it and updates the running count in lblCount.
/// </summary>
private void ReportProgress(object sender, ProgressChangedEventArgs e)
{
    string fileName = e.UserState.ToString();
    rtxtResult.AppendText(fileName + System.Environment.NewLine);
    rtxtResult.ScrollToCaret();
    m_filesCount++;
    lblCount.Text = m_filesCount.ToString();
}

DirectoryInfo _dir = null;
/// <summary>
/// Recursively walks <paramref name="path"/> and reports the full path of every
/// matching file through <paramref name="worker"/>, one ReportProgress call per file.
/// </summary>
/// <param name="worker">Worker used to report files and polled for cancellation.</param>
/// <param name="path">Directory to scan.</param>
/// <param name="extension">
/// Extension to match, with or without the leading dot, compared case-insensitively.
/// Null or empty matches every file.
/// </param>
private void ErgodicFilesInFolder(BackgroundWorker worker, string path, string extension)
{
    if (worker.CancellationPending)
    {
        return;
    }

    FileSystemInfo[] entries;
    try
    {
        // A local DirectoryInfo keeps recursive calls from sharing mutable state.
        entries = new DirectoryInfo(path).GetFileSystemInfos();
    }
    catch (Exception ex)
    {
        // Best effort: a directory that cannot be listed is logged and skipped.
        Console.WriteLine(ex.Message);
        return;
    }

    // FileSystemInfo.Extension includes the leading dot, so a filter typed
    // without one is normalized before comparing.
    string wanted = extension ?? string.Empty;
    if (wanted.Length > 0 && !wanted.StartsWith(".", StringComparison.Ordinal))
    {
        wanted = "." + wanted;
    }

    try
    {
        foreach (FileSystemInfo entry in entries)
        {
            if (worker.CancellationPending)
            {
                return;
            }

            if (entry is DirectoryInfo)
            {
                // Directory: keep descending.
                ErgodicFilesInFolder(worker, entry.FullName, extension);
            }
            else if (wanted.Length == 0
                || string.Equals(entry.Extension, wanted, StringComparison.OrdinalIgnoreCase))
            {
                // File that passes the extension filter.
                worker.ReportProgress(0, entry.FullName);
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
// Runtime message quoted in the original post (translated):
// "Context 0x1a1978 is disconnected. Releasing the interfaces from the current context (context 0x1a1808). This may cause corruption or data loss. To avoid this problem, please ensure that all contexts/apartments stay alive until the application is completely done with the RuntimeCallableWrappers that represent COM components that live inside them."
cd\
f:
F:\>dir /s /a *.* >1.txt
呵呵,遍历搞定
FindFirstFile,FindNextFile和FindClose
参考
卷的序列号是 98A4-03B3 C:\Documents and Settings\Administrator 的目录2010-01-26 18:25 <DIR> .
2010-01-26 18:25 <DIR> ..
2009-08-24 10:48 <DIR> .Analyzer
2010-01-26 15:51 <DIR> Application Data
2009-05-25 12:09 <DIR> Contacts
2010-01-26 19:05 <DIR> Cookies
2010-01-26 18:26 568,038 dir.txt
2010-01-22 09:44 <DIR> Favorites
2009-11-10 08:32 <DIR> IECompatCache
2009-11-10 08:30 <DIR> IETldCache
2009-10-23 15:15 <DIR> Local Settings
2009-11-28 11:40 <DIR> My Documents
2010-01-20 18:57 <DIR> NetHood
2010-01-26 18:27 13,369,344 NTUSER.DAT
2010-01-26 19:07 1,024 ntuser.dat.LOG
2010-01-25 18:12 178 ntuser.ini
2009-04-30 10:26 <DIR> PrintHood
2009-11-10 08:32 <DIR> PrivacIE
2010-01-26 15:40 <DIR> Recent
2009-11-10 14:23 520 regwizard.log
2009-11-10 16:32 484 sanct.log
2009-12-23 18:01 <DIR> sdk
2009-10-24 08:42 <DIR> SendTo
2009-04-30 10:29 0 Sti_Trace.log
2009-05-19 11:08 <DIR> Templates
2009-05-03 12:53 <DIR> UserData
2009-06-08 14:58 <DIR> VSWebCache
2010-01-22 08:58 <DIR> 「开始」菜单
2010-01-26 18:45 <DIR> 桌面
7 个文件 13,939,588 字节 C:\Documents and Settings\Administrator\.Analyzer 的目录2009-08-24 10:48 <DIR> .
2009-08-24 10:48 <DIR> ..
2009-08-24 10:48 2,094 Analyzer.ini
2009-07-24 14:02 <DIR> conf
2009-07-24 14:02 <DIR> output
1 个文件 2,094 字节 C:\Documents and Settings\Administrator\.Analyzer\conf 的目录2009-07-24 14:02 <DIR> .
2009-07-24 14:02 <DIR> ..
2009-07-24 14:02 <DIR> e2emon
0 个文件 0 字节 C:\Documents and Settings\Administrator\.Analyzer\conf\e2emon 的目录这样势必需要对文本文件进行解析,不过的确是一种方法!
128174 个文件 31,381,968,536 字节
74585 个目录 12,700,237,824 可用字节
生成的TXT约16MB,即12万个文件≈16MB;
1.2亿个文件 ≈ 16000MB(大约15.6GB)(汗,呵呵,楼主的数亿个,可能不行,生成的TXT太大了。)
是啊,我只是遍历了C:\Documents and Settings\Administrator,生成的文件就有2.93M
就看楼主的了.
如果路径不太深,用dos命令也不差的,解析TXT也不难,呵呵
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
namespace ConsoleApplication5
{
    /// <summary>
    /// Console benchmark that counts the files below a directory in two ways:
    /// a hand-written recursion over <c>Directory.GetFiles(String)</c>, and a single
    /// <c>Directory.GetFiles(String, String, SearchOption)</c> call.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads the root directory from the first input line. Every following line
        /// triggers one measurement: "a" uses <see cref="all"/>, anything else uses
        /// <see cref="loop"/>. The count and the elapsed milliseconds are printed.
        /// The program ends when the input ends.
        /// </summary>
        static void Main(string[] args)
        {
            var path = Console.ReadLine();
            if (path == null)
            {
                // No input at all: nothing to measure.
                return;
            }

            while (true)
            {
                Console.WriteLine("");
                var s = Console.ReadLine();
                if (s == null)
                {
                    // Console.ReadLine returns null at end of input; without this
                    // check the loop would re-run the scan forever.
                    break;
                }

                // Stopwatch is unaffected by wall-clock adjustments.
                var sw = Stopwatch.StartNew();
                var x = s == "a" ? all(path) : loop(path);
                sw.Stop();
                Console.Write(x + "个文件 - 耗时" + sw.Elapsed.TotalMilliseconds + "ms");
            }
        }

        /// <summary>
        /// Counts the files under <paramref name="p"/> by recursing into every
        /// sub-directory. Exceptions thrown by the Directory calls are not caught.
        /// </summary>
        static int loop(string p)
        {
            var x = Directory.GetFiles(p).Length;
            foreach (string d in Directory.GetDirectories(p))
            {
                x += loop(d);
            }
            return x;
        }

        /// <summary>
        /// Counts the files under <paramref name="p"/> with one
        /// SearchOption.AllDirectories call.
        /// </summary>
        static int all(string p)
        {
            return Directory.GetFiles(p, "*", SearchOption.AllDirectories).Length;
        }
    }
}
直接使用Directory.GetFiles (String, String, SearchOption) 方法获取所有文件确实比以 Directory.GetFiles (String)方法递归更快:
根据楼主说的有几亿个文件.
应该是内存溢出了...
楼主可以去循环10亿次 (_fliename + System.Environment.NewLine);
假设 一个FileName字长10个汉字
(10*2个字节+换行符) X 10亿 = 大约 19G
(粗略算的)
因此,肯定溢出 rtxtResult.AppendText(_fliename + System.Environment.NewLine);
rtxtResult.ScrollToCaret();
IO.Directory.GetDirectories()
IO.Directory.GetFiles()
来试试,一定通过。
如果是内存不足...改用Winapi的Findxxxx系列函数.
/// <summary>
/// Starts a background scan of the folder in txtPath for files with the extension
/// in txtExt. Clicks are ignored while a previous scan is still running.
/// </summary>
private void btnProcess_Click(object sender, EventArgs e)
{
    // A second worker would interleave its results with the first one's.
    if (m_bgworker != null && m_bgworker.IsBusy)
    {
        return;
    }

    //string[] files = System.IO.Directory.GetFiles(txtPath.Text, "*.xls", SearchOption.AllDirectories);
    rtxtResult.Text = "";
    m_path = txtPath.Text;
    m_ext = txtExt.Text;
    m_filesCount = 0;

    m_bgworker = new BackgroundWorker();
    m_bgworker.WorkerReportsProgress = true;
    m_bgworker.WorkerSupportsCancellation = true;
    m_bgworker.DoWork += new DoWorkEventHandler(FindFiles);
    m_bgworker.ProgressChanged += new ProgressChangedEventHandler(ReportProgress);
    m_bgworker.RunWorkerAsync(m_path);
}

/// <summary>
/// DoWork handler: scans the path passed to RunWorkerAsync, filtering by m_ext.
/// </summary>
private void FindFiles(object sender, DoWorkEventArgs dwea)
{
    BackgroundWorker worker = (BackgroundWorker)sender;
    string root = (string)dwea.Argument;
    ErgodicFilesInFolder(worker, root, m_ext);
}

/// <summary>
/// ProgressChanged handler: appends the reported file name (e.UserState) to
/// rtxtResult, scrolls to it and updates the running count in lblCount.
/// </summary>
private void ReportProgress(object sender, ProgressChangedEventArgs e)
{
    string fileName = e.UserState.ToString();
    rtxtResult.AppendText(fileName + System.Environment.NewLine);
    // Runs once for every file found; per the original note, scrolling this often
    // is what caused the exception (removing the call made the scan run normally).
    rtxtResult.ScrollToCaret();
    m_filesCount++;
    lblCount.Text = m_filesCount.ToString();
}

/// <summary>
/// Recursively walks <paramref name="path"/> and reports the full path of every
/// matching file through <paramref name="worker"/>, one ReportProgress call per file.
/// </summary>
/// <param name="worker">Worker used to report files and polled for cancellation.</param>
/// <param name="path">Directory to scan.</param>
/// <param name="extension">
/// Extension to match, with or without the leading dot, compared case-insensitively.
/// Null or empty matches every file.
/// </param>
private void ErgodicFilesInFolder(BackgroundWorker worker, string path, string extension)
{
    if (worker.CancellationPending)
    {
        return;
    }

    FileInfo[] files;
    DirectoryInfo[] subDirectories;
    try
    {
        DirectoryInfo dir = new DirectoryInfo(path);
        files = dir.GetFiles();
        subDirectories = dir.GetDirectories();
    }
    catch (Exception ex)
    {
        // Best effort: a directory that cannot be listed is logged and skipped.
        Console.WriteLine(ex.Message);
        return;
    }

    // FileSystemInfo.Extension includes the leading dot, so a filter typed
    // without one is normalized before comparing.
    string wanted = extension ?? string.Empty;
    if (wanted.Length > 0 && !wanted.StartsWith(".", StringComparison.Ordinal))
    {
        wanted = "." + wanted;
    }

    try
    {
        // Files and directories are fetched separately, so there is no per-entry
        // Directory.Exists probe and no extra FileInfo allocation. The thread's
        // guess was that those allocations contributed to the failure.
        foreach (FileInfo file in files)
        {
            if (worker.CancellationPending)
            {
                return;
            }

            if (wanted.Length == 0
                || string.Equals(file.Extension, wanted, StringComparison.OrdinalIgnoreCase))
            {
                worker.ReportProgress(0, file.FullName);
            }
        }

        foreach (DirectoryInfo subDirectory in subDirectories)
        {
            ErgodicFilesInFolder(worker, subDirectory.FullName, extension);
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}

/// <summary>
/// Requests cancellation of the running scan, if any, and forgets the worker.
/// </summary>
private void button5_Click(object sender, EventArgs e)
{
    if (m_bgworker != null)
    {
        // Detach first so progress still queued by the cancelled worker cannot
        // leak into the result box of a later scan.
        m_bgworker.ProgressChanged -= new ProgressChangedEventHandler(ReportProgress);
        m_bgworker.CancelAsync();
        m_bgworker = null;
    }
}
改后异常没有。如果你的文件确实多,试过我的代码还是有异常,就把rtxtResult.ScrollToCaret();
这句话去掉吧...考虑一开始提示开始搜索,然后线程回调的方式去掉提示吧....
如果去掉这句不会抛异常,不过没有用户界面提示(假死)我感觉如果文件很多的话,还是不要使用这种方式, 一开始提示加载(可以做个滚动条),然后调用搜索函数,最后函数回调,去掉加载提示。
1亿个文件:(10×2 byte)× 1亿 = 20亿 byte ≈ 20亿 /(1024×1024×1024)≈ 1.86 GB(换算成比特约为 15 Gbit)
楼主,好像的确会溢出....(还是限制下搜索条件吧...或者用TreeView来加载文件夹内文件(动态加载))
/// <summary>
/// Main form: scans a folder tree on a BackgroundWorker for files with a given
/// extension, lists them in rtxtResult and can hand the list to frmFileRead.
/// </summary>
public partial class frmMain : Form
{
    /// <summary>
    /// Number of file names collected on the worker before one progress report is
    /// sent. Batching replaces the Thread.Sleep throttling the scan used before.
    /// </summary>
    private const int ProgressBatchSize = 256;

    /// <summary>
    /// Root path of the current scan.
    /// </summary>
    private string m_path;
    /// <summary>
    /// Extension filter of the current scan.
    /// </summary>
    private string m_ext;
    /// <summary>
    /// Number of files reported so far; only touched on the UI thread.
    /// </summary>
    private int m_filesCount;
    /// <summary>
    /// Background worker that runs the scan.
    /// </summary>
    private BackgroundWorker m_bgworker = null;
    /// <summary>
    /// Directory info; kept for compatibility, no longer written by the scan.
    /// </summary>
    private DirectoryInfo m_dirInfo = null;

    /// <summary>
    /// Full paths of all files found; only touched on the UI thread.
    /// </summary>
    private List<string> m_FileNames = new List<string>();

    public frmMain()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Lets the user pick the folder to scan and copies it into txtPath.
    /// </summary>
    private void btnOpenPath_Click(object sender, EventArgs e)
    {
        using (FolderBrowserDialog dialog = new FolderBrowserDialog())
        {
            dialog.ShowNewFolderButton = false;
            if (dialog.ShowDialog(this) == DialogResult.OK)
            {
                txtPath.Text = dialog.SelectedPath;
            }
        }
    }

    /// <summary>
    /// Clears the previous results and starts a new background scan. Clicks are
    /// ignored while a scan is still running.
    /// </summary>
    private void btnProcess_Click(object sender, EventArgs e)
    {
        if (m_bgworker != null && m_bgworker.IsBusy)
        {
            return;
        }

        m_FileNames.Clear();
        rtxtResult.Text = "";
        m_path = txtPath.Text;
        m_ext = txtExt.Text;
        m_filesCount = 0;

        m_bgworker = new BackgroundWorker();
        m_bgworker.WorkerReportsProgress = true;
        m_bgworker.WorkerSupportsCancellation = true;
        m_bgworker.DoWork += new DoWorkEventHandler(DoWork);
        m_bgworker.ProgressChanged += new ProgressChangedEventHandler(ReportProgress);
        m_bgworker.RunWorkerCompleted += new RunWorkerCompletedEventHandler(RunWorkerCompleted);
        m_bgworker.RunWorkerAsync();
    }

    /// <summary>
    /// DoWork handler: scans m_path for files matching m_ext.
    /// </summary>
    private void DoWork(object sender, DoWorkEventArgs dwea)
    {
        BackgroundWorker worker = (BackgroundWorker)sender;
        ErgodicFile(worker, m_path, m_ext);
    }

    /// <summary>
    /// Recursively walks <paramref name="path"/> and reports the full paths of the
    /// matching files through <paramref name="worker"/> in batches (string[] user state).
    /// </summary>
    /// <param name="worker">Worker used to report files and polled for cancellation.</param>
    /// <param name="path">Directory to scan.</param>
    /// <param name="extension">
    /// Extension to match, with or without the leading dot, compared case-insensitively.
    /// Null or empty matches every file.
    /// </param>
    private void ErgodicFile(BackgroundWorker worker, string path, string extension)
    {
        // FileSystemInfo.Extension includes the leading dot.
        string wanted = extension ?? string.Empty;
        if (wanted.Length > 0 && !wanted.StartsWith(".", StringComparison.Ordinal))
        {
            wanted = "." + wanted;
        }

        List<string> pending = new List<string>(ProgressBatchSize);
        ScanDirectory(worker, path, wanted, pending);

        // Report whatever is left, including after a cancellation.
        FlushPending(worker, pending);
    }

    /// <summary>
    /// Scans one directory level, recursing into sub-directories and collecting
    /// matching file paths in <paramref name="pending"/>.
    /// </summary>
    private static void ScanDirectory(BackgroundWorker worker, string path, string dotExtension, List<string> pending)
    {
        if (worker.CancellationPending)
        {
            return;
        }

        FileSystemInfo[] entries;
        try
        {
            entries = new DirectoryInfo(path).GetFileSystemInfos();
        }
        catch (Exception ex)
        {
            // Best effort: a directory that cannot be listed is logged and skipped.
            Console.WriteLine(ex.Message);
            return;
        }

        foreach (FileSystemInfo entry in entries)
        {
            if (worker.CancellationPending)
            {
                return;
            }

            if (entry is DirectoryInfo)
            {
                ScanDirectory(worker, entry.FullName, dotExtension, pending);
                continue;
            }

            if (dotExtension.Length > 0
                && !string.Equals(entry.Extension, dotExtension, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            pending.Add(entry.FullName);
            if (pending.Count >= ProgressBatchSize)
            {
                FlushPending(worker, pending);
            }
        }
    }

    /// <summary>
    /// Sends the collected paths to the UI as one progress report and empties the buffer.
    /// </summary>
    private static void FlushPending(BackgroundWorker worker, List<string> pending)
    {
        if (pending.Count == 0)
        {
            return;
        }

        // ToArray hands the UI its own copy, so clearing the buffer is safe.
        worker.ReportProgress(0, pending.ToArray());
        pending.Clear();
    }

    /// <summary>
    /// Shows any error raised by the scan, then announces the result; choosing OK
    /// opens the file list as the prompt promises.
    /// </summary>
    private void RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
    {
        if (e.Error != null)
        {
            MessageBox.Show(e.Error.Message);
        }

        DialogResult answer = MessageBox.Show("扫描完成,共找到"+m_FileNames.Count+"个文件,点击确定查看","文件扫描完成",MessageBoxButtons.OKCancel);
        if (answer == DialogResult.OK)
        {
            ShowFileList();
        }
    }

    /// <summary>
    /// ProgressChanged handler: receives one batch of file paths (string[] in
    /// e.UserState), records them and appends them to rtxtResult.
    /// </summary>
    private void ReportProgress(object sender, ProgressChangedEventArgs e)
    {
        string[] batch = (string[])e.UserState;
        m_FileNames.AddRange(batch);
        m_filesCount += batch.Length;

        rtxtResult.AppendText(string.Join(System.Environment.NewLine, batch) + System.Environment.NewLine);
        rtxtResult.ScrollToCaret();

        // A running counter avoids re-splitting the whole text through Lines on
        // every report, and does not count the trailing empty line.
        lblCount.Text = m_filesCount.ToString();
    }

    /// <summary>
    /// Requests cancellation of the running scan.
    /// </summary>
    private void btnStop_Click(object sender, EventArgs e)
    {
        if (m_bgworker != null && m_bgworker.WorkerSupportsCancellation)
        {
            m_bgworker.CancelAsync();
        }
    }

    /// <summary>
    /// Opens the file list; the button stays disabled while the dialog is open.
    /// </summary>
    private void btnStart_Click(object sender, EventArgs e)
    {
        btnStart.Enabled = false;
        try
        {
            ShowFileList();
        }
        finally
        {
            btnStart.Enabled = true;
        }
    }

    /// <summary>
    /// Shows frmFileRead with the files found so far; does nothing when the list is empty.
    /// </summary>
    private void ShowFileList()
    {
        if (m_FileNames == null || m_FileNames.Count == 0)
        {
            return;
        }

        // NOTE(review): assumes frmFileRead is a Form and therefore disposable; confirm.
        using (frmFileRead viewer = new frmFileRead(m_FileNames))
        {
            viewer.ShowDialog(this);
        }
    }
}
看来还是没有使用好多线程。大牛说的关于多线程最经典的论述:多线程的优劣/性能/系统开销
线程创建之前
1.系统为线程分配并初始化一个线程内核对象;
2.系统为每个线程保留1MB的地址空间(按需提交)用于线程用户模式堆栈;
3.系统为线程分配12KB(左右)的地址空间用于线程的内核模式堆栈。
线程创建之后
4.Windows调用当前进程中的每个DLL都有的一个函数,用来通知进程中的所有DLL,操作系统创建了一个新的线程。
销毁一个线程时
5.当前进程中的所有DLL都要接收一个关于该线程即将"死亡"的通知;
6.线程的内核对象及创建时系统分配的堆栈需要释放。
如果某台计算机只有一个CPU的话,则在某一时刻只有一个线程可以运行。
Windows必须跟踪记录线程对象,而且不停地跟踪记录每个线程对象。
Windows必须决定CPU下一次(每隔约20毫秒)调度哪一个线程使其运行。
上下文切换(Context switch):Windows使CPU停止执行一个线程的代码,而开始执行另一个线程的代码的现象,我们称之为上下文切换。上下文切换的开销:1.进入内核模式;
2.将CPU的寄存器保存到当前正在执行的线程的内核对象中。
注明:X86架构下CPU寄存器占了大约700字节(Byte)的空间,X64架构下CPU寄存器大约占了1024(Byte)的空间,IA64架构下 CPU寄存器占了大约2500Byte的空间。
3.需要一个自旋锁(spin lock),确定下一次调度哪一个线程,然后再释放该自旋锁。
如果下一次调度的线程属于另一个进程,那么此处开销更大,因为OS必须先切换虚拟地址空间。
4.把即将要运行的线程的内核对象的地址加载到CPU寄存器中。
5.退出内核模式。
以上都是纯粹的开销,导致Windows和应用程序的执行速度比在单线程系统上的执行速度慢。
综上所述:应尽量限制线程的使用。
多线程的带来的好处:
1.健壮性。
此线程的错误不会影响彼线程。
2.可扩展性。
多个CPU情况下,可充分发挥多个CPU的优势。
登录名或邮箱:
密 码: 密码必填
校验码: 校验码必须填写!
重新获得验证码
2周内不用再登录
没有注册?点此处注册
部分代码如下:
#Region " - 线程处理"
    ' Worker thread that performs the directory listing; Nothing until Start is clicked.
    Private _t As Thread

    ' Raised from the worker thread with the files that were listed.
    Private Event event_thread_end(ByVal file_info_list() As FileInfo)

    ' Lists the files of one directory on the worker thread and raises
    ' event_thread_end with the result, or event_thread_exception with the
    ' error message when the listing fails.
    ' NOTE(review): event_thread_exception is not declared in this excerpt;
    ' presumably it lives in the omitted part. Confirm.
    Private Sub background_process(ByVal directory_info As DirectoryInfo)
        Dim di As DirectoryInfo = directory_info
        Dim fi_list() As FileInfo

        If di Is Nothing Then
            ' No directory supplied (the start handler calls Start() without an
            ' argument): fall back to the folder of the hard-coded sample file.
            Dim fi As New FileInfo("E:\madaming\Download\m00000001.jpg")
            di = New DirectoryInfo(fi.DirectoryName)
        End If

        Try
            fi_list = di.GetFiles()
            RaiseEvent event_thread_end(fi_list)
        Catch ex As Exception
            RaiseEvent event_thread_exception(ex.Message)
        End Try
    End Sub

    ' Starts the listing thread unless one is already running.
    Private Sub Button_start_thread_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button_start_thread.Click
        If _t IsNot Nothing AndAlso _t.IsAlive Then
            Return
        End If

        _t = New Thread(AddressOf background_process)
        ' A background thread does not keep the process alive after the UI closes.
        _t.IsBackground = True
        _t.Start()
    End Sub

    ''''''''
    ' ... (code omitted in the original post)

    ' Aborts the listing thread; does nothing when no thread is running.
    Private Sub Button_StopThread_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button_StopThread.Click
        If _t IsNot Nothing AndAlso _t.IsAlive Then
            _t.Abort()
        End If
    End Sub
#End Region