我在写一个简单的搜索引擎,需要对一个包含1亿条数据的倒排文件进行二分查找我需要查找的数,但是采用二分查找的时候瓶颈总是在 FileStream.Seek 上,我进行了文件读写测试,读一个同样大小的文件,每次读出 1024 个字节要比每次读出 4 个字节快的多,但是我查找的时候只需要每次读出4个字节来判断就可以了,有没有更高效率的解决办法?

解决方案 »

  1.   

    test.if 是存储了1亿个无符号整形数的文件,我在 E8400 双核机器上测试,寻找 5万 个连续数字需要13秒的时间。
    Stopwatch sw = new Stopwatch();
                sw.Reset();
                sw.Start();            StringBuilder sb = new StringBuilder();
                for (uint m = 2000000; m < 2000000+10000; m++)
                {
                    uint search = m;                FileInfo di = new FileInfo(Application.StartupPath + "\\test.if");
                    int max = Convert.ToInt32(di.Length / 4) - 1;
                    int min = 0;
                    try
                    {
                        FileStream fs = new FileStream(Application.StartupPath + "\\test.if", FileMode.Open, FileAccess.Read);
                        byte[] buffer = new byte[4];                    int step = 0;
                        bool isExist = false;
                        int nextSearchIndex = min;
                        while (true)
                        {
                            step++;
                            fs.Seek(nextSearchIndex * 4, SeekOrigin.Begin);
                            fs.Read(buffer, 0, buffer.Length);                        uint currentNum = BitConverter.ToUInt32(buffer, 0);                        // 没找到
                            if (max == min || max == min + 1)
                                break;                        if (currentNum == search)
                            {
                                // 找到了
                                isExist = true;
                                break;
                            }
                            else if (currentNum > search)
                            {
                                max = nextSearchIndex;
                                nextSearchIndex = (int)((max - min) / 2) + min;
                            }
                            else if (currentNum < search)
                            {
                                min = nextSearchIndex;
                                nextSearchIndex = (int)((max - min) / 2) + min;
                            }
                        }                    fs.Close();                    if (isExist)
                        {
                            //sb.Append("找到:" + search + " 步数:" + step + "\r\n");
                        }
                        else
                        {
                            //sb.Append("没找到:" + search + " 步数:" + step + "\r\n");
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show(ex.Message);
                    }
                    finally
                    {
                    }
                }            sw.Stop();
                //MessageBox.Show();
                MessageBox.Show(sb.ToString() + "花费 " + sw.ElapsedMilliseconds.ToString());