Using FileStream.Seek

20,919

Solution 1

I'm confused by your expected positions, Line 5 at position 30 and 45, with Line 4 at 15, and 3 at 30?

Here's the core of the read logic:

    // Convert the 1-based line number into an offset from the start of the file.
    // This relies on every line occupying exactly LineLength bytes.
    var offset = (lineNumber - 1) * LineLength;

    // Jump straight to the start of the requested line.
    fs.Seek(offset, SeekOrigin.Begin);

    // Read one line's worth of raw bytes.
    // NOTE(review): the value returned by Read is not checked, so after a short
    // read the remainder of the buffer still holds its initial zero bytes.
    var data = new byte[LineLength];
    fs.Read(data, 0, LineLength);

    // Decode the bytes and print line number, offset and text in aligned columns.
    var text = DecodeData(data);
    Debug.Print("{0,-12}{1,-16}{2}", lineNumber, offset, text);

The full sample is here:

/// <summary>
/// Random-access reader for a "padded" text file, i.e. a file in which every
/// line occupies the same number of bytes, so that line N (1-based) starts at
/// byte offset (N - 1) * LineLength.
/// </summary>
class PaddedFileSearch
{
    /// <summary>
    /// Length of one line including its two-character terminator, or 0 when
    /// the file has no first line.
    /// </summary>
    public int LineLength { get; private set; }

    /// <summary>The file being searched.</summary>
    public FileInfo File { get; private set; }

    /// <summary>
    /// Measures the first line of <paramref name="fileInfo"/> and remembers the file.
    /// </summary>
    /// <param name="fileInfo">The padded file to read from.</param>
    public PaddedFileSearch(FileInfo fileInfo)
    {
        // A length of 0 (empty file) is tolerated here; reads then simply
        // produce empty text.
        LineLength = FindLineLength(fileInfo);
        File = fileInfo;
    }

    /// <summary>
    /// Derives the per-line length from the first line of the file.
    /// </summary>
    /// <remarks>
    /// The result is the character count of the first line plus 2 for "\r\n".
    /// It is later used as a byte count, so it is only right for files with
    /// CRLF terminators and one byte per character.
    /// </remarks>
    /// <returns>The line length, or 0 when the file is empty.</returns>
    private static int FindLineLength(FileInfo fileInfo)
    {
        using (var reader = new StreamReader(fileInfo.FullName))
        {
            var firstLine = reader.ReadLine();
            if (firstLine != null)
            {
                return firstLine.Length + 2;
            }
        }

        return 0;
    }

    /// <summary>
    /// Prints the requested lines (1-based) in ascending order, together with
    /// the byte offset each one was read from.
    /// </summary>
    /// <param name="lineNumbers">Line numbers to print. The list is sorted in place.</param>
    public void SeekMethod(List<int> lineNumbers)
    {
        Debug.Print("");
        Debug.Print("Line No\t\tPosition\t\tLine");
        Debug.Print("-------\t\t--------\t\t-----------------");

        lineNumbers.Sort();

        // Ask for read access only: FileMode.Open on its own requests
        // read/write access, which fails on read-only files.
        using (var fs = new FileStream(File.FullName, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            foreach (var lineNumber in lineNumbers)
            {
                OutputData(fs, lineNumber);
            }
        }
    }

    /// <summary>
    /// Seeks to the start of <paramref name="lineNumber"/>, reads one line's
    /// worth of bytes and prints the decoded text.
    /// </summary>
    private void OutputData(FileStream fs, int lineNumber)
    {
        // Multiply as long so offsets beyond 2 GiB do not overflow.
        long offset = (lineNumber - 1L) * LineLength;

        fs.Seek(offset, SeekOrigin.Begin);

        var data = new byte[LineLength];
        var bytesRead = ReadFully(fs, data);

        // Decode only what was actually read, so a line at or past the end of
        // the file is not padded with NUL characters.
        var text = DecodeData(data, bytesRead);
        Debug.Print("{0,-12}{1,-16}{2}", lineNumber, offset, text);
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> from <paramref name="stream"/>, looping
    /// because a single Read call may return fewer bytes than requested.
    /// </summary>
    /// <returns>The number of bytes read; less than the buffer size only at end of stream.</returns>
    private static int ReadFully(Stream stream, byte[] buffer)
    {
        var total = 0;
        while (total < buffer.Length)
        {
            var read = stream.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    /// <summary>Decodes the first <paramref name="count"/> bytes of <paramref name="data"/> as UTF-8.</summary>
    private static string DecodeData(byte[] data, int count)
    {
        return Encoding.UTF8.GetString(data, 0, count);
    }
}

/// <summary>
/// Demo entry point: builds a PaddedFileSearch over a sample file and prints
/// two sets of lines from it.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        var sampleFile = new FileInfo(@"D:\Desktop\Test.txt");
        var seeker = new PaddedFileSearch(sampleFile);

        Debug.Print("File Line length: {0}", seeker.LineLength);

        // Deliberately unsorted requests; SeekMethod sorts them itself.
        var requests = new[]
        {
            new List<int> { 5, 3, 4 },
            new List<int> { 5, 3 },
        };

        foreach (var request in requests)
        {
            seeker.SeekMethod(request);
        }
    }
}

Solution 2

Place this in the inner loop of SeekMethod(int[] lineNos):

// Absolute offset of the requested line; it no longer depends on the previous value of position.
position = (lineNo - 1) * LineLength;
// Seek from the beginning of the file rather than from the current position.
fs.Seek(position, SeekOrigin.Begin);
// Drop whatever the StreamReader has buffered so the next read starts at the new stream position.
reader.DiscardBufferedData();

The problem is that your position variable changes based on its previous value, and StreamReader maintains a buffer so you need to clear out buffered data when you alter the stream position.

Solution 3

You have a confusing mix here: position is absolute for the first line number and relative for the following ones.

Look closely at this code and at the actual results you're getting:

// NOTE(review): the previous value of position is subtracted from an absolute offset,
// so the result is neither a clean absolute nor a clean relative offset.
position = (lineNo - 1) * LineLength - position;
// The value is nevertheless applied relative to the current stream position.
fs.Seek(position, SeekOrigin.Current);

For values 3, 4, 5 you get the numbers 30, 15, 45, while it's obvious that if you're using relative positions it should be 30, 15, 15, since the line length is 15, OR 30, 0, 0 if your read method performs a seek as a side effect, like FileStream.Read does. Your test output is ACCIDENTALLY correct (only for the string values though, not the positions); you should have tested with a non-consecutive sequence and looked at the position value more closely to see that there is no connection between the string displayed and the position value.

Actually, your StreamReader ignores the later fs.Seek calls and simply reads line by line =)

Here are the results for the input 3, 5, 9 :)

Line No         Position                Line
-------         --------                -----------------
3                       30                              0003|100!2500
5                       30                              0004|100!2500
9                       90                              0005|100!2500

I believe the following is closest to what you're trying to achieve: a new function

/// <summary>
/// Reads up to <paramref name="length"/> bytes from the current position of
/// <paramref name="stream"/> and decodes them as UTF-8. The stream position
/// advances by the number of bytes read.
/// </summary>
/// <param name="stream">Stream positioned at the start of the line.</param>
/// <param name="length">Number of bytes one line occupies, terminator included.</param>
/// <returns>
/// The decoded text. It is shorter than a full line (possibly empty) when the
/// end of the file is reached first.
/// </returns>
private static string ReadLine(FileStream stream, int length)
{
    byte[] bytes = new byte[length];
    int total = 0;

    // A single Read call may return fewer bytes than requested, so keep
    // reading until the buffer is full or the end of the stream is hit.
    while (total < length)
    {
        int read = stream.Read(bytes, total, length - total);
        if (read == 0)
        {
            break;
        }

        total += read;
    }

    // Decode only the bytes actually read; decoding the whole buffer would
    // append NUL characters after a short read.
    return Encoding.UTF8.GetString(bytes, 0, total);
}

And the new loop code

// Line number read on the previous iteration; 0 means nothing has been read yet.
int oldLine = 0;
    using (FileStream fs = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.None))
    {
            foreach (int lineNo in lineNos)
            {
                // Relative distance to skip: the lines between the one just read and the
                // one wanted. The -1 accounts for the previous ReadLine call having
                // already moved the stream past one line.
                position = (lineNo - oldLine -1) * LineLength;
                fs.Seek(position, SeekOrigin.Current);
                // Reads LineLength bytes straight from the FileStream (no StreamReader involved).
                line = ReadLine(fs, LineLength);
                Console.WriteLine("{0}\t\t\t{1}\t\t\t\t{2}", lineNo, position, line);
                oldLine = lineNo;
            }
    }

Notice that stream.Read now also advances the stream position by the number of bytes it reads, so it is equivalent to an additional stream.Seek(length, SeekOrigin.Current).

New correct output and logical position changes

Line No         Position                Line
-------         --------                -----------------
3                       30                              0003|100!2500    
4                       0                               0004|100!2500    
5                       0                               0005|100!2500

Line No         Position                Line
-------         --------                -----------------
3                       30                              0003|100!2500  
5                       15                              0005|100!2500

PS: it's odd that you treat the 0001 line as the 1st line rather than the 0th; the whole "- 1" could be removed if you counted from zero, the way programmers usually do =)

Solution 4

I wouldn't say the problem is your effort to manually manage the position value, but rather that StreamReader.ReadLine changes the stream's Position value. If you step through your code and monitor your local values, you'll see the stream's position changes after each ReadLine call (to 148 after the first).

EDIT

It would be better to just change the stream's position directly rather than use Seek

/// <summary>
/// Prints the requested lines (1-based) of the padded file in ascending order
/// by setting the stream position directly and reading each line through a
/// StreamReader.
/// </summary>
/// <remarks>
/// FileName and LineLength are members of the enclosing class. The printed
/// "position" column is the relative step applied to the stream, not an
/// absolute offset.
/// </remarks>
/// <param name="lineNos">Line numbers to print. The array is sorted in place.</param>
public void SeekMethod(int[] lineNos)
{
    string line = null;
    long step;

    Array.Sort(lineNos);

    Debug.Print("");
    Debug.Print("Line No\t\tPosition\t\tLine");
    Debug.Print("-------\t\t--------\t\t-----------------");

    using (FileStream fs = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.None))
    // One reader for the whole loop instead of a new, never-disposed reader
    // (and buffer) per requested line.
    using (StreamReader reader = new StreamReader(fs))
    {
        foreach (int lineNo in lineNos)
        {
            // Multiply as long so offsets beyond 2 GiB do not overflow.
            // fs.Position already includes whatever the reader buffered on
            // the previous iteration, hence the subtraction.
            step = (lineNo - 1L) * LineLength - fs.Position;
            fs.Position += step;

            // The reader does not notice the position change on its own;
            // drop its buffer so the next ReadLine starts at the new position.
            reader.DiscardBufferedData();

            if ((line = reader.ReadLine()) != null) {
                Debug.Print("{0}\t\t\t{1}\t\t\t\t{2}", lineNo, step, line);
            }
        }
    }
}
Share:
20,919
Pranav Shah
Author by

Pranav Shah

Updated on March 31, 2020

Comments

  • Pranav Shah
    Pranav Shah about 4 years

    I am trying to work with FileStream.Seek to quickly jump to a line and read it.

    However, I am not getting the right results. I have tried to look at this for a while and can't understand what I am doing wrong.

    Environment:
    OS: Windows 7
    Framework: .NET 4.0
    IDE: Visual C# Express 2010

    Sample Data in file location: C:\Temp\Temp.txt

    0001|100!2500
    0002|100!2500
    0003|100!2500
    0004|100!2500
    0005|100!2500
    0006|100!2500
    0007|100!2500
    0008|100!2500
    0009|100!2500
    0010|100!2500
    

    The code:

    // Original code from the question. It reads selected lines of a fixed-line-length
    // ("padded") file by seeking on the FileStream and reading through a StreamReader.
    // The code is kept exactly as posted because the output shown below it depends on it.
    class PaddedFileSearch
    {
        // Length of one line, terminator included; set by FindLineLength.
        private int LineLength { get; set; }
        // Path of the file to read.
        private string FileName { get; set; }
    
        // Measures the line length and immediately runs the two test searches.
        public PaddedFileSearch()
        {
            FileName = @"C:\Temp\Temp.txt";     // This is a padded file.  All lines are of the same length.
    
            FindLineLength();
            Debug.Print("File Line length: {0}", LineLength);
    
            // TODO: This purely for testing.  Move this code out.
            SeekMethod(new int[] { 5, 3, 4 });
            /*  Expected Results:
             *  Line No     Position        Line
             *  -------     --------        -----------------
             *  3           30              0003|100!2500
             *  4           15              0004|100!2500
             *  5           15              0005|100!2500 -- This was updated after the initial request.
             */
    
            /* THIS DOES NOT GIVE THE EXPECTED RESULTS */
            SeekMethod(new int[] { 5, 3 });
            /*  Expected Results:
             *  Line No     Position        Line
             *  -------     --------        -----------------
             *  3           30              0003|100!2500
             *  5           30              0005|100!2500
             */
        }
    
        // Sets LineLength from the first line of the file: its character count plus 2.
        // LineLength is left unchanged when the file has no first line.
        private void FindLineLength()
        {
            string line;
    
            // Add check for FileExists
    
            using (StreamReader reader = new StreamReader(FileName))
            {
                if ((line = reader.ReadLine()) != null)
                {
                    LineLength = line.Length + 2;
                    // The 2 is for NewLine(\r\n)
                }
            }
    
        }
    
        // Sorts lineNos in place and prints each requested line with the seek value used for it.
        public void SeekMethod(int[] lineNos)
        {
            long position = 0;
            string line = null;
    
            Array.Sort(lineNos);
    
            Debug.Print("");
            Debug.Print("Line No\t\tPosition\t\tLine");
            Debug.Print("-------\t\t--------\t\t-----------------");
    
            using (FileStream fs = new FileStream(FileName, FileMode.Open, FileAccess.Read, FileShare.None))
            {
                // NOTE(review): per the answers, this reader keeps its own buffer, which is
                // not cleared when fs is repositioned below (no DiscardBufferedData call).
                using (StreamReader reader = new StreamReader(fs))
                {
                    foreach (int lineNo in lineNos)
                    {
                        // NOTE(review): position starts at 0, so the first pass yields an
                        // absolute offset; later passes subtract the previous value of
                        // position, not the current stream position.
                        position = (lineNo - 1) * LineLength - position;
                        fs.Seek(position, SeekOrigin.Current);
    
                        if ((line = reader.ReadLine()) != null)
                        {
                            Debug.Print("{0}\t\t\t{1}\t\t\t\t{2}", lineNo, position, line);
                        }
                    }
                }
            }
        }
    }
    

    The output I get:

    File Line length: 15
    
    Line No     Position        Line
    -------     --------        -----------------
    3           30              0003|100!2500
    4           15              0004|100!2500
    5           45              0005|100!2500
    
    Line No     Position        Line
    -------     --------        -----------------
    3           30              0003|100!2500
    5           30              0004|100!2500
    

    My problem is with the following output:

    Line No     Position        Line
    -------     --------        -----------------
    5           30              0004|100!2500
    

    The output for Line should be: 0005|100!2500

    I don't understand why this is happening.

    Am I doing something wrong? Is there a workaround? Also are there faster ways to do this using something like seek?
    (I am looking for code-based options and NOT Oracle or SQL Server. For the sake of argument, let's also say that the file size is 1 GB.)

    Any help is greatly appreciated.

    Thanks.

    UPDATE:
    I found 4 great answers here. Thanks a lot.

    Sample Timings:
    Based on a few runs the following are the methods from best to good. Even the good is very close to best.
    In a file that contains 10K lines, 2.28 MB. I searched for same 5000 random lines using all the options.

    1. Seek4: Time elapsed: 00:00:00.0398530 ms -- Ritch Melton
    2. Seek3: Time elapsed: 00:00:00.0446072 ms -- Valentin Kuzub
    3. Seek1: Time elapsed: 00:00:00.0538210 ms -- Jake
    4. Seek2: Time elapsed: 00:00:00.0889589 ms -- bitxwise

    Shown below is the code. After saving the code you can simply call it by typing TestPaddedFileSeek.CallPaddedFileSeek();. You will also have to specify the namespace and the "using references".

    `

    /// <summary>
    /// Offers several ways of reading lines by line number from a padded file
    /// (a file in which all lines are the same length) and times them against each other.
    /// The idea is to jump straight to a line with a seek instead of scanning the file.
    /// Details about the discussion are available at: http://stackoverflow.com/questions/5201414/having-a-problem-while-using-filestream-seek-in-c-solved
    /// </summary>
    class PaddedFileSeek
    {
        /// <summary>The padded file being read.</summary>
        public FileInfo File { get; private set; }

        /// <summary>
        /// Length of one line including its two-character terminator, or 0 when
        /// the file has no first line.
        /// </summary>
        public int LineLength { get; private set; }

        #region Private methods
        /// <summary>
        /// Derives the per-line length from the first line of the file.
        /// </summary>
        /// <remarks>
        /// The result is later used as a byte count, so it is only right for files
        /// with "\r\n" terminators and one byte per character.
        /// </remarks>
        /// <returns>Character count of the first line plus 2, or 0 for an empty file.</returns>
        private static int FindLineLength(FileInfo fileInfo)
        {
            using (StreamReader reader = new StreamReader(fileInfo.FullName))
            {
                string line = reader.ReadLine();
                if (line != null)
                {
                    return line.Length + 2;   // The 2 is for NewLine(\r\n)
                }
            }

            return 0;
        }

        /// <summary>
        /// Prints the column header. The output is disabled so that the timings
        /// measure the reads only; re-enable the lines below to see the data.
        /// </summary>
        private static void PrintHeader()
        {
           /*
            Debug.Print("");
            Debug.Print("Line No\t\tLine");
            Debug.Print("-------\t\t--------------------------");
           */
        }

        /// <summary>
        /// Prints one line that was read. Disabled for the same reason as <see cref="PrintHeader"/>.
        /// </summary>
        private static void PrintLine(int lineNo, string line)
        {
            //Debug.Print("{0}\t\t\t{1}", lineNo, line);
        }

        /// <summary>Writes the elapsed time of one option to the debug output.</summary>
        private static void PrintElapsedTime(TimeSpan elapsed)
        {
            // NOTE: the value is written in TimeSpan format (hh:mm:ss.fffffff), not as a
            // number of milliseconds, despite the "ms" suffix. The text is left unchanged
            // so the output stays comparable with previously recorded timings.
            Debug.WriteLine("Time elapsed: {0} ms", elapsed);
        }

        /// <summary>
        /// Runs one option under a stopwatch and prints its label and elapsed time.
        /// </summary>
        /// <param name="label">Text written before the timing, e.g. "Seek1: ".</param>
        /// <param name="seek">The option to time.</param>
        private static void RunTimed(string label, Action seek)
        {
            Stopwatch sw = Stopwatch.StartNew();

            Debug.Write(label);
            PrintHeader();

            seek();

            sw.Stop();
            PrintElapsedTime(sw.Elapsed);
        }

        /// <summary>Opens the padded file for reading only.</summary>
        private FileStream OpenForRead()
        {
            // Explicit read access: FileMode.Open on its own requests read/write
            // access, which fails on read-only files.
            return new FileStream(File.FullName, FileMode.Open, FileAccess.Read, FileShare.Read);
        }

        /// <summary>Byte offset at which the given 1-based line starts.</summary>
        private long OffsetOf(int lineNo)
        {
            // Multiply as long so offsets beyond 2 GiB do not overflow.
            return (lineNo - 1L) * LineLength;
        }

        /// <summary>
        /// Reads up to <paramref name="length"/> bytes from the current stream position
        /// and decodes them as UTF-8. Used by Seek3 and Seek4.
        /// </summary>
        /// <returns>The decoded text; shorter than a full line when the end of the file is reached first.</returns>
        private static string ReadLine(FileStream stream, int length)
        {
            byte[] bytes = new byte[length];
            int total = 0;

            // A single Read call may return fewer bytes than requested.
            while (total < length)
            {
                int read = stream.Read(bytes, total, length - total);
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            // Decode only what was read, so a short read is not padded with NULs.
            return Encoding.UTF8.GetString(bytes, 0, total);
        }
        #endregion

        /// <summary>
        /// Measures the first line of <paramref name="fileInfo"/> and remembers the file.
        /// </summary>
        /// <param name="fileInfo">The padded file; it must exist.</param>
        public PaddedFileSeek(FileInfo fileInfo)
        {
            // A length of 0 (empty file) is tolerated; the Seek methods then read nothing.
            LineLength = FindLineLength(fileInfo);
            File = fileInfo;
        }

        /// <summary>
        /// Runs and times all four options, in order, on the same line numbers.
        /// </summary>
        /// <remarks>
        /// Each option sorts its input in place and the sort is part of the measured
        /// time, so Seek2 and Seek3 receive the array already sorted by Seek1.
        /// </remarks>
        /// <param name="lineNoArray">Line numbers for Seek1, Seek2 and Seek3.</param>
        /// <param name="lineNoList">Line numbers for Seek4.</param>
        public void CallAll(int[] lineNoArray, List<int> lineNoList)
        {
            RunTimed("Seek1: ", delegate { Seek1(lineNoArray); });
            RunTimed("Seek2: ", delegate { Seek2(lineNoArray); });
            RunTimed("Seek3: ", delegate { Seek3(lineNoArray); });
            RunTimed("Seek4: ", delegate { Seek4(lineNoList); });
        }

        #region Seek1
        /// <summary>
        /// Option by Jake: absolute seek on the stream, one shared StreamReader whose
        /// buffer is discarded after every seek.
        /// </summary>
        /// <param name="lineNoArray">1-based line numbers; sorted in place.</param>
        public void Seek1(int[] lineNoArray)
        {
            Array.Sort(lineNoArray);

            using (FileStream fs = OpenForRead())
            using (StreamReader reader = new StreamReader(fs))
            {
                foreach (int lineNo in lineNoArray)
                {
                    fs.Seek(OffsetOf(lineNo), SeekOrigin.Begin);

                    // The reader does not notice the seek; without this call it would
                    // keep serving lines from its old buffer.
                    reader.DiscardBufferedData();

                    string line = reader.ReadLine();
                    if (line != null)
                    {
                        PrintLine(lineNo, line);
                    }
                }
            }
        }
        #endregion

        #region Seek2
        /// <summary>
        /// Option by bitxwise: set the stream position directly and read each line
        /// through a fresh StreamReader.
        /// </summary>
        /// <param name="lineNoArray">1-based line numbers; sorted in place.</param>
        public void Seek2(int[] lineNoArray)
        {
            Array.Sort(lineNoArray);

            using (FileStream fs = OpenForRead())
            {
                foreach (int lineNo in lineNoArray)
                {
                    // Why not one reader in a "using" around the loop? A shared reader keeps
                    // buffered data from its previous read and does not see the position
                    // change, so it returns the wrong line unless DiscardBufferedData is
                    // called (as Seek1 does). A fresh reader starts with an empty buffer.
                    // It is not disposed here because disposing it would also close fs.
                    StreamReader reader = new StreamReader(fs);

                    // fs.Position includes whatever the previous reader buffered.
                    long step = OffsetOf(lineNo) - fs.Position;
                    fs.Position += step;

                    string line = reader.ReadLine();
                    if (line != null)
                    {
                        PrintLine(lineNo, line);
                    }
                }
            }
        }
        #endregion

        #region Seek3
        /// <summary>
        /// Option by Valentin Kuzub: relative seeks and raw byte reads, no StreamReader.
        /// </summary>
        /// <param name="lineNoArray">1-based line numbers; sorted in place.</param>
        public void Seek3(int[] lineNoArray)
        {
            int oldLineNo = 0;

            Array.Sort(lineNoArray);

            using (FileStream fs = OpenForRead())
            {
                foreach (int lineNo in lineNoArray)
                {
                    // The previous read already moved past one line, hence the extra -1.
                    // A repeated line number gives a negative step back to its start.
                    long position = (lineNo - oldLineNo - 1L) * LineLength;
                    fs.Seek(position, SeekOrigin.Current);

                    string line = ReadLine(fs, LineLength);
                    PrintLine(lineNo, line);
                    oldLineNo = lineNo;
                }
            }
        }
        #endregion

        #region Seek4
        /// <summary>
        /// Option by Ritch Melton: absolute seeks and raw byte reads, no StreamReader.
        /// </summary>
        /// <param name="lineNoList">1-based line numbers; sorted in place.</param>
        public void Seek4(List<int> lineNoList)
        {
            lineNoList.Sort();

            using (FileStream fs = OpenForRead())
            {
                foreach (int lineNo in lineNoList)
                {
                    OutputData(fs, lineNo);
                }
            }
        }

        /// <summary>Seeks to the start of the given line, reads it and prints it.</summary>
        private void OutputData(FileStream fs, int lineNumber)
        {
            fs.Seek(OffsetOf(lineNumber), SeekOrigin.Begin);

            string text = ReadLine(fs, LineLength);
            PrintLine(lineNumber, text);
        }
        #endregion
    }
    
    
    
    /// <summary>
    /// Benchmark driver: repeatedly picks random line numbers and hands them to
    /// PaddedFileSeek.CallAll, which times every seek option.
    /// </summary>
    static class TestPaddedFileSeek
    {
        /// <summary>
        /// Runs 25 rounds; each round draws 5000 random line numbers and passes the
        /// same numbers to CallAll as both an array and a list.
        /// </summary>
        public static void CallPaddedFileSeek()
        {
            const int sampleSize = 5000;

            int[] lineNoArray = new int[sampleSize];
            List<int> lineNoList = new List<int>();
            Random random = new Random();

            string fileName = @"C:\Temp\Temp.txt";
            PaddedFileSeek seeker = new PaddedFileSeek(new FileInfo(fileName));

            for (int round = 1; round <= 25; round++)
            {
                Debug.Print("Loop no: {0}", round);

                // Fill both collections with the same draws. The upper bound of
                // Random.Next is exclusive, so values range from 1 to sampleSize - 1.
                int slot = 0;
                while (slot < sampleSize)
                {
                    int picked = random.Next(1, sampleSize);

                    lineNoArray[slot] = picked;
                    lineNoList.Add(picked);
                    slot++;
                }

                seeker.CallAll(lineNoArray, lineNoList);

                // The array is overwritten on the next round; the list must be emptied.
                lineNoList.Clear();

                Debug.Print("");
            }
        }
    }
    

    `