Read a file/URL line-by-line in Swift
Solution 1
(The code is for Swift 2.2/Xcode 7.3 now. Older versions can be found in the edit history if somebody needs it. An updated version for Swift 3 is provided at the end.)
The following Swift code is heavily inspired by the various answers to How to read data from NSFileHandle line by line?. It reads from the file in chunks, and converts complete lines to strings.
The default line delimiter (\n), string encoding (UTF-8) and chunk size (4096) can be set with optional parameters.
/// Reads a file through an `NSFileHandle` in chunks of `chunkSize` bytes and
/// returns its contents one delimiter-separated line at a time.
class StreamReader {
/// String encoding used to encode the delimiter and to decode each line.
let encoding : UInt
/// Number of bytes requested from the file handle per read.
let chunkSize : Int
/// Handle of the file being read; set to nil by `close()`.
var fileHandle : NSFileHandle!
/// Bytes read from the file that have not yet been returned as a line.
let buffer : NSMutableData!
/// The line delimiter, encoded with `encoding`.
let delimData : NSData!
/// Set to true once a read returns no data; cleared again by `rewind()`.
var atEof : Bool = false
/// Creates a reader for the file at `path`.
///
/// - Parameter path: Path of the file to read.
/// - Parameter delimiter: Line delimiter, "\n" by default.
/// - Parameter encoding: String encoding, UTF-8 by default.
/// - Parameter chunkSize: Bytes to read per chunk, 4096 by default.
/// - Returns: nil if the file cannot be opened for reading, the delimiter
///   cannot be converted to `encoding`, or the buffer cannot be created.
init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
self.chunkSize = chunkSize
self.encoding = encoding
if let fileHandle = NSFileHandle(forReadingAtPath: path),
delimData = delimiter.dataUsingEncoding(encoding),
buffer = NSMutableData(capacity: chunkSize)
{
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = buffer
} else {
// Every stored property is assigned before the initializer fails.
self.fileHandle = nil
self.delimData = nil
self.buffer = nil
return nil
}
}
/// Closes the file when the reader is deallocated.
deinit {
self.close()
}
/// Return next line (without its delimiter), or nil on EOF.
///
/// The result is also nil when the line's bytes cannot be converted to a
/// string with `encoding`, so callers cannot tell that case apart from EOF.
/// Traps if the reader has already been closed.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
if atEof {
return nil
}
// Read data chunks from file until a line delimiter is found:
// (the whole buffer is searched, so a delimiter split across two chunks is still found)
var range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
while range.location == NSNotFound {
let tmpData = fileHandle.readDataOfLength(chunkSize)
if tmpData.length == 0 {
// EOF or read error.
atEof = true
if buffer.length > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = NSString(data: buffer, encoding: encoding)
buffer.length = 0
return line as String?
}
// No more lines.
return nil
}
buffer.appendData(tmpData)
range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
}
// Convert complete line (excluding the delimiter) to a string:
let line = NSString(data: buffer.subdataWithRange(NSMakeRange(0, range.location)),
encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.replaceBytesInRange(NSMakeRange(0, range.location + range.length), withBytes: nil, length: 0)
return line as String?
}
/// Start reading from the beginning of file.
/// Discards any buffered bytes and clears the EOF flag.
func rewind() -> Void {
fileHandle.seekToFileOffset(0)
buffer.length = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
/// Calling it again is harmless because the handle is nil after the first call.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
Usage:
// The initializer is failable, so the body is skipped if the reader cannot be created.
if let aStreamReader = StreamReader(path: "/path/to/file") {
// Close the file when this scope is left.
defer {
aStreamReader.close()
}
// nextLine() returns nil at end of file, which ends the loop.
while let line = aStreamReader.nextLine() {
print(line)
}
}
You can even use the reader with a for-in loop
// Each iteration receives the next line; the loop ends when nextLine() returns nil.
for line in aStreamReader {
print(line)
}
by implementing the SequenceType
protocol (compare http://robots.thoughtbot.com/swift-sequences):
/// Lets a `StreamReader` be used directly in a `for`-`in` loop.
extension StreamReader : SequenceType {
/// Returns a generator whose elements come from calling `nextLine()` on this
/// reader. The generator reads from the reader itself, so it continues from
/// the reader's current position rather than starting over.
func generate() -> AnyGenerator<String> {
return AnyGenerator {
return self.nextLine()
}
}
}
Update for Swift 3/Xcode 8 beta 6: Also "modernized" to use guard and the new Data value type:
/// Line-oriented reader over a file.
///
/// Pulls `chunkSize`-byte chunks through a `FileHandle`, accumulates them in
/// `buffer`, and hands back one delimiter-separated line per call to
/// `nextLine()`.
class StreamReader {
    /// Encoding used for the delimiter and for decoding each line.
    let encoding : String.Encoding
    /// Number of bytes requested per read.
    let chunkSize : Int
    /// The open file; nil once `close()` has been called.
    var fileHandle : FileHandle!
    /// The delimiter, encoded with `encoding`.
    let delimData : Data
    /// Bytes read from the file but not yet returned as a line.
    var buffer : Data
    /// True once a read has returned no data.
    var atEof : Bool

    /// Creates a reader for the file at `path`.
    ///
    /// Fails (returns nil) when the file cannot be opened for reading or when
    /// `delimiter` cannot be represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {
        guard let handle = FileHandle(forReadingAtPath: path) else {
            return nil
        }
        guard let encodedDelimiter = delimiter.data(using: encoding) else {
            return nil
        }
        self.fileHandle = handle
        self.delimData = encodedDelimiter
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.atEof = false
        self.buffer = Data(capacity: chunkSize)
    }

    deinit {
        close()
    }

    /// Returns the next line without its delimiter, or nil at end of file.
    ///
    /// nil is also returned when the bytes of the line cannot be decoded with
    /// `encoding`. Traps if the reader has been closed.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")
        if atEof {
            return nil
        }

        // Pull chunks until the buffer contains a complete line. The search
        // always covers the whole buffer, so a delimiter that straddles two
        // chunks is still found.
        var delimiterRange = buffer.range(of: delimData)
        while delimiterRange == nil {
            let chunk = fileHandle.readData(ofLength: chunkSize)
            if chunk.count == 0 {
                // Nothing more to read: end of file or read error.
                atEof = true
                if buffer.count == 0 {
                    return nil
                }
                // Whatever is left is the final, unterminated line.
                let lastLine = String(data: buffer, encoding: encoding)
                buffer.count = 0
                return lastLine
            }
            buffer.append(chunk)
            delimiterRange = buffer.range(of: delimData)
        }

        guard let found = delimiterRange else {
            return nil
        }
        // Decode everything in front of the delimiter ...
        let lineBytes = buffer.subdata(in: 0..<found.lowerBound)
        let text = String(data: lineBytes, encoding: encoding)
        // ... then drop the line together with its delimiter.
        buffer.removeSubrange(0..<found.upperBound)
        return text
    }

    /// Moves back to the start of the file and forgets buffered data.
    func rewind() -> Void {
        fileHandle.seek(toFileOffset: 0)
        buffer.count = 0
        atEof = false
    }

    /// Closes the file. `nextLine()` must not be called afterwards.
    func close() -> Void {
        if let handle = fileHandle {
            handle.closeFile()
        }
        fileHandle = nil
    }
}
/// Makes a `StreamReader` usable in `for`-`in` loops.
extension StreamReader : Sequence {
    /// Returns an iterator backed by `nextLine()`. It reads from this reader,
    /// so it continues from the reader's current position.
    func makeIterator() -> AnyIterator<String> {
        let reader = self
        return AnyIterator { reader.nextLine() }
    }
}
Solution 2
Efficient and convenient class for reading text file line by line (Swift 4, Swift 5)
Note: This code is platform independent (macOS, iOS, ubuntu)
import Foundation
/// Reads a text file line by line using the C `getline` function.
///
/// Each returned line keeps its trailing newline character, if it had one.
public class LineReader {
    /// Path the reader was created with.
    public let path: String

    /// The open C stream; closed in `deinit`.
    fileprivate let file: UnsafeMutablePointer<FILE>!

    /// Opens the file at `path` for reading.
    ///
    /// Returns nil when the file cannot be opened. The check happens before
    /// any stored property is assigned: a failable class initializer that
    /// fails after full initialization still runs `deinit`, which would call
    /// `fclose` with a NULL stream.
    public init?(path: String) {
        guard let stream = fopen(path, "r") else { return nil }
        self.path = path
        self.file = stream
    }

    /// The next line of the file, or nil at end of file or on a read error.
    public var nextLine: String? {
        // getline allocates (or grows) the buffer; it must be released with free().
        var line: UnsafeMutablePointer<CChar>? = nil
        var linecap: Int = 0
        defer { free(line) }
        guard getline(&line, &linecap, file) > 0, let cString = line else {
            return nil
        }
        return String(cString: cString)
    }

    deinit {
        fclose(file)
    }
}
/// Makes a `LineReader` usable in `for`-`in` loops.
extension LineReader: Sequence {
    /// Returns an iterator that yields `nextLine` until it becomes nil.
    public func makeIterator() -> AnyIterator<String> {
        let reader = self
        return AnyIterator { reader.nextLine }
    }
}
Usage:
// Stop here when the file cannot be opened.
guard let reader = LineReader(path: "/Path/to/file.txt") else {
    return
}
// Print every line behind a ">" marker, without surrounding whitespace
// or the trailing newline that each line carries.
for line in reader {
    let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
    print(">" + trimmed)
}
Solution 3
Swift 4.2 Safe syntax
/// Reads a text file line by line through the C `getline` function.
/// Lines are returned including their trailing newline, if present.
class LineReader {
    /// Path the reader was created with.
    let path: String

    /// The open C stream; closed in `deinit`.
    private let file: UnsafeMutablePointer<FILE>

    /// Opens the file at `path` for reading; fails when it cannot be opened.
    init?(path: String) {
        self.path = path
        if let stream = fopen(path, "r") {
            self.file = stream
        } else {
            return nil
        }
    }

    deinit {
        fclose(file)
    }

    /// The next line of the file, or nil once nothing more can be read.
    var nextLine: String? {
        // getline allocates the buffer; release it on every exit path.
        var lineBuffer: UnsafeMutablePointer<CChar>?
        var capacity = 0
        defer {
            free(lineBuffer)
        }
        let bytesRead = getline(&lineBuffer, &capacity, file)
        if bytesRead > 0, let cString = lineBuffer {
            return String(cString: cString)
        }
        return nil
    }
}
/// Makes a `LineReader` usable wherever a `Sequence` of lines is expected.
extension LineReader: Sequence {
    /// Returns an iterator that yields `nextLine` until it becomes nil.
    func makeIterator() -> AnyIterator<String> {
        return AnyIterator { self.nextLine }
    }
}
Usage:
// Give up when the file cannot be opened.
guard let reader = LineReader(path: "/Path/to/file.txt") else {
    return
}
// Print each line without surrounding whitespace or its trailing newline.
reader.forEach {
    print($0.trimmingCharacters(in: .whitespacesAndNewlines))
}
Solution 4
This function takes a file URL and returns a sequence which will return every line of the file, reading them lazily. It works with Swift 5. It relies on the underlying C function getline:
/// Iteration state carried by the sequence returned from `lines(ofFile:)`.
typealias LineState = (
    // buffer owned by getline, holding the most recently read line as a C string
    linePtr: UnsafeMutablePointer<CChar>?,
    // capacity of that buffer, maintained by getline
    linecap: Int,
    // the open C stream, or nil if the file could not be opened or is already closed
    filePtr: UnsafeMutablePointer<FILE>?
)

/// Returns a sequence which iterates through all lines of the file at the URL.
///
/// Each element includes the line's own trailing newline character, if any.
/// If the file cannot be opened the sequence is empty.
///
/// - Parameter url: file URL of a file to read
/// - Returns: a Sequence which lazily iterates through lines of the file
///
/// - warning: the caller of this function **must** iterate through all lines of the file, since aborting iteration midway will leak memory and a file pointer
/// - precondition: the file must be UTF8-encoded (which includes ASCII-encoded)
func lines(ofFile url: URL) -> UnfoldSequence<String, LineState> {
    let initialState: LineState = (linePtr: nil, linecap: 0, filePtr: fopen(url.path, "r"))
    return sequence(state: initialState, next: { (state) -> String? in
        // fopen failed (or the stream was already closed): never hand a NULL
        // stream to getline/fclose.
        guard let file = state.filePtr else { return nil }

        if getline(&state.linePtr, &state.linecap, file) > 0,
           let theLine = state.linePtr {
            return String(cString: theLine)
        }

        // End of file or read error: release getline's buffer and the stream,
        // and clear the state so nothing can be freed or closed twice.
        free(state.linePtr)
        state.linePtr = nil
        state.linecap = 0
        fclose(file)
        state.filePtr = nil
        return nil
    })
}
So for instance, here's how you would use it to print every line of a file named "foo" in your app bundle:
// Look up the bundled resource; force-unwrapping is acceptable here because a
// missing bundled file is a programmer error.
let url = Bundle.main.url(forResource: "foo", withExtension: nil)!
for line in lines(ofFile: url) {
    // suppress print's automatically inserted line ending, since
    // lines(ofFile:) returns each line with its own newline character.
    print(line, separator: "", terminator: "")
}
I developed this answer by modifying Alex Brown's answer to remove a memory leak mentioned in Martin R's comment, and by updating it for Swift 5.
Solution 5
I'm late to the game, but here's a small class I wrote for that purpose. After some different attempts (such as trying to subclass NSInputStream) I found this to be a reasonable and simple approach.
Remember to #import <stdio.h> in your bridging header.
// Use it like this:
let reader = ReadLine(path: somePath)
while let line = reader.readline() {
    // do something...
}
/// Reads a file line by line with the C `getline` function.
///
/// The file is opened lazily on the first call to `readline()`. Needs
/// `fopen`, `getline`, `free` and `fclose` in scope (for example through
/// `import Foundation`).
class ReadLine {
    /// Line buffer owned by getline. It starts out nil so that getline
    /// allocates it itself, and it is released with free().
    private var buf: UnsafeMutablePointer<CChar>? = nil
    /// Capacity of `buf`, maintained by getline.
    private var n: Int = 0
    /// Path of the file to read.
    let path: String
    /// fopen mode; the file is opened read-only.
    let mode: String = "r"
    /// The open stream, or nil if it has not been opened or could not be opened.
    private var filepointer: UnsafeMutablePointer<FILE>?
    /// Whether fopen has been tried, so that a failed open is not retried.
    private var didAttemptOpen = false

    init(path: String) {
        self.path = path
    }

    /// Opens the file on first use and returns the stream, or nil on failure.
    private func openedFile() -> UnsafeMutablePointer<FILE>? {
        if !didAttemptOpen {
            didAttemptOpen = true
            // Swift strings bridge to C strings for the duration of the call,
            // so no pointer has to outlive a withCString closure.
            filepointer = fopen(path, mode)
        }
        return filepointer
    }

    /// Returns the next line, including its trailing newline if present.
    ///
    /// Returns nil at end of file, on a read error, or when the file could
    /// not be opened. Bytes that are not valid UTF-8 are replaced by U+FFFD
    /// instead of ending the iteration.
    func readline() -> String? {
        guard let file = openedFile() else { return nil }
        guard getline(&buf, &n, file) > 0, let line = buf else { return nil }
        return String(cString: line)
    }

    deinit {
        free(buf)
        if let file = filepointer {
            fclose(file)
        }
    }
}
Related videos on Youtube
Matt
I am a proficient computer user, fairly good VB.NET programmer, and accomplished computer network hacker/security analyst.
Updated on November 28, 2020Comments
-
Matt over 3 years
I am trying to read a file given in an
NSURL
and load it into an array, with items separated by a newline character\n
.Here is the way I've done it so far:
var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString if var list = possList { list = list.componentsSeparatedByString("\n") as NSString[] return list } else { //return empty list }
I'm not very happy with this for a couple of reasons. One, I'm working with files that range from a few kilobytes to hundreds of MB in size. As you can imagine, working with strings this large is slow and unwieldy. Secondly, this freezes up the UI when it's executing--again, not good.
I've looked into running this code in a separate thread, but I've been having trouble with that, and besides, it still doesn't solve the problem of dealing with huge strings.
What I'd like to do is something along the lines of the following pseudocode:
var aStreamReader = new StreamReader(from_file_or_url) while aStreamReader.hasNextLine == true { currentline = aStreamReader.nextLine() list.addItem(currentline) }
How would I accomplish this in Swift?
A few notes about the files I'm reading from: All files consist of short (<255 chars) strings separated by either
\n
or\r\n
. The length of the files range from ~100 lines to over 50 million lines. They may contain European characters, and/or characters with accents.-
macshome almost 10 yearsAre you wanting to write the array out to disk as you go or just let the OS handle it with memory? Will the Mac running it have enough ram that you could map the file and work with it that way? Multiple tasks are easy enough to do, and I suppose you could have multiple jobs that start reading the file at different places.
-
-
Matt almost 10 yearsI appreciate the suggestion(s), but I am specifically looking for the code in Swift. Additionally, I want to work with one line at a time, rather than all the lines at once.
-
macshome almost 10 yearsSo are you looking to work with one line then release it and read the next one in? I would need to think that it is going to be faster to work with it in memory. Do they need to be processed in order? If not you can use a enumeration block to dramatically speed up the processing of the array.
-
Matt almost 10 yearsI'd like to grab a number of lines at once, but I won't necessarily need to load all of the lines. As for being in order, it's not critical, but it would be helpful.
-
Matt almost 10 yearsWhat happens if you extend the
case 0...127
to non-ASCII characters? -
Grimxn almost 10 yearsWell that really depends on what character encoding you have in your files. If they are one of the many formats of Unicode, you'll need to code for that, if they are one of the many pre-Unicode PC "code-page" systems, you'll need to decode that. The Foundation libraries do all of this for you, it's a lot of work on your own.
-
Matt almost 10 yearsWhere would I put the
extension
code block? In theStreamReader
class? -
Martin R almost 10 years@Matt: It does not matter. You can put the extension in the same Swift file as the "main class", or in a separate file. - Actually you don't really need an extension. You can add the
generate()
function to the StreamReader class and declare that asclass StreamReader : Sequence { ... }
. But it seems to be good Swift style to use extensions for separate pieces of functionality. -
Matt almost 10 yearsDoes this support Western European characters and accent marks (just the marks themselves)?
-
Matt almost 10 yearsOr, is it possible to have it handle unrecognized characters?
-
Martin R almost 10 years@Matt: Do you know which characters set is used in the text file? You could try NSWindowsCP1252StringEncoding or NSISOLatin1StringEncoding, compare stackoverflow.com/questions/13929403/….
-
Matt almost 10 yearsThis is the file: whitehatenterprises.com/downloads/mangled.txt.zip (Caution, it's pretty big) I'm running in to trouble around line 1704240.
-
Matt almost 10 yearsDon't worry about it. Using
NSISOLatin1SringEncoding
seemed to fix what was happening :) -
Klaas over 9 yearsFor Swift 1.1 and Xcode 6.1 I created an updated version of your
StreamReader
: gist.github.com/klaas/4691612802f540b6a9c5 -
Klaas over 9 yearsFailable initializers come in pretty handy ;-)
-
zanzoken over 9 yearsline seems to be always nil for me. Any idea what I could be doing wrong? I am getting the path from my NSURL through url.path!
-
Martin R over 9 years@zanzoken: What kind of URL are you using? The above code works only for file URLs. It cannot be used to read from a general server URL. Compare stackoverflow.com/questions/26674182/… and my comments under the question.
-
zanzoken over 9 yearsI am using fileURLs which I fetched from a share extension I wrote. So basically they are picture files.
-
Martin R over 9 years@zanzoken: My code is meant for text files, and expects the file to use a specified encoding (UTF-8 by default). If you have a file with arbitrary binary bytes (such as an image file) then the data->string conversion will fail.
-
zanzoken over 9 yearsIs there a way to do it for images?
-
Martin R over 9 years@zanzoken: Reading scan lines from an image is a completely different topic and has nothing to do with this code, sorry. I am sure that it can be done for example with CoreGraphics methods, but I do not have an immediate reference for you.
-
Hector Matos over 9 yearsI was just about to write this up myself but I had time constraints. Thanks for this!
-
Alex Brown about 9 yearsMuch thanks to a now departed answer which gave me the getline code!
-
Alex Brown about 9 yearsFails to handle "by line" at all. It blits input data to output, and does not recognise the difference between normal characters and line end characters. Obviously, the output consists of the same lines as the input, but that's because newline is also blitted.
-
Alex Brown about 9 yearsObviously I'm completely ignoring encoding. Left as an exercise for the reader.
-
μολὼν.λαβέ about 9 yearswas thinking about writing some C code but this is good stuff.
-
Martin R almost 9 yearsNote that your code leaks memory as
getline()
allocates a buffer for the data. -
Samuel Rabinowitz almost 9 yearsGreat post, but I found a couple of errors using Swift 2. In the
buffer.rangeOfData
call,options
cannot benil
or it says "cannot invoke 'rangeOfData' with an argument list of type [the arguments]." Changingnil
toNSDataSearchOptions()
or some actual instance of that object fixes the problem. Also, this is minor, but sincetmpData
is only assigned a value once, it can be made a constant by changing thevar
before it tolet
. -
Martin R almost 9 years@5happy1:
options: nil
has to be replaced byoptions: []
as I mentioned in the "Update for Swift 2" at the end of the answer. You are right about the constant, I have fixed that. Thanks for the feedback! -
Samuel Rabinowitz almost 9 years@MartinR You're welcome! I'm sorry I corrected the options thing, but I just didn't completely understand it on my first read-through. Thanks for clarifying it though, I understand it now. :)
-
Samuel Rabinowitz almost 9 yearsAttention all
Int(aStreamReader.nextLine())
people!!! If that line (or something like it with some optional/forced unwraps (?
or!
)) is giving you trouble because it becomesnil
, then simply remove the last character from whatnextLine()
returns, because it returns something like"17\r"
. To remove it, follow the instructions here: stackoverflow.com/questions/24122288/… except count the characters by the method seen here (in Swift 2): stackoverflow.com/questions/24037711/get-the-length-of-a-string -
applemavs almost 9 yearsFor some reason, using a relative path such as
./file.txt
doesn't work. -
Martin R almost 9 years@applemavs: This works with relative paths as well (I just tested it). Did you verify that the file is located in the current working directory of the process?
-
Igor Tupitsyn almost 9 yearsThis is awesome. Is it possible to use the code with a string instead of a file? I have been trying but cannot figure it out. Thanks!
-
Martin R almost 9 yearsI like this, but it can still be improved. Creating pointers using
withCString
is not necessary (and actually really unsafe), you can simply callreturn fopen(self.path, self.mode)
. One might add a check if the file really could be opened, currentlyreadline()
will just crash. TheUnsafePointer<CChar>
cast is not needed. Finally, your usage example does not compile. -
Martin R almost 9 years@IgorTupitsyn: It should be possible to extend the code to work with strings, but why would you want that? The purpose of this routines was to read huge files line by line, so that you don't have to load the entire file into memory. If you already have a string then you can just use componentsSeparatedByString to split it into lines.
-
Igor Tupitsyn almost 9 yearsMartin. I am using your great solution to extract parts of texts from a big file (these parts are separated from each other by empty lines). Each part consists of multiple lines of text (sometimes as long as 30-40 lines). So, I then need to break the extracted part into separate lines. In C++ I used getline to do both tasks, which was very quick. And I was thinking of a similar solution here. Thanks a lot!
-
Christian over 8 yearsin the line
delimData = delimiter.dataUsingEncoding(encoding)
I would suggest to replaceencoding
byNSUTF8StringEncoding
as the delimiter is coming from the source file. If you encode a UTF16-file it will not work otherwise -
Martin R over 8 years@Christian: Are you sure? If, for example,
delim="\n"
andencoding= NSUTF16LittleEndianStringEncoding
, thendelimData
is set to<0A 00>
and matches the UTF-16 newline character in the file. -
Christian over 8 yearsI just parsed an .strings-file which is UTF16 with my source being UTF8 and I had to do it like this. I thought that was the reason
-
Martin R over 8 years@Rodrigo: Thank you for the edit.
options: .Anchored
is not the correct solution however because it ties the search for the delimiter to the start of the data. Also I had already added an "addendum" for Swift 2. – But I have taken the opportunity to clean-up the answer and remove all pre-2.0 stuff, hopefully that avoids future confusion. -
Karoy Lorentey over 7 yearsWhat happens if there's an I/O error? As far as I can tell, FileHandle still throws Objective-C exceptions that are uncatchable in Swift.
-
lgarbo over 7 yearsI had a spike in memory graph while iterate over lines. Wrap the code in an autoreleasepool solved.
-
theDC over 7 years@Eporediese I'm facing the exact same issue! Could you provide a code snippet how did you solve it using autoreleasepool? Thanks in advance
-
lgarbo over 7 years@DCDC
while !aStreamReader.atEof { try autoreleasepool { guard let line = aStreamReader.nextLine() else { return } ...code... } }
-
Martin R over 7 years@AlexBrown: That is not true.
fgets()
reads characters up to (and including) a newline character (or EOF). Or am I misunderstanding your comment? -
Ethan about 7 yearsYou may want to wrap
buffer.removeSubrange(0..<range.upperBound)
inautoreleasepool
to reduce memory usage when reading large files. -
leanne over 6 yearsWorks in Swift 4. Test at this GitHub repo.
-
gbenroscience over 4 years@Martin R , please how would this look in Swift 4/5? I need something this simple to read a file line by line –
-
Peter Schorn almost 4 yearsIs there any reason why you don't just directly conform
StreamReader
to theIteratorProtocol
and renamenextLine
tonext
? This would obviate the need for the extension toStreamReader
that declares themakeIterator
method. -
Martin R almost 4 years@PeterSchorn: No particular reason. I just wrote the nextLine method first and added the sequence conformance later. Also “nextLine” might describe the purpose of the function better than “next“ but that is of course a matter of personal taste.
-
Michael N about 3 yearsif let aStreamReader = StreamReader(path: filePath, delimiter: "\n") { defer { aStreamReader.close() } while aStreamReader.atEof == false { while let line = aStreamReader.nextLine() { print(line) } } }