Perl File Handling: Reading, Writing, and Processing Files

Perl File Handling: Reading, Writing, and Processing Files

AS
Aman Saurav
| Dec 25, 2024 |
read
#perl #file-handling #io #text-processing

Perl File Handling: Reading, Writing, and Processing Files

Perl excels at file manipulation and text processing. This guide covers everything from basic file operations to advanced techniques for handling files efficiently.

Opening Files

Basic File Opening

#!/usr/bin/perl
use strict;
use warnings;

# The open() calls below are independent alternatives, one per access mode.
# Each one declares its own `my $fh`; in a real program keep only the form
# you need (or give each handle a distinct name / its own scope).

# Open for reading
open(my $fh, '<', 'input.txt') or die "Cannot open input.txt: $!";

# Open for writing (overwrites)
open(my $fh, '>', 'output.txt') or die "Cannot open output.txt: $!";

# Open for appending
open(my $fh, '>>', 'log.txt') or die "Cannot open log.txt: $!";

# Open for read/write
open(my $fh, '+<', 'data.txt') or die "Cannot open data.txt: $!";

# Three-argument versus two-argument open.
# ✅ Safe: Mode and filename separated
open(my $fh, '<', $filename) or die "Cannot open $filename: $!";

# ❌ Unsafe: Two-argument form
# The mode is parsed out of the same string as the file name, so characters
# in $filename (such as a leading '>' or a trailing '|') can change what
# open() does.
open(my $fh, "< $filename") or die $!;  # Vulnerable to injection

File Modes

# Mode strings accepted as the second argument of three-argument open().
'<'   # Read only (file must exist)
'>'   # Write (create, or truncate an existing file)
'>>'  # Append (create if missing)
'+<'  # Read/write (file must exist, not truncated)
'+>'  # Read/write (truncate first)
'|-'  # Pipe to command (write to its STDIN)
'-|'  # Pipe from command (read its STDOUT)

Reading Files

Line-by-Line Reading

# Read one line at a time into a named variable.
# `while (my $line = <$fh>)` implicitly tests defined(), so a line
# containing just "0" does not end the loop early.
open(my $fh, '<', 'file.txt') or die $!;
while (my $line = <$fh>) {
    chomp $line;  # Remove newline
    print "Line: $line\n";
}
close $fh;

# Shorter form: the line lands in $_, which chomp and friends use by default.
# The handle above is already closed, so the file is opened again first.
open($fh, '<', 'file.txt') or die $!;
while (<$fh>) {
    chomp;
    print "Line: $_\n";
}
close $fh;

Read Entire File

# Read all lines into array (each element keeps its trailing newline)
open(my $fh, '<', 'file.txt') or die $!;
my @lines = <$fh>;
close $fh;

# Read entire file into scalar.
# The do-block confines `local $/` (slurp mode) to this one read; setting it
# at file scope would silently switch every later readline to slurp mode too.
my $content = do {
    open(my $in, '<', 'file.txt') or die $!;
    local $/;  # Slurp mode: no record separator
    <$in>;     # $in is closed automatically when the block ends
};

# Using File::Slurp (recommended)
# read_file returns the whole content in scalar context and a list of lines
# in list context.
use File::Slurp;
$content = read_file('file.txt');
@lines   = read_file('file.txt');

Reading Binary Files

# Read a binary file in fixed-size chunks.
open(my $fh, '<:raw', 'image.jpg') or die $!;
binmode $fh;  # Ensure binary mode (redundant with :raw, but harmless)

my $buffer;
while (1) {
    # read() returns the number of bytes read, 0 at end of file and
    # undef on error -- the last two must not be confused.
    my $bytes_read = read($fh, $buffer, 1024);
    die "Read failed: $!" unless defined $bytes_read;
    last if $bytes_read == 0;

    # Process $buffer (the final chunk may be shorter than 1024 bytes)
}
close $fh;

Writing Files

Basic Writing

# Write to file (creates it, or truncates an existing one)
open(my $out, '>', 'output.txt') or die $!;
print $out "Hello, World!\n";
print $out "Line 2\n";
# Output is buffered, so errors such as a full disk may only surface when
# the handle is closed -- check close on every write handle.
close $out or die "Cannot close output.txt: $!";

# Append to file (existing content is kept)
open(my $log, '>>', 'log.txt') or die $!;
print $log scalar(localtime), ": Log entry\n";  # scalar() yields a readable timestamp
close $log or die "Cannot close log.txt: $!";

Writing with Formatting

# Write formatted report lines to report.txt.
open(my $fh, '>', 'report.txt') or die $!;

# printf to file: %-20s left-justifies in 20 columns, %3d right-justifies
printf $fh "Name: %-20s Age: %3d\n", "John Doe", 30;

# Formatted output: build the string with sprintf, then print it
my $name = "Alice";
my $score = 95.5;
print $fh sprintf("%-15s: %6.2f\n", $name, $score);

# Buffered write errors only show up at close, so check it.
close $fh or die "Cannot close report.txt: $!";

Atomic File Writing

use File::Temp qw(tempfile);

# Write to temporary file first.
# The temporary file is created in the same directory as the final file:
# rename() is only atomic (and only guaranteed to work at all) when source
# and destination live on the same filesystem, which the system temp
# directory often does not.
my ($temp_fh, $temp_filename) = tempfile('final.txt.XXXXXX', DIR => '.');
print $temp_fh "Data...\n" or die "Cannot write $temp_filename: $!";
# Make sure everything reached the file before it replaces the target.
close $temp_fh or die "Cannot close $temp_filename: $!";

# Atomically rename: readers see either the old file or the complete new one
rename($temp_filename, 'final.txt') or die "Rename failed: $!";

File Tests

Common File Tests

# Reference table of the file test operators. Each returns a true value when
# the test passes, '' when it fails and undef when the file does not exist.
my $file = 'test.txt';

# Existence and type
-e $file   # File exists
-f $file   # Regular (plain) file
-d $file   # Directory
-l $file   # Symbolic link
-p $file   # Named pipe (FIFO)
-S $file   # Socket

# Permissions
-r $file   # Readable by effective uid/gid
-w $file   # Writable by effective uid/gid
-x $file   # Executable by effective uid/gid
-o $file   # Owned by effective UID

# Size and age
-z $file   # Zero size (file is empty)
-s $file   # Nonzero size (returns the size in bytes)
-M $file   # Script start time minus modification time, in days
-A $file   # Script start time minus last access time, in days
-C $file   # Script start time minus inode change time, in days

# File type
-T $file   # Text file (heuristic guess from the file's content)
-B $file   # Binary file (opposite of -T)

Practical Examples

# Usage examples for the file test operators; $file holds the path to test.

# Check if file exists and is readable
if (-e $file && -r $file) {
    print "File exists and is readable\n";
}

# Get file size
# -s returns undef when the file does not exist, which would trigger an
# "uninitialized value" warning in the print below.
my $size = -s $file;
print "File size: $size bytes\n";

# Check if file is older than 7 days
# -M counts (fractional) days between the modification time and the moment
# the script started.
if (-M $file > 7) {
    print "File is older than 7 days\n";
}

# Chained file tests
# The special filehandle `_` reuses the stat information gathered by the
# previous test, so the file is only stat'ed once.
if (-f $file && -r _ && -w _) {  # _ reuses last stat
    print "Regular file, readable and writable\n";
}

Directory Operations

Reading Directories

# List the entries of a directory, first one at a time and then all at once.
# readdir returns bare entry names, not full paths.

# Open directory
opendir(my $dh, '/path/to/dir') or die "Cannot open directory: $!";

# Read entries
while (my $entry = readdir $dh) {
    next if $entry =~ /^\./;  # Skip . and .. (and every other name starting with a dot)
    print "Entry: $entry\n";
}
closedir $dh;

# Read all entries at once
# In list context readdir returns every remaining entry; the grep drops all
# names that start with a dot, as above.
opendir(my $dh, '/path/to/dir') or die $!;
my @files = grep { !/^\./ } readdir $dh;
closedir $dh;

Creating and Removing Directories

# Create and remove directories with the built-ins and with File::Path.
# The two mkdir calls are alternatives: the second one fails if the first
# has already created 'newdir'.

# Create directory
mkdir 'newdir' or die "Cannot create directory: $!";
mkdir 'newdir', 0755 or die $!;  # With permissions (octal mode, modified by the umask)

# Create nested directories
# make_path also creates any missing parent directories.
use File::Path qw(make_path);
make_path('path/to/nested/dir');

# Remove directory
# rmdir only succeeds on an empty directory.
rmdir 'emptydir' or die "Cannot remove directory: $!";

# Remove directory tree
# remove_tree deletes the directory together with everything below it.
use File::Path qw(remove_tree);
remove_tree('path/to/dir');

Traversing Directory Trees

# Walk a directory tree with File::Find. The callback runs once per entry;
# inside it $_ is the entry's name within the directory being visited and
# $File::Find::name is its path including the starting directory.
use File::Find;

# Find all .txt files
find(sub {
    return unless -f && /\.txt$/;  # both tests act on $_
    print "Found: $File::Find::name\n";
}, '/path/to/search');

# Custom processing
find(sub {
    if (-f && /\.log$/) {
        my $size = -s _;  # _ reuses the stat done by -f above
        print "$File::Find::name: $size bytes\n";
    }
}, '/var/log');

File Manipulation

Copying Files

# Copy and move files with File::Copy; both functions return true on
# success and set $! on failure.
use File::Copy;

# Copy file
copy('source.txt', 'dest.txt') or die "Copy failed: $!";

# Move/rename file
move('old.txt', 'new.txt') or die "Move failed: $!";

# Copy with permissions preserved
# File::Copy::Recursive is a CPAN module, not part of the core distribution.
use File::Copy::Recursive qw(fcopy);
fcopy('source.txt', 'dest.txt') or die $!;

Deleting Files

# Delete files with unlink, which returns the number of files it removed.

# Delete single file
unlink 'file.txt' or die "Cannot delete file: $!";

# Delete multiple files
# The return value is ignored here, so files that could not be removed go
# unnoticed.
unlink 'file1.txt', 'file2.txt', 'file3.txt';

# Delete with glob
# glob expands the pattern into the list of matching file names.
unlink glob '*.tmp';

# Count deleted files
my $count = unlink glob '*.bak';
print "Deleted $count backup files\n";

Renaming Files

# Rename a single file, then every .txt file in the current directory.

# Simple rename
rename 'old.txt', 'new.txt' or die "Rename failed: $!";

# Batch rename
opendir(my $dh, '.') or die $!;
while (my $file = readdir $dh) {
    next unless $file =~ /\.txt$/;  # only names ending in .txt
    my $new = $file;
    $new =~ s/\.txt$/.md/;          # same name with the extension swapped
    # NOTE(review): rename usually replaces an existing target silently --
    # confirm that overwriting an existing .md file is acceptable.
    rename $file, $new or warn "Cannot rename $file: $!";
}
closedir $dh;

Advanced File Operations

File Locking

# Advisory locking with flock: it only coordinates processes that also call
# flock on the same file. The LOCK_* constants come from Fcntl.
use Fcntl qw(:flock);

open(my $fh, '+<', 'data.txt') or die $!;

# Exclusive lock (write)
# Blocks until no other process holds a lock on the file.
flock($fh, LOCK_EX) or die "Cannot lock: $!";
# ... write to file ...
flock($fh, LOCK_UN);  # Unlock

# Shared lock (read)
# Several processes may hold a shared lock at the same time.
flock($fh, LOCK_SH) or die "Cannot lock: $!";
# ... read from file ...
flock($fh, LOCK_UN);

close $fh;

File Permissions

# Change permissions and ownership, and read file metadata with stat.

# Change permissions
# The mode must be an octal number (leading 0), not a string.
chmod 0644, 'file.txt' or die "chmod failed: $!";
chmod 0755, 'script.pl' or die $!;

# Change ownership
# $uid and $gid are numeric ids that must be defined by the caller; they
# are not set anywhere in this snippet.
chown $uid, $gid, 'file.txt' or die "chown failed: $!";

# Get file stats
# stat returns a 13-element list; an empty list means the call failed.
my @stats = stat 'file.txt';
my $mode = $stats[2];   # file type and permission bits
my $size = $stats[7];   # size in bytes
my $mtime = $stats[9];  # last modification time, seconds since the epoch

Temporary Files

# Create temporary files and directories through File::Temp's object
# interface. The two `my $fh` declarations are alternatives.
use File::Temp;

# Temporary file (auto-deleted)
# The object works as a filehandle and stringifies to the file's name.
my $fh = File::Temp->new();
print $fh "Temporary data\n";
# File deleted when $fh goes out of scope

# Named temporary file
# The trailing X characters of TEMPLATE are replaced with random characters.
my $fh = File::Temp->new(
    TEMPLATE => 'tempXXXXX',
    DIR => '/tmp',
    SUFFIX => '.txt',
    UNLINK => 1  # Auto-delete
);

# Temporary directory
# $dir stringifies to the directory's path.
my $dir = File::Temp->newdir();
print "Temp dir: $dir\n";

Practical Examples

Log File Processing

#!/usr/bin/perl
use strict;
use warnings;

# Process Apache access log: count requests per client IP and responses per
# HTTP status code, then print both summaries.
open(my $fh, '<', '/var/log/apache2/access.log') or die $!;

my %ips;           # client address => number of requests
my %status_codes;  # HTTP status    => number of responses

while (my $line = <$fh>) {
    # Parse log line: the first field is the client address, the status
    # code follows the quoted request line. Non-matching lines are skipped.
    if ($line =~ /^(\S+).*"\w+ \S+ HTTP\/[\d.]+" (\d+)/) {
        my ($ip, $status) = ($1, $2);
        $ips{$ip}++;
        $status_codes{$status}++;
    }
}
close $fh;

# Report top IPs (busiest first; ties are ordered by address so the output
# is stable between runs)
print "Top 10 IPs:\n";
my @top_ips = sort { $ips{$b} <=> $ips{$a} or $a cmp $b } keys %ips;

# Clamp the slice to the entries that exist: with fewer than 10 distinct
# addresses a fixed 0..9 slice would yield undef elements and warnings.
my $last_index = $#top_ips < 9 ? $#top_ips : 9;
for my $ip (@top_ips[0 .. $last_index]) {
    printf "%-15s: %d requests\n", $ip, $ips{$ip};
}

# Report status codes
print "\nStatus Codes:\n";
for my $code (sort keys %status_codes) {
    printf "%3s: %d\n", $code, $status_codes{$code};
}

CSV File Processing

use Text::CSV;

# Read CSV
# binary => 1 allows non-ASCII and embedded newlines in fields;
# auto_diag => 1 reports parse errors automatically.
my $csv = Text::CSV->new({ binary => 1, auto_diag => 1 });
open(my $fh, '<', 'data.csv') or die $!;

my $header = $csv->getline($fh);  # Get header row (consumed so the loop starts at the data)
while (my $row = $csv->getline($fh)) {
    # Each row is an array reference of field values.
    my ($name, $age, $city) = @$row;
    print "Name: $name, Age: $age, City: $city\n";
}
close $fh;

# Write CSV
# print() only terminates a record when `eol` is set; without it every row
# ends up on one single line. A separate writer object keeps the reader's
# parsing settings untouched.
my $csv_out = Text::CSV->new({ binary => 1, auto_diag => 1, eol => "\n" });
open(my $out, '>', 'output.csv') or die $!;
$csv_out->print($out, ['Name', 'Age', 'City']);  # Header
$csv_out->print($out, ['John', 30, 'NYC']);
$csv_out->print($out, ['Jane', 25, 'LA']);
# Buffered write errors only surface at close.
close $out or die "Cannot close output.csv: $!";

Configuration File Parsing

#!/usr/bin/perl
use strict;
use warnings;

# read_config($file)
#
# Parses a simple "key = value" configuration file.
#   - Lines whose first non-blank character is '#' are comments.
#   - Blank lines are ignored.
#   - Keys consist of word characters and may be indented.
#   - Whitespace around the key, around '=' and after the value is dropped,
#     which also removes the carriage return of CRLF line endings.
#   - "key =" stores the empty string.
#   - Lines that do not look like an assignment are skipped.
#   - When a key occurs more than once, the last occurrence wins.
#
# Returns the settings as a flat key/value list (assign it to a hash).
# Dies when the file cannot be opened.
sub read_config {
    my ($file) = @_;
    my %config;

    open(my $fh, '<', $file) or die "Cannot open $file: $!";
    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^\s*#/;  # Skip comments
        next if $line =~ /^\s*$/;  # Skip blank lines

        # The non-greedy value capture followed by \s*$ keeps trailing
        # blanks (and a stray "\r") out of the stored value.
        if ($line =~ /^\s*(\w+)\s*=\s*(.*?)\s*$/) {
            $config{$1} = $2;
        }
    }
    close $fh;

    return %config;
}

# Load the settings and print two of them. A key that is missing from
# app.conf is undef here and triggers an "uninitialized value" warning.
my %config = read_config('app.conf');
print "Database: $config{database}\n";
print "Port: $config{port}\n";

File Backup Script

#!/usr/bin/perl
use strict;
use warnings;
use File::Copy;
use File::Basename;
use POSIX qw(strftime);

# backup_file($file)
#
# Copies $file to a timestamped sibling named
# "<dir><name>_<YYYYMMDD_HHMMSS><ext>.bak" and prints a confirmation line.
#
# Returns the path of the backup, or nothing when $file is not a regular
# file. Dies when the copy fails.
sub backup_file {
    my ($file) = @_;

    # Anything that is not a plain file is left alone.
    if (!-f $file) {
        return;
    }

    # Split into base name, directory part and extension (text from the
    # last dot onwards), then splice the local timestamp in between.
    my ($name, $path, $ext) = fileparse($file, qr/\.[^.]*/);
    my $timestamp = strftime("%Y%m%d_%H%M%S", localtime);
    my $backup = join '', $path, $name, '_', $timestamp, $ext, '.bak';

    copy($file, $backup) or die "Backup failed: $!";
    print "Backed up: $file -> $backup\n";

    return $backup;
}

# Back up every .conf file in the current directory, one call per match.
backup_file($_) for glob '*.conf';

File Comparison

use File::Compare;

# Compare two files
# compare() returns 0 when the files are equal, 1 when they differ and -1
# when an error occurred (for example a file could not be opened).
my $result = compare('file1.txt', 'file2.txt');
if ($result == 0) {
    print "Files are identical\n";
} elsif ($result == 1) {
    print "Files differ\n";
} else {
    warn "Cannot compare files: $!\n";
}

# Line-by-line comparison
open(my $fh1, '<', 'file1.txt') or die $!;
open(my $fh2, '<', 'file2.txt') or die $!;

my $line_num = 0;
while (1) {
    my $line1 = <$fh1>;
    my $line2 = <$fh2>;

    # Keep going until BOTH files are exhausted, so extra lines at the end
    # of either file are reported as differences.
    last unless defined($line1) || defined($line2);
    $line_num++;

    # A file that has already ended contributes an empty line.
    $line1 //= '';
    $line2 //= '';

    if ($line1 ne $line2) {
        print "Difference at line $line_num:\n";
        print "File1: $line1";
        print "File2: $line2";
    }
}

close $fh1;
close $fh2;

Performance Tips

1. Use Buffered I/O

# ❌ Slow: one getc call per character
# The explicit defined() is required: without it the loop would stop at the
# first "0" character, because the string "0" is false.
while (defined(my $char = getc($fh))) {
    # Process character
}

# ✅ Fast: one readline call per line
# (`while (my $line = <$fh>)` gets the defined() test implicitly.)
while (my $line = <$fh>) {
    # Process line
}

2. Avoid Repeated File Opens

# Both variants append the same 1000 lines to log.txt; they differ only in
# how often the file is opened and closed.

# ❌ Slow: Open/close in loop
for my $i (1..1000) {
    open(my $fh, '>>', 'log.txt') or die $!;  # one open per entry
    print $fh "Entry $i\n";
    close $fh;
}

# ✅ Fast: Open once
open(my $fh, '>>', 'log.txt') or die $!;
for my $i (1..1000) {
    print $fh "Entry $i\n";
}
close $fh;

3. Use File::Slurp for Small Files

# Copy a small file through memory with File::Slurp (a CPAN module).
use File::Slurp;

# ✅ Fast and simple
# read_file in scalar context returns the whole file as one string, so keep
# this for files that comfortably fit in memory.
my $content = read_file('small.txt');
write_file('output.txt', $content);

Error Handling

# Basic error handling: abort the program when the file cannot be opened
open(my $fh, '<', $file) or die "Cannot open $file: $!";

# Custom error handling: report the problem and carry on with the next file.
# `next` is only valid inside a loop, so this form belongs in the loop that
# walks over the files to process (the command-line arguments here).
for my $file (@ARGV) {
    open(my $fh, '<', $file) or do {
        warn "Cannot open $file: $!";
        next;
    };
    # ... process file ...
    close $fh;
}

# Try::Tiny for complex error handling
use Try::Tiny;

# Anything that dies inside the try block lands in catch, where the error
# is available in $_. The trailing semicolon is required because try/catch
# are ordinary function calls.
try {
    open(my $fh, '<', $file) or die $!;
    # ... process file ...
    close $fh;
} catch {
    warn "Error processing $file: $_";
};

Best Practices

  1. ✅ Always check return values of open, close, etc.
  2. ✅ Use three-argument open
  3. ✅ Use lexical filehandles (my $fh)
  4. ✅ Close files explicitly or use auto-closing
  5. ✅ Use chomp to remove newlines
  6. ✅ Use file tests before operations
  7. ✅ Handle errors appropriately
  8. ✅ Use modules for complex operations

Conclusion

Perl’s file handling capabilities are powerful and flexible. Key takeaways:

  1. ✅ Use three-argument open for safety
  2. ✅ Always check return values
  3. ✅ Use appropriate file modes
  4. ✅ Leverage CPAN modules for complex tasks
  5. ✅ Use file tests to avoid errors
  6. ✅ Handle errors gracefully

Master these techniques, and you’ll be able to handle any file processing task efficiently!