What's the best way to parse Excel file in Perl?

27,651

Solution 1

The best way is to use Spreadsheet::ParseExcel.

Here is an example:

#!/usr/bin/perl -w

use strict;
use warnings;

use Spreadsheet::ParseExcel;

my $parser   = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');

for my $worksheet ( $workbook->worksheets() ) {

    my ( $row_min, $row_max ) = $worksheet->row_range();
    my ( $col_min, $col_max ) = $worksheet->col_range();

    for my $row ( $row_min .. $row_max ) {
        for my $col ( $col_min .. $col_max ) {

            my $cell = $worksheet->get_cell( $row, $col );
            next unless $cell;

            print "Row, Col    = ($row, $col)\n";
            print "Value       = ", $cell->value(),       "\n";
            print "Unformatted = ", $cell->unformatted(), "\n";
            print "\n";
        }
    }
}

To convert an Excel file to text with Perl I'd recommend excel2txt which uses Spreadsheet::ParseExcel.

Solution 2

I have had great luck using Spreadsheet::ParseExcel.

Solution 3

The 'best' way would ideally be to use a module from our beloved CPAN.

Whenever you have a problem Instantly ask: Why have I not checked CPAN yet?

Which module however I can't be certain, here is a list to get you started, try them, see what works.

Solution 4

@echo off  
ECHO CHECK THE VERSION FROM THE PROJECT
    echo see also this [link][1]
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
ECHO The _Version is %_Version%

ECHO remove the output html files
del *.html /q
ECHO remove the output log files
del *.log /q


::pause
ECHO %0 > %0.log
ECHO %0.error.log >%0.error.log

set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
echo BaseDir is %BaseDir%  1>>%0.log 2>>%0.error.log
ECHO.


set LogLevel=3
echo LogLevel is %LogLevel%  1>>%0.log 2>>%0.error.log
ECHO.


::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
SET ExcelFileToParse="%BaseDir%\TODO.xls"
echo ExcelFileToParse is %ExcelFileToParse%  1>>%0.log 2>>%0.error.log
echo.

set OutputDir=%BaseDir%
echo OutputDir is %Outputdir%  1>>%0.log 2>>%0.error.log
echo.

ECHO SET THE UNICODE FOR PERL FOR UTF-8
SET PERL_UNICODE=S
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%

::set PerlScript=parseExcelToCsv.pl
set PerlScript=ExcelToHtml.pl
echo PerlScript is %PerlScript%  1>>%0.log 2>>%0.error.log
ECHO.


echo Action !!!  1>>%0.log 2>>%0.error.log
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir%  1>>%0.log 2>>%0.error.log
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log

%0.error.log
%0.log

::debug pause

use strict;
use Spreadsheet::ParseExcel;
use utf8 ; 
use Encode ; 


package ExcelToHtml ; 



my $DebugLevel = 3 ; 
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
my $minimumAmountOfCommandLineArgs = 3 ; 
my ( $ExcelFileToParse , $OutputDir , $BaseFileName ) = (); 


sub main {
    logMsg("$0 SCRIPT START " , 1 );
    #Action !!! call here methods
    checkCommandLineArgs();     
    buildNames (); 

    ParseExcelAndlogMsgFiles ( $ExcelFileToParse  , $BaseFileName , $OutputDir) ; 

    logMsg("$0 SCRIPT STOP " , 1 );

} #eof MAIN 

sub buildNames 
{

    $ExcelFileToParse = "$ARGV[1]" ; 
    $OutputDir="$ARGV[2]" ;
    $BaseFileName = $ExcelFileToParse ; 
    logMsg( "\$ExcelFileToParse is $ExcelFileToParse" ) ; 
    logMsg ( "\$OutputDir is $OutputDir" ) ; 
    logMsg ("\$BaseFileName is $BaseFileName \n" );
    $BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/;    #strip the directory part 
    logMsg ("\$BaseFileName is $BaseFileName ");
    $BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ;  #strip the file extension
    logMsg ( "\$BaseFileName is $BaseFileName "); 


} #eof sub buildNames


sub ParseExcelAndlogMsgFiles {

my $ExcelFileToParse = shift ; 
my $BaseFileName = shift ; 
my $OutputDir = shift ; 
my $strToReturn = "";

my $parser   = Spreadsheet::ParseExcel->new();
my $workbook = $parser->Parse("$ExcelFileToParse");

for my $worksheet ( $workbook->worksheets() ) {

        my ( $row_min, $row_max ) = $worksheet->row_range();
        my ( $col_min, $col_max ) = $worksheet->col_range();

        for my $row ( $row_min .. $row_max ) {
                my $rowStr = "" ; 
                for my $col ( $col_min .. $col_max ) {

                        my $cell = $worksheet->get_cell( $row, $col );
                        next unless $cell;

                        logMsg ( " Row, Col    = ($row, $col)") ; 
                        logMsg ( "\$cell->encoding() is " . $cell->encoding()) ;

                        logMsg (" Value = ", $cell->value() );
                        $rowStr .=  makeCell($cell->value() )      ;                #The Value

                        logMsg ( "Unformatted = ", $cell->unformatted()) ; 

                } #eof col
            $rowStr = makeRow( $rowStr ); 
            $strToReturn .= $rowStr ; 
     } #eof for my row

        my $worksheetName = $worksheet->{'Name'}  ; 
        my $FileSheet = "$OutputDir/$BaseFileName" .  '.' . $worksheetName . '.' . 'html' ; 
        $strToReturn = makeTable ( $strToReturn );
        $strToReturn = makeFile ( $strToReturn );
        logMsg ( "\$strToReturn  is $strToReturn " ) ; 
        PrintToFile ( $FileSheet, $strToReturn )  ; 
        $strToReturn = "" ;
        $FileSheet = "" ;
        } #eof for my worksheet

} #eof sub


sub trim    
{
        $_[0]=~s/^\s+//;
        $_[0]=~s/\s+$//;
        return $_[0];
}

# =========================================== eof sub trim 
# Action !!!



sub makeRow 
{
    my $row = shift ; 
    return "<tr>" .  $row .  "</tr> \n"  ; 
}

sub makeCell 
{
    my $cell = shift ; 
    return "<td>"  .  "$cell" . " </td>" ; 
}

sub makeTable
{
    my $table = shift ; 
    return "\n <table> " . $table . "</table> \n" ; 

}

sub makeFile
{
    my $file = shift ; 

    $file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ; 
    return $file ; 
}


sub checkCommandLineArgs
{

        if ( @ARGV != $minimumAmountOfCommandLineArgs )
        {
            logErrorMsg ( "Not enougn command line args supplied exit 1" , 1 );
            die "usage: $0 1 args \n" ; 
            exit 1 ; 
        } #eof if
        else 
        {
                foreach (@ARGV) { logMsg ( "$_ " ) ;  }
        } #eof else         if ( @ARGV != 4 )


} #eof sub checkCommandLineArgs

#log a message according to the DebugLevel
sub logMsg 
{
    my $msg = shift ;
    my $importance = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth (); 
    my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" .  '.log' ; 
    $msg = GetANiceTime () .' --- ' .   $msg . " \n" ; 

    if ( $importance == 1 ) 
    {
        $msg = "\n============================================================ \n" . $msg ; 
        $msg = $msg . "============================================================ \n" ;
    }


    #debug nothing
    if ( $DebugLevel == 0 ) {   return ; } 

    #just logMsg the message
    if ( $DebugLevel == 1 ) 
    {
        logMsg ( $msg ); 
    } #eof if ( $DebugLevel == 1 ) 

    #logMsg the message in 
    if ( $DebugLevel == 2 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
        print LOG $msg  ; 
        close LOG;
    }

    #logMsg the message in 
    if ( $DebugLevel == 3 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
        print LOG $msg  ; 
        close LOG;
        print $msg ; 
    } #eof if ( $DebugLevel == 3 ) 


} #eof sub logMsg 

#log a message according to the DebugLevel
sub logErrorMsg 
{
    my $errorMsg = shift ; 
    my $importance  = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth (); 
    my $errorLogFile = "$CurrentPerlScriptName" . "_" .  "$niceMonth" . '.error.log' ; 
    $errorMsg = GetANiceTime () .' --- ' .   $errorMsg . " \n" ; 

        if ( $importance == 1 ) 
        {
            $errorMsg = "\n============================================================ \n" . $errorMsg ; 
            $errorMsg= $errorMsg. "============================================================ \n" ;
        }



    #debug nothing
    if ( $DebugLevel == 0 ) {   return ; } 

    #just logMsg the message
    if ( $DebugLevel == 1 )  {      print $errorMsg ;       } 

    #logMsg the message in a error log file
    if ( $DebugLevel == 2 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
        print ERRLOG $errorMsg ; 
        close ERRLOG;
    }

    #logMsg the message in 
    if ( $DebugLevel == 3 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
        print ERRLOG $errorMsg  ; 
        close ERRLOG;
        print $errorMsg ; 
    } #eof if ( $DebugLevel == 3 ) 


} #eof sub logErrorMsg 


#GET A NICE TIME 
sub GetANiceTime {
 # Purpose: returns the time in yyyymmdd-format 
 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
 #---- change 'month'- and 'year'-values to correct format ---- 
 $min = "0$min" if ($min < 10); 
 $hour = "0$hour" if ($hour < 10);
 $mon = $mon + 1;
 $mon = "0$mon" if ($mon < 10); 
 $year = $year + 1900;
 $mday = "0$mday" if ($mday < 10); 
 return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec"; 
} #eof sub GetANiceTime


sub GetANiceMonth {
 # Purpose: returns the time in yyyymmdd-format 
 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
 #---- change 'month'- and 'year'-values to correct format ---- 
 $mon = $mon + 1;
 $mon = "0$mon" if ($mon < 10); 
 $year = $year + 1900;
 return "$year\.$mon" ; 
} #eof sub GetANiceTime



sub PrintToFile {
my $FileOutput = shift ; 
my $StringToPrint = shift ; 
#READ ALL ROWS OF A FILE TO ALIST 
open (FILEOUTPUT, ">$FileOutput") || 
            print "could not open the \$FileOutput $FileOutput!\n"; 
print  FILEOUTPUT $StringToPrint ; 
close FILEOUTPUT ;

#debug $strToReturn .=  $StringToPrint; 

}
# =========================================== eof sub PrintToFile



#Action !!!
main();




1 ; 

__END__

#VersionHistory: 
#1.0. --- Yordan Georgiev --- Initial creation 
#1.1 --- Yordan Georgiev --- Added conditional logging 

Solution 5

http://search.cpan.org/dist/Spreadsheet-Read/Read.pm

Spreadsheet::Read tries to transparently read any spreadsheet and return its content in a universal manner independent of the parsing module that does the actual spreadsheet scanning.

Share:
27,651
MCS
Author by

MCS

Updated on July 02, 2020

Comments

  • MCS
    MCS almost 4 years

    What's the simplest way to parse an Excel file in Perl? Converting it to a text file would also work.

  • Jayan
    Jayan about 12 years
    @ DJ-Andrey-sXe : Welcome to SO. Please include more info why this module is worth considering (against some other listed in other answers)
  • Matthew Lock
    Matthew Lock over 7 years
    This is a big mess of batch files and perl. could you clean it up a bit and explain what each part is
  • Yordan Georgiev
    Yordan Georgiev over 7 years
    this code is so old that i really had to google my gmail to find it : code.google.com/archive/p/exceltohtml/downloads