PDFBox: Problem with converting pdf page into image

37,974

Solution 1

Convert PDF file 04-Request-Headers.pdf to image using pdfbox.

Download this file and paste it in Documents folder.

Example:

package com.pdf.pdfbox.test;

import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.util.PDFImageWriter;

/**
 * Renders every page of a PDF file to an image file using the PDFBox 1.x
 * {@code PDFImageWriter}.
 *
 * <p>The input file, output directory, image format and color mode are
 * hard-coded in {@link #main(String[])}; adjust them before running.
 */
public class ConvertPDFPageToImageWithoutText {

    /** Resolution (dpi) used when the screen resolution cannot be queried. */
    private static final int FALLBACK_RESOLUTION = 96;

    /**
     * Converts the configured PDF into one image per page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String oldPath = "C:/Documents/04-Request-Headers.pdf";
        File oldFile = new File(oldPath);
        if (!oldFile.exists()) {
            System.err.println(oldPath +" File Can't be found");
            return;
        }

        PDDocument document = null;
        try {
            document = PDDocument.load(oldPath);
            @SuppressWarnings("unchecked")
            List<PDPage> list = document.getDocumentCatalog().getAllPages();

            String fileName = stripExtension(oldFile.getName());
            String imageFormat = "png";
            String password = "";
            int startPage = 1;
            int endPage = list.size();
            String outputPrefix = "C:/Documents/PDFCopy/";//converted images saved here
            File outputDir = new File(outputPrefix);
            // mkdirs() returns false both on failure and when the directory already
            // exists, so re-check the directory instead of trusting the return value.
            if (!outputDir.exists() && !outputDir.mkdirs() && !outputDir.isDirectory()) {
                throw new IllegalStateException("Cannot create output directory '" + outputPrefix + "'");
            }

            int imageType = imageTypeFor("rgb");
            int resolution = screenResolution();

            PDFImageWriter pdfImageWriter = new PDFImageWriter();
            boolean written = pdfImageWriter.writeImage(document, imageFormat, password,
                    startPage, endPage, outputPrefix + fileName, imageType, resolution);
            if (!written) {
                throw new IllegalStateException("No writer found for format '" + imageFormat + "'");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the document, even when rendering fails part-way.
            if (document != null) {
                try {
                    document.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

    /** Returns {@code name} without its last extension (e.g. "a.b.pdf" becomes "a.b"). */
    private static String stripExtension(String name) {
        int dot = name.lastIndexOf('.');
        return dot > 0 ? name.substring(0, dot) : name;
    }

    /** Returns the screen resolution in dpi, or a fallback when running headless. */
    private static int screenResolution() {
        try {
            return Toolkit.getDefaultToolkit().getScreenResolution();
        } catch (HeadlessException e) {
            return FALLBACK_RESOLUTION;
        }
    }

    /**
     * Maps a color-mode name to the matching {@link BufferedImage} type constant.
     *
     * @param color one of bilevel, indexed, gray, rgb, rgba (case-insensitive)
     * @return the corresponding {@code BufferedImage.TYPE_*} constant
     * @throws IllegalArgumentException if the name is not recognised
     */
    private static int imageTypeFor(String color) {
        if ("bilevel".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_BYTE_BINARY;
        }
        if ("indexed".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_BYTE_INDEXED;
        }
        if ("gray".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_BYTE_GRAY;
        }
        if ("rgb".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_INT_RGB;
        }
        if ("rgba".equalsIgnoreCase(color)) {
            return BufferedImage.TYPE_INT_ARGB;
        }
        throw new IllegalArgumentException(
                "Invalid color '" + color + "' (valid: bilevel, indexed, gray, rgb, rgba)");
    }

}

OR

Try the solution below to convert PDF files to an image format.

How to Convert PDF to image with resolution in java Using PDF Renderer

Solution 2

Use the following code for the conversion; it works fine!

  import java.awt.HeadlessException;
         import java.awt.Toolkit;
         import java.awt.image.BufferedImage;

         import javax.imageio.ImageIO;

         import org.apache.pdfbox.exceptions.InvalidPasswordException;
         import org.apache.pdfbox.pdmodel.PDDocument;
         import org.apache.pdfbox.util.PDFImageWriter;

         /**
          * Convert a PDF document to an image.
          *
          * <p>Command-line tool: options given on the command line override the
          * built-in defaults (input file, output prefix, format, page range, color
          * mode and resolution).
          *
          * @author Ben Litchfield
          * @version $Revision: 1.6 $
          */
         public class PDFToImage
         {

             private static final String PASSWORD = "-password";
             private static final String START_PAGE = "-startPage";
             private static final String END_PAGE = "-endPage";
             private static final String IMAGE_FORMAT = "-imageType";
             private static final String OUTPUT_PREFIX = "-outputPrefix";
             private static final String COLOR = "-color";
             private static final String RESOLUTION = "-resolution";

             /** PDF that is converted when no file is named on the command line. */
             private static final String DEFAULT_PDF_FILE = "D:/docoverview.pdf";
             /** Output prefix that is used when -outputPrefix is not given. */
             private static final String DEFAULT_OUTPUT_PREFIX = "D:/printdata/pdfimages/";
             /** Resolution (dpi) used when the screen resolution cannot be queried. */
             private static final int FALLBACK_RESOLUTION = 96;

             /**
              * private constructor.
              */
             private PDFToImage()
             {
                 //static class
             }

             /**
              * Infamous main method.
              *
              * @param args Command line arguments: options (see {@link #usage()}) and,
              *             optionally, the PDF file to convert.
              *
              * @throws Exception If there is an error parsing or rendering the document.
              */
             public static void main( String[] args ) throws Exception
             {
                 String password = "";
                 boolean passwordSupplied = false;
                 String pdfFile = null;
                 String outputPrefix = DEFAULT_OUTPUT_PREFIX;
                 String imageFormat = "jpg";
                 int startPage = 1;
                 int endPage = Integer.MAX_VALUE;
                 String color = "rgb";
                 int resolution = screenResolution();

                 for( int i = 0; i < args.length; i++ )
                 {
                     String arg = args[i];
                     if( arg.equals( PASSWORD ) )
                     {
                         password = optionValue( args, ++i );
                         passwordSupplied = true;
                     }
                     else if( arg.equals( START_PAGE ) )
                     {
                         startPage = Integer.parseInt( optionValue( args, ++i ) );
                     }
                     else if( arg.equals( END_PAGE ) )
                     {
                         endPage = Integer.parseInt( optionValue( args, ++i ) );
                     }
                     else if( arg.equals( IMAGE_FORMAT ) )
                     {
                         imageFormat = optionValue( args, ++i );
                     }
                     else if( arg.equals( OUTPUT_PREFIX ) )
                     {
                         outputPrefix = optionValue( args, ++i );
                     }
                     else if( arg.equals( COLOR ) )
                     {
                         color = optionValue( args, ++i );
                     }
                     else if( arg.equals( RESOLUTION ) )
                     {
                         resolution = Integer.parseInt( optionValue( args, ++i ) );
                     }
                     else if( pdfFile == null )
                     {
                         // First non-option argument names the PDF; later ones are ignored.
                         pdfFile = arg;
                     }
                 }
                 if( pdfFile == null )
                 {
                     pdfFile = DEFAULT_PDF_FILE;
                 }

                 // Validate the color before touching the file so bad input fails fast.
                 int imageType = imageTypeFor( color );

                 PDDocument document = null;
                 try
                 {
                     document = PDDocument.load( pdfFile );

                     if( document.isEncrypted() )
                     {
                         try
                         {
                             document.decrypt( password );
                         }
                         catch( InvalidPasswordException e )
                         {
                             if( passwordSupplied )
                             {
                                 System.err.println( "Error: The supplied password is incorrect." );
                                 System.exit( 2 );
                             }
                             else
                             {
                                 //they didn't supply a password and the default of "" was wrong.
                                 System.err.println( "Error: The document is encrypted." );
                                 usage();
                             }
                         }
                     }

                     //Make the call
                     PDFImageWriter imageWriter = new PDFImageWriter();
                     boolean success = imageWriter.writeImage(document, imageFormat, password,
                             startPage, endPage, outputPrefix, imageType, resolution);
                     if (!success)
                     {
                         System.err.println( "Error: no writer found for image format '"
                                 + imageFormat + "'" );
                         System.exit(1);
                     }
                 }
                 finally
                 {
                     // Failures propagate to the caller (non-zero exit) instead of being
                     // printed and swallowed; the document is still released here.
                     if( document != null )
                     {
                         document.close();
                     }
                 }
             }

             /**
              * Returns the value following an option, or prints the usage text and
              * exits when the option is the last argument.
              *
              * @param args  the command-line arguments
              * @param index position at which the option's value is expected
              * @return the value at {@code index}
              */
             private static String optionValue( String[] args, int index )
             {
                 if( index >= args.length )
                 {
                     usage();
                 }
                 return args[index];
             }

             /**
              * Returns the screen resolution in dpi, or a fallback when running headless.
              */
             private static int screenResolution()
             {
                 try
                 {
                     return Toolkit.getDefaultToolkit().getScreenResolution();
                 }
                 catch( HeadlessException e )
                 {
                     return FALLBACK_RESOLUTION;
                 }
             }

             /**
              * Maps a color-mode name to a {@link BufferedImage} type constant. Prints
              * an error and exits with status 2 when the name is not recognised.
              *
              * @param color one of bilevel, indexed, gray, rgb, rgba (case-insensitive)
              * @return the matching {@code BufferedImage.TYPE_*} constant
              */
             private static int imageTypeFor( String color )
             {
                 if ("bilevel".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_BYTE_BINARY;
                 }
                 if ("indexed".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_BYTE_INDEXED;
                 }
                 if ("gray".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_BYTE_GRAY;
                 }
                 if ("rgb".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_INT_RGB;
                 }
                 if ("rgba".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_INT_ARGB;
                 }
                 System.err.println( "Error: invalid color '" + color
                         + "' (valid: bilevel, indexed, gray, rgb, rgba)." );
                 System.exit( 2 );
                 return BufferedImage.TYPE_INT_RGB; // not reached; satisfies the compiler
             }

             /**
              * This will print the usage requirements and exit.
              */
             private static void usage()
             {
                 System.err.println( "Usage: java org.apache.pdfbox.PDFToImage [OPTIONS] <PDF file>\n" +
                     "  -password  <password>          Password to decrypt document\n" +
                     "  -imageType <image type>        (" + getImageFormats() + ")\n" +
                     "  -outputPrefix <output prefix>  Filename prefix for image files\n" +
                     "  -startPage <number>            The first page to start extraction(1 based)\n" +
                     "  -endPage <number>              The last page to extract(inclusive)\n" +
                     "  -color <string>                The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n" +
                     "  -resolution <number>           The bitmap resolution in dpi\n" +
                     "  <PDF file>                     The PDF document to use\n"
                     );
                 System.exit(1);
             }

             /**
              * Returns a comma-separated list of the image formats ImageIO can write,
              * since -imageType selects the output format.
              */
             private static String getImageFormats()
             {
                 StringBuilder retval = new StringBuilder();
                 String[] formats = ImageIO.getWriterFormatNames();
                 for( int i = 0; i < formats.length; i++ )
                 {
                     if( i > 0 )
                     {
                         retval.append( "," );
                     }
                     retval.append( formats[i] );
                 }
                 return retval.toString();
             }
         }
Share:
37,974
user552910
Author by

user552910

Updated on July 20, 2022

Comments

  • user552910
    user552910 almost 2 years

    My mission is pretty simple: converting every single page of a PDF file into an image. I tried using the open-source version of ICEpdf to generate the images, but it doesn't render them with the correct font, so I started using PDFBox instead. The code is the following:

    // Load the PDF and collect its pages.
    PDDocument document = PDDocument.load(new File("testing.pdf"));             
    List<PDPage> pages = document.getDocumentCatalog().getAllPages();
    // Render each page and write it out as its own PNG file.
    for (int i = 0; i < pages.size(); i++) {
     PDPage singlePage = pages.get(i);
     // NOTE(review): convertToImage is a helper not shown in this snippet; the meaning of the arguments 8 and 12 cannot be confirmed from here.
     BufferedImage buffImage = convertToImage(singlePage, 8, 12);
     // NOTE(review): `count` and PdfUtil.DATA_OUTPUT_DIR are declared outside this snippet.
     ImageIO.write(buffImage, "png", new File(PdfUtil.DATA_OUTPUT_DIR+(count++)+".png"));
    }
    

    The font looks good, but the pictures within the PDF file look faded out (see the attachment). I looked into the source code but I still have no clue how to fix it. Do you guys have any idea what's going on? Please help. Thanks!!

  • Lilian A. Moraru
    Lilian A. Moraru about 12 years
    Since you are writing in Java, I recommend you to use the Java Code Conventions: oracle.com/technetwork/java/javase/documentation/…
  • Harinder
    Harinder about 12 years
    @MoraruLilian but i have used code conventions. which line of code are u referring to??
  • Steve Atkinson
    Steve Atkinson over 11 years
    7.2 on Moraru's link: "The opening brace should be at the end of the line that begins the compound statement; the closing brace should begin a line and be indented to the beginning of the compound statement." Putting the brace on a separate line makes the class longer than it needs to be! Also, use if ( (with a space) rather than if(, and finally, for( int i = 0; i < formats.length; i++ ) is old-school; use the enhanced for loop: for (String format : formats)
  • Tilman Hausherr
    Tilman Hausherr about 10 years
    The code from the answer is from PDFBox itself. We use a different code convention to improve readability: pdfbox.apache.org/codingconventions.html
  • Don Cheadle
    Don Cheadle about 9 years
    does this solve the issue of OP, i.e. that images in the source PDF were "faded"?
  • Don Cheadle
    Don Cheadle about 9 years
    I tried your soln for my PDF, but I get INFO: ColorSpace Pattern doesn't provide a non-stroking color, using white instead! and an image (in this case a QR code) is not copied over... it is just blank white
  • Don Cheadle
    Don Cheadle about 9 years
    I have a q open with attached doc at stackoverflow.com/questions/28589477/…
  • UdayKiran Pulipati
    UdayKiran Pulipati about 9 years
    @mmcrae I tried with attached PDF doc, bar code is eliminated while converting. If you use PDFRenderer-0.9.0 jar it converts pdfpage along with barcode.
  • Don Cheadle
    Don Cheadle about 9 years
    thanks for the tip! I used it and it worked :). Do you know of limitations for this? I'm surprised more people aren't using this... They use paid services, or GhostScript which has limitations, when there is this. Why?
  • UdayKiran Pulipati
    UdayKiran Pulipati about 9 years
    @mmcrae it has some limitations; see "Visualforce PDF Rendering Considerations and Limitations".
  • Tilman Hausherr
    Tilman Hausherr about 8 years
    PDFImageWriter has been removed in 2.0. For 2.0, see here: stackoverflow.com/questions/23326562/…
  • Lonzak
    Lonzak over 4 years
    There was also a JPedal open-source version (github.com/Lonzak/JPedal) out there.