Save a pandas dataframe as table in Image or pdf document with nice multi index display

python pdf pandas latex pdflatex

10,052

The solution that worked for me: with pandas >= 0.17, I installed pdflatex and copied the required LaTeX package files (booktabs.sty, geometry.sty and pdflscape.sty) into a `userpackagelatex` folder next to the script.

import pandas as pd
import os
import math

def save_summary_table_as_pdf(path_to_csv, path_to_output_folder,
                              rows_per_page=40):
    """Render per-cluster summary statistics of a CSV file as a PDF table.

    The CSV is grouped by its 'Cluster' column and described with the 5th,
    50th and 95th percentiles. Only the '5%', 'mean', '50%' and '95%' rows
    are kept, indexed by cluster, cluster population, cluster frequency and
    statistic name. The table is written as a landscape LaTeX document,
    ``rows_per_page`` rows per table, and compiled by running ``pdflatex``
    inside the output folder (pdflatex names its output after the .tex
    file, i.e. ``clustering_summary_table.pdf``).

    NOTE(review): the data preparation relies on an older pandas API
    (``groupby(...).describe()`` producing a two-level row index and
    ``MultiIndex.labels``) -- confirm the installed pandas version.

    Args:
        path_to_csv: path of a comma-separated file with a 'Cluster' column.
        path_to_output_folder: existing folder that receives the output;
            the intermediate .tex, .aux and .log files are removed from it.
        rows_per_page: maximum number of table rows emitted per LaTeX
            table (default 40, the previously hard-coded value).

    Raises:
        ValueError: if ``rows_per_page`` is smaller than 1.
        OSError: if an intermediate file cannot be written or removed
            (for instance because pdflatex did not produce it).
    """
    if rows_per_page < 1:
        raise ValueError("rows_per_page must be at least 1")

    pwd = os.getcwd()
    df = pd.read_csv(path_to_csv, sep=',')

    # data preparation
    groupeddf = df.groupby('Cluster')
    res = groupeddf.describe([0.05, 0.5, 0.95])
    res.index.rename(['Cluster', 'Stats'], inplace=True)

    res['cluster'] = res.index.get_level_values('Cluster')
    res['stats'] = res.index.get_level_values('Stats')
    # The 'count' rows of the first column give the size of each cluster.
    is_count_row = res.index.get_level_values('Stats') == 'count'
    populations = res.iloc[is_count_row, 0].values.tolist()
    # labels[0] holds, for every row, the integer position of its cluster,
    # which is used here to look up that cluster's population.
    res['population'] = [populations[i] for i in res.index.labels[0].values()]
    total_pop = sum(populations)
    res['frequency'] = (res['population'] / total_pop).round(3)
    res.set_index(['cluster', 'population', 'frequency', 'stats'],
                  inplace=True)

    kept_stats = ['5%', 'mean', '50%', '95%']
    res1 = res.iloc[res.index.get_level_values('stats').isin(kept_stats)]
    res1 = res1.round(2)
    # Underscores are special characters in LaTeX; show them as spaces.
    res1 = res1.rename(columns=lambda x: x.replace('_', ' '))

    # latex
    nbpages = int(math.ceil(res1.shape[0] * 1.0 / rows_per_page))

    templatetop = r'''\documentclass[a3paper, 5pt]{article}
    \usepackage{booktabs}
    \usepackage{pdflscape}
    \usepackage[a4paper,bindingoffset=0.2in,%
            left=0.25in,right=0.25in,top=1in,bottom=1in,%
            footskip=.25in]{geometry}
    \begin{document}
    \begin{landscape}
    \pagenumbering{gobble}
    \oddsidemargin = 0pt
    \hoffset = -0.25in
    \topmargin = 1pt
    \headheight = 0pt
    \headsep = 0pt
    '''
    # Raw string so the LaTeX backslashes are never read as Python escapes.
    templatebottom = r'''
    \end{landscape}
    \end{document}
    '''
    output_folder_path_abs = path_to_output_folder
    output_tex = os.path.join(output_folder_path_abs,
                              "clustering_summary_table.tex")

    # Text mode: the content is str, which a binary-mode file rejects on
    # Python 3.
    with open(output_tex, "w") as afile:
        afile.write(templatetop + '\n')
        for i in range(nbpages):
            first_row = i * rows_per_page
            page = res1.iloc[first_row:first_row + rows_per_page, :]
            afile.write(page.to_latex() + '\n' + r"\pagenumbering{gobble}")
        afile.write(templatebottom + '\n')

    # pdflatex is run from inside the output folder so that its output and
    # auxiliary files land there; always restore the caller's directory.
    os.chdir(output_folder_path_abs)
    try:
        os.system('pdflatex clustering_summary_table.tex')
    finally:
        os.chdir(pwd)

    # Remove the intermediate files.
    for extension in ('.tex', '.aux', '.log'):
        os.remove(os.path.join(path_to_output_folder,
                               'clustering_summary_table' + extension))

if __name__ == "__main__":
    # Command-line entry point: build the clustering summary PDF for the
    # given CSV file inside the given output folder.
    print('begin generate pdf table about clustering')
    import argparse
    import shutil
    parser = argparse.ArgumentParser()
    parser.add_argument("path_to_csv")
    parser.add_argument("outputfolder")
    args = vars(parser.parse_args())
    filedir = os.path.abspath(os.path.dirname(__file__))
    output_folder_path_abs = os.path.abspath(args['outputfolder'])
    input_folder_path_abs = os.path.abspath(args['path_to_csv'])

    # LaTeX style files shipped next to this script. They are copied into
    # the output folder because pdflatex is run from there, and removed
    # afterwards. 'geometry.sty' is the file loaded by the geometry
    # package that the generated document requests; 'geography.sty' is not
    # a package the document uses.
    style_files = ('booktabs.sty', 'geometry.sty', 'pdflscape.sty')
    copied_files = []
    try:
        for style_file in style_files:
            source = os.path.join(filedir, 'userpackagelatex', style_file)
            if not os.path.isfile(source):
                # A missing local copy is not fatal: pdflatex may still
                # find the package in the system LaTeX installation.
                print('style file not found, skipping: ' + source)
                continue
            # shutil.copy avoids building a shell command, so paths
            # containing spaces or shell metacharacters are handled.
            shutil.copy(source, output_folder_path_abs)
            copied_files.append(
                os.path.join(output_folder_path_abs, style_file))
        save_summary_table_as_pdf(input_folder_path_abs,
                                  output_folder_path_abs)
    finally:
        # Only remove what was actually copied, even if generation failed.
        for copied_file in copied_files:
            if os.path.exists(copied_file):
                os.remove(copied_file)

10,052

Author by

Luce Philibert

Data lover/hater. Tech enthusiast. Startup curious.

Updated on June 04, 2022

Comments

Luce Philibert almost 2 years

I'm trying to include a data frame with multi-index in a report in pdf. I would like to have a nice table output.

I have found these 2 solutions:

pandas.df -> HTML -> pdf

    # Attempt 1: render the summary table as HTML, then convert the HTML
    # file to PDF with pdfkit.
    # NOTE: path_to_csv is not defined in this snippet; it must be set
    # beforehand.
    import pandas as pd
    from IPython.display import HTML
    import pdfkit

    # df generation
    df = pd.read_csv(path_to_csv, sep =',')
    # Describe every cluster with the 5th, 50th and 95th percentiles.
    groupeddf = df.groupby('Cluster')
    res = groupeddf.describe([0.05, 0.5, 0.95])
    # NOTE(review): assumes describe() returned a two-level row index
    # (cluster, statistic), as older pandas versions do -- confirm.
    res.index.rename(['Cluster', 'stats'], inplace=True)

    # Copy both index levels into columns so they can be re-used in the
    # new index built below.
    res['Cluster'] = res.index.get_level_values('Cluster')
    res['stats'] = res.index.get_level_values('stats')
    # Values of the first column on the 'count' rows (presumably one per
    # cluster, i.e. the cluster sizes).
    populations = (res.iloc[(res.index.get_level_values('stats') == 'count'), \
                                                            0].values).tolist()
    # labels[0] gives, for each row, the integer position of its value in
    # the first index level; it is used to pick that row's population.
    res['population'] = [populations[i] for i in res.index.labels[0].values()]
    total_pop = sum(populations)
    # Share of the total population, rounded to 3 decimals.
    res['frequency'] =(res['population']/total_pop).round(3)
    res.set_index(['Cluster', 'population','frequency', 'stats'], inplace=True)



    # Keep only the 5%, mean, 50% and 95% rows, rounded to 2 decimals.
    res1 = res.iloc[(res.index.get_level_values('stats') == '5%') |
    (res.index.get_level_values('stats') == 'mean') |
    (res.index.get_level_values('stats') == '50%') |
    (res.index.get_level_values('stats') == '95%')]
    res1 = res1.round(2)
    # saving the df: write the HTML markup of the table to test.html
    h = HTML(res1.to_html())
    my_file = open('test.html', 'w')
    my_file.write(h.data)
    my_file.close()


    # Convert the saved HTML file to out.pdf, passing the 'orientation'
    # option set to 'Landscape'.
    options = {
        'orientation': 'Landscape'
        }
    with open('test.html') as f:
        pdfkit.from_file(f, 'out.pdf', options=options)

But this depends on pdfkit, which makes it difficult for us to use. That's why I am trying to use pandas.df -> tex -> pdf (as mentioned in "Export a Pandas dataframe as a table image"):

    # Attempt 2: render the summary table as LaTeX and compile it with
    # pdflatex.
    # NOTE: path_to_csv is not defined in this snippet; it must be set
    # beforehand.
    import pandas as pd
    import os
    # df generation
    df = pd.read_csv(path_to_csv, sep =',')
    # Describe every cluster with the 5th, 50th and 95th percentiles.
    groupeddf = df.groupby('Cluster')
    res = groupeddf.describe([0.05, 0.5, 0.95])
    # NOTE(review): assumes describe() returned a two-level row index
    # (cluster, statistic), as older pandas versions do -- confirm.
    res.index.rename(['Cluster', 'stats'], inplace=True)

    # Copy both index levels into columns so they can be re-used in the
    # new index built below.
    res['Cluster'] = res.index.get_level_values('Cluster')
    res['stats'] = res.index.get_level_values('stats')
    # Values of the first column on the 'count' rows (presumably one per
    # cluster, i.e. the cluster sizes).
    populations = (res.iloc[(res.index.get_level_values('stats') == 'count'), \
                                                            0].values).tolist()
    # labels[0] gives, for each row, the integer position of its value in
    # the first index level; it is used to pick that row's population.
    res['population'] = [populations[i] for i in res.index.labels[0].values()]
    total_pop = sum(populations)
    # Share of the total population, rounded to 3 decimals.
    res['frequency'] =(res['population']/total_pop).round(3)
    res.set_index(['Cluster', 'population','frequency', 'stats'], inplace=True)



    # Keep only the 5%, mean, 50% and 95% rows, rounded to 2 decimals.
    res1 = res.iloc[(res.index.get_level_values('stats') == '5%') |
    (res.index.get_level_values('stats') == 'mean') |
    (res.index.get_level_values('stats') == '50%') |
    (res.index.get_level_values('stats') == '95%')]
    res1 = res1.round(2)
    # Replace underscores in the column names with spaces before the
    # table is turned into LaTeX.
    res1.rename(columns=lambda x: x.replace('_', ' '), inplace=True)

    #latex
    # Braces are doubled so that str.format() leaves them as literal
    # braces; the single {} is the placeholder that receives the table.
    # The document uses the 'standalone' class, which must be available
    # to pdflatex.
    template = r'''\documentclass[preview]{{standalone}}
    \usepackage{{booktabs}}
    \begin{{document}}
    {}
    \end{{document}}
    '''

    # NOTE(review): a str is written to a file opened in binary mode
    # ("wb"); this works on Python 2 but raises TypeError on Python 3.
    with open("outputfile.tex", "wb") as afile:
        afile.write(template.format(res1.to_latex()))
    # Compile the generated file in the current working directory.
    os.system("pdflatex outputfile.tex")

However, I am not familiar with LaTeX, and I get this error:

  ! LaTeX Error: File `standalone.cls' not found.

 Type X to quit or <RETURN> to proceed,
 or enter a new name. (Default extension: cls)

Any idea about the error or the standard way to do pandas.df -> pdf ?

Recents

Why Is PNG file with Drop Shadow in Flutter Web App Grainy?

How to troubleshoot crashes detected by Google Play Store for Flutter app

Cupertino DateTime picker interfering with scroll behaviour

Why does awk -F work for most letters, but not for the letter "t"?

Flutter change focus color and icon color but not works

How to print and connect to printer using flutter desktop via usb?

Critical issues have been reported with the following SDK versions: com.google.android.gms:play-services-safetynet:17.0.0

Flutter Dart - get localized country name from country code

navigatorState is null when using pushNamed Navigation onGenerateRoutes of GetMaterialPage

Android Sdk manager not found- Flutter doctor error

Flutter Laravel Push Notification without using any third party like(firebase,onesignal..etc)

How to change the color of ElevatedButton when entering text in TextField

Formatting latex (to_latex) output

Installing pstricks in windows with MikTex

Multiple matplotlib plots in same figure + in to pdf-Python

Link to external application in LaTeX Beamer

Changing style of PDF-Latex output through IPython Notebook conversion

Converting LaTeX Generated PDF to Word Using Acrobat XI

tabula-py ImportError: cannot import name 'read_pdf'

Opening a pdf and reading in tables with python pandas

Generating pdf-latex with python script

Export Pandas DataFrame into a PDF file using Python

Save a pandas dataframe as table in Image or pdf document with nice multi index display

Luce Philibert

Comments

Recents

Related