Getting FFProbe Information With Python
Solution 1
There are a few problems in your code:
- args list to Popen has last argument as
-i filename
which is a syntax error; use '-i ' + filename instead.
- shell=True
is usually not needed and is an unnecessary burden.
Other than that it seems to be working, are you not seeing output after fixing #1 ?
Edit: Looks like you are having problem with ffprobe commandline, so I installed it and changes you require are:
- My ffprobe (ffprobe 0.7.3-4:0.7.3-0ubuntu0.11.10.1) doesn't seem to accept the
-i
flag; the input file is just passed as the last argument.
- You need to pass
-loglevel
and its value quiet
as separate arguments, i.e. [..., '-loglevel', 'quiet', ...]
So after these changes here is a sample script:
#!/usr/bin/python
import os, sys, subprocess, shlex, re
from subprocess import call
def probe_file(filename):
    """Run ``ffprobe`` on ``filename`` and print what it reports.

    The container format is requested with ``-show_format -pretty``. The file
    name is passed as the last positional argument, and ``-loglevel`` and its
    value ``quiet`` are separate list items.

    Prints the file name, then the captured standard output, then (only when
    it is non-empty) the captured standard error. Returns nothing.
    """
    cmnd = ['ffprobe', '-show_format', '-pretty', '-loglevel', 'quiet', filename]
    # universal_newlines=True yields text instead of bytes on Python 3, so the
    # printed report is readable rather than a b'...' repr.
    p = subprocess.Popen(cmnd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    # print(x) with a single argument is valid on both Python 2 and Python 3.
    print(filename)
    out, err = p.communicate()
    print("==========output==========")
    print(out)
    if err:
        print("========= error ========")
        print(err)
# Example call using a relative path to a sample file.
probe_file('drop.avi')
And I see the correct output:
==========output==========
[FORMAT]
filename=drop.avi
nb_streams=1
format_name=avi
format_long_name=AVI format
start_time=0:00:00.000000
duration=0:00:06.066667
size=660.000 Kibyte
bit_rate=891.217 Kbit/s
[/FORMAT]
========= error ========
ffprobe version 0.7.3-4:0.7.3-0ubuntu0.11.10.1, Copyright (c) 2007-2011 the Libav developers
built on Jan 4 2012 16:08:51 with gcc 4.6.1
configuration: --extra-version='4:0.7.3-0ubuntu0.11.10.1' --arch=amd64 --prefix=/usr --enable-vdpau --enable-bzlib --enable-libgsm --enable-libschroedinger --enable-libspeex --enable-libtheora --enable-libvorbis --enable-pthreads --enable-zlib --enable-libvpx --enable-runtime-cpudetect --enable-vaapi --enable-gpl --enable-postproc --enable-swscale --enable-x11grab --enable-libdc1394 --enable-shared --disable-static
libavutil 51. 7. 0 / 51. 7. 0
libavcodec 53. 6. 0 / 53. 6. 0
libavformat 53. 3. 0 / 53. 3. 0
libavdevice 53. 0. 0 / 53. 0. 0
libavfilter 2. 4. 0 / 2. 4. 0
libswscale 2. 0. 0 / 2. 0. 0
libpostproc 52. 0. 0 / 52. 0. 0
Unsupported codec with id 114 for input stream 0
Solution 2
The json output from ffprobe
is simple enough to parse in python without having to install a third-party library.
#!/usr/bin/env python
import argparse
import subprocess
import sys
from pathlib import Path
from typing import NamedTuple
class FFProbeResult(NamedTuple):
    """Outcome of a single ``ffprobe`` invocation."""

    # Exit status of the ffprobe process.
    return_code: int
    # Captured standard output (the caller requests JSON formatting).
    json: str
    # Captured standard error.
    error: str
def ffprobe(file_path) -> FFProbeResult:
    """Run ``ffprobe`` on ``file_path`` and capture its JSON report.

    Args:
        file_path: Path of the media file to inspect (``str`` or path-like).

    Returns:
        FFProbeResult: the process exit status, the captured standard output
        (format and stream information, printed as JSON) and the captured
        standard error, both as text.
    """
    command_array = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        # str() lets callers hand in a pathlib.Path as well as a plain string.
        str(file_path),
    ]
    # universal_newlines=True decodes both pipes to str instead of bytes.
    result = subprocess.run(
        command_array,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    return FFProbeResult(
        return_code=result.returncode,
        json=result.stdout,
        error=result.stderr,
    )
Example usage, echoing the resulting json string:
if __name__ == '__main__':
    # Local import: the stream summary below calls json.loads, and the module
    # is not among the imports at the top of this script.
    import json

    parser = argparse.ArgumentParser(description='View ffprobe output')
    parser.add_argument('-i', '--input', help='File Name', required=True)
    args = parser.parse_args()

    # Bail out early when the argument does not name an existing regular file.
    if not Path(args.input).is_file():
        print("could not read file: " + args.input)
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is meant for interactive use.
        sys.exit(1)

    print('File: {}'.format(args.input))
    ffprobe_result = ffprobe(file_path=args.input)
    if ffprobe_result.return_code == 0:
        # Print the raw json string
        print(ffprobe_result.json)

        # or print a summary of each stream
        d = json.loads(ffprobe_result.json)
        streams = d.get("streams", [])
        for stream in streams:
            print(f'{stream.get("codec_type", "unknown")}: {stream.get("codec_long_name")}')
    else:
        print("ERROR")
        print(ffprobe_result.error, file=sys.stderr)
So:
$ ./ffprobe.py -i input.mp4
File: input.mp4
{
"streams": [
{
"index": 0,
"codec_name": "h264",
"codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
"profile": "High",
"codec_type": "video",
"codec_time_base": "1001/48000",
"codec_tag_string": "avc1",
"codec_tag": "0x31637661",
"width": 1920,
"height": 1080,
"coded_width": 1920,
"coded_height": 1088,
"has_b_frames": 2,
"pix_fmt": "yuv420p",
"level": 40,
"color_range": "tv",
"color_space": "bt709",
"color_transfer": "bt709",
"color_primaries": "bt709",
"chroma_location": "left",
"refs": 1,
"is_avc": "true",
"nal_length_size": "4",
"r_frame_rate": "24000/1001",
"avg_frame_rate": "24000/1001",
"time_base": "1/24000",
"start_pts": 0,
"start_time": "0.000000",
"duration_ts": 442440,
"duration": "18.435000",
"bit_rate": "4970031",
"bits_per_raw_sample": "8",
"nb_frames": "442",
"disposition": {
"default": 1,
"dub": 0,
"original": 0,
"comment": 0,
"lyrics": 0,
"karaoke": 0,
"forced": 0,
"hearing_impaired": 0,
"visual_impaired": 0,
"clean_effects": 0,
"attached_pic": 0,
"timed_thumbnails": 0
},
"tags": {
"creation_time": "2020-05-20T15:37:59.000000Z",
"language": "und",
"handler_name": "L-SMASH Video Handler",
"encoder": "AVC Coding"
}
},
{
"index": 1,
"codec_name": "aac",
"codec_long_name": "AAC (Advanced Audio Coding)",
"profile": "LC",
"codec_type": "audio",
"codec_time_base": "1/48000",
"codec_tag_string": "mp4a",
"codec_tag": "0x6134706d",
"sample_fmt": "fltp",
"sample_rate": "48000",
"channels": 2,
"channel_layout": "stereo",
"bits_per_sample": 0,
"r_frame_rate": "0/0",
"avg_frame_rate": "0/0",
"time_base": "1/48000",
"start_pts": 0,
"start_time": "0.000000",
"duration_ts": 885760,
"duration": "18.453333",
"bit_rate": "137011",
"max_bit_rate": "140304",
"nb_frames": "865",
"disposition": {
"default": 1,
"dub": 0,
"original": 0,
"comment": 0,
"lyrics": 0,
"karaoke": 0,
"forced": 0,
"hearing_impaired": 0,
"visual_impaired": 0,
"clean_effects": 0,
"attached_pic": 0,
"timed_thumbnails": 0
},
"tags": {
"creation_time": "2020-05-20T15:37:59.000000Z",
"language": "und",
"handler_name": "L-SMASH Audio Handler"
}
}
],
"format": {
"filename": "input.mp4",
"nb_streams": 2,
"nb_programs": 0,
"format_name": "mov,mp4,m4a,3gp,3g2,mj2",
"format_long_name": "QuickTime / MOV",
"start_time": "0.000000",
"duration": "18.453333",
"size": "11779747",
"bit_rate": "5106826",
"probe_score": 100,
"tags": {
"major_brand": "mp42",
"minor_version": "0",
"compatible_brands": "mp42mp41isomavc1",
"creation_time": "2020-05-20T15:37:59.000000Z"
}
}
}
video: H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10
audio: AAC (Advanced Audio Coding)
Solution 3
Here is a technique that I think is both simple to use and easy to parse (tested with ffmpeg 3.x):
import subprocess
import xml.etree
def ffprobe(executable, filename):
    '''Runs ``ffprobe`` executable over ``filename``, returns parsed XML

    Parameters:

        executable (str): Full path leading to ``ffprobe``
        filename (str): Full path leading to the file to be probed

    Returns:

        xml.etree.ElementTree.Element: root element of the parsed output

    Raises:

        subprocess.CalledProcessError: if the executable exits with a
            non-zero status
        xml.etree.ElementTree.ParseError: if the output is not well-formed XML
    '''
    # ``import xml.etree`` alone does not load the ElementTree submodule, so
    # ``xml.etree.ElementTree`` would raise AttributeError; import it
    # explicitly here.
    import xml.etree.ElementTree

    cmd = [
        executable,
        '-v', 'quiet',
        '-print_format', 'xml',  # here is the trick
        '-show_format',
        '-show_streams',
        filename,
    ]
    return xml.etree.ElementTree.fromstring(subprocess.check_output(cmd))
The data available comes from a string representation that looks like this:
<ffprobe>
<streams>
<stream index="0" codec_name="h264" codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10" profile="Constrained Baseline" codec_type="video" codec_time_base="1/60" codec_tag_string="avc1" codec_tag="0x31637661" width="560" height="320" coded_width="560" coded_height="320" has_b_frames="0" sample_aspect_ratio="0:1" display_aspect_ratio="0:1" pix_fmt="yuv420p" level="30" color_range="tv" color_space="bt709" color_transfer="bt709" color_primaries="bt709" chroma_location="left" refs="1" is_avc="true" nal_length_size="4" r_frame_rate="30/1" avg_frame_rate="30/1" time_base="1/90000" start_pts="0" start_time="0.000000" duration_ts="498000" duration="5.533333" bit_rate="465641" bits_per_raw_sample="8" nb_frames="166">
<disposition default="1" dub="0" original="0" comment="0" lyrics="0" karaoke="0" forced="0" hearing_impaired="0" visual_impaired="0" clean_effects="0" attached_pic="0" timed_thumbnails="0"/>
<tag key="creation_time" value="2010-03-20T21:29:11.000000Z"/>
<tag key="language" value="und"/>
<tag key="encoder" value="JVT/AVC Coding"/>
</stream>
<stream>...</stream>
</streams>
<format filename="/Users/andre/Projects/qnap/librarian/librarian/data/movie.mp4" nb_streams="2" nb_programs="0" format_name="mov,mp4,m4a,3gp,3g2,mj2" format_long_name="QuickTime / MOV" start_time="0.000000" duration="5.568000" size="383631" bit_rate="551193" probe_score="100">
<tag key="major_brand" value="mp42"/>
<tag key="minor_version" value="0"/>
<tag key="compatible_brands" value="mp42isomavc1"/>
<tag key="creation_time" value="2010-03-20T21:29:11.000000Z"/>
<tag key="encoder" value="HandBrake 0.9.4 2009112300"/>
</format>
</ffprobe>
Solution 4
There is FFProbe wrapper for Python (https://pypi.org/project/ffprobe/).
You can install it easily: (sudo) pip install ffprobe
A typical usage:
# FFProbe comes from the third-party ``ffprobe`` package (pip install ffprobe).
from ffprobe import FFProbe
metadata = FFProbe("example.mp4")
An alternative to FFProbe is pymediainfo library (https://pymediainfo.readthedocs.io/en/stable/). You can find usage in the documentation site.
Solution 5
One-liner in a terminal:
ffprobe -v quiet -print_format json -show_format -show_streams "foobar.mp4" > "foobar.mp4.json"
Hence, you may simply:
#!/usr/bin/python3
import subprocess
import json
def probe_file(filename):
    """Run ``ffprobe`` on ``filename`` and return its JSON report as a dict.

    Prints the file name as a sanity check, a banner when ffprobe produced
    output, and the captured standard error when it is non-empty.

    Returns:
        The decoded JSON document, or an empty dict when ffprobe wrote
        nothing to standard output.

    Raises:
        json.JSONDecodeError: if the captured output is not valid JSON.
    """
    command_line = [
        'ffprobe', '-v', 'quiet',
        '-print_format', 'json',
        '-show_format', '-show_streams',
        str(filename),
    ]
    # Text mode so stderr prints as readable text rather than a bytes repr.
    p = subprocess.Popen(command_line, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, universal_newlines=True)
    print(filename)  # * Sanity check *
    out, err = p.communicate()
    if out:  # ** if result okay
        print("==========output==========")
        result = json.loads(out)
    else:
        result = {}
    if err:
        print("========= error ========")
        print(err)
    return result
# Probe one file and print the returned value as JSON indented by two spaces.
result_json = probe_file('/foo/bar.mov')
print(json.dumps(result_json, indent=2))
NB: I have used this example. You may clone the code from github and use it as a module:
#python3
# ffprobe_json is not part of the standard library; presumably it is the
# module obtained by cloning the GitHub project referred to in the text.
from ffprobe_json import ffprobe
# The call's return value is unpacked into two names: a result and an error.
ffprobe_result, error = ffprobe(file_path="/your/media_file.mp4", quiet=True)
user
Updated on June 18, 2021Comments
-
user about 3 years
I've been attempting to figure this out for forever now (I'm new to programming) and I can't figure it out.
I'm attempting to build a script that will test the file, and give me output from which I can get information like "Audio Format" that I can then put into the filename. However, I can't even get the script to return any file info. I've hit a wall at inserting an input file...
So at this point I just need help getting it to spit out info based on the argvs I've thrown in. Hopefully I'll be able to figure out how to parse the audio info from that.
My attempt that seems to be close:
#!/usr/bin/python import os, sys, subprocess, shlex, re from subprocess import call def probe_file(filename): p = subprocess.Popen(['/opt/local/bin/ffprobe', '-show_format', '-pretty', '-loglevel quiet', -i filename], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) print filename print p.communicate() [probe_file (f) for f in os.listdir('.') if not f.startswith('.')]
-
Gabriel Devillers over 6 yearsYour XML parsing does not work for me (Debian 9, Python 3.5.3, ffprobe 3.2.8-1~deb9u1). But thanks for the hint though: I transformed your code to use JSON and it is working.
-
André Anjos over 6 yearsThanks for the feedback. Could you please tell why it does not work for you? Is it a Python issue or the generated XML does not conform?
-
Gabriel Devillers over 6 yearsWhen I call your function the return statement gives me a
AttributeError: module 'xml.etree' has no attribute 'ElementTree'
-
jonbon almost 5 yearsJust an FYI for anyone who finds this, FFProbe was not written to be compatible with Python3.
-
Mass Dot Net about 4 yearsAlso, that Python
ffprobe
library hasn't been updated since Oct 30, 2013.