How to get comments from videos using YouTube API v3 and Python?

18,931

Solution 1

I'm using this code:

import os
import pickle
import google.oauth2.credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# Path to the OAuth 2.0 client secrets JSON downloaded from the Google API
# console; see https://python.gotrained.com/youtube-api-extracting-comments/
# for a walkthrough of creating these credentials.
CLIENT_SECRETS_FILE = "client_secret.json"
# Full read/write scope for the YouTube Data API (needed for comment access).
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
# Service name and version passed to googleapiclient.discovery.build().
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'
def get_authenticated_service():
    """Build an authenticated YouTube API client, caching credentials on disk.

    Loads previously saved credentials from 'token.pickle' when present;
    otherwise refreshes expired credentials or runs the installed-app OAuth
    console flow, then persists the result for subsequent runs.
    """
    creds = None
    # Reuse cached credentials from an earlier run, if any.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as cache:
            creds = pickle.load(cache)
    if creds is None or not creds.valid:
        can_refresh = creds is not None and creds.expired and creds.refresh_token
        if can_refresh:
            # Expired but refreshable: obtain a new access token silently.
            creds.refresh(Request())
        else:
            # No usable credentials: run the interactive console flow.
            flow = InstalledAppFlow.from_client_secrets_file(
                CLIENT_SECRETS_FILE, SCOPES)
            creds = flow.run_console()
        # Persist for the next run so the flow is not repeated.
        with open('token.pickle', 'wb') as cache:
            pickle.dump(creds, cache)
    return build(API_SERVICE_NAME, API_VERSION, credentials=creds)
def get_video_comments(service, **kwargs):
    """Collect the plain text of every top-level comment on a video.

    *service* is an authenticated YouTube API client; *kwargs* are forwarded
    verbatim to commentThreads().list() (part, videoId, textFormat, ...).
    Follows nextPageToken until the last page and returns the textDisplay of
    each thread's top-level comment, in API order.
    """
    collected = []
    response = service.commentThreads().list(**kwargs).execute()
    while response:
        collected.extend(
            thread['snippet']['topLevelComment']['snippet']['textDisplay']
            for thread in response['items']
        )
        if 'nextPageToken' not in response:
            break  # Last page reached.
        kwargs['pageToken'] = response['nextPageToken']
        response = service.commentThreads().list(**kwargs).execute()
    return collected
if __name__ == '__main__':
    # When running locally, disable OAuthlib's HTTPS verification. When
    # running in production *do not* leave this option enabled.
    os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
    service = get_authenticated_service()
    # The video id is the `v=` query parameter of the watch URL, e.g.
    # https://www.youtube.com/watch?v=vedLpKXzZqE -> vedLpKXzZqE
    videoId = input('Enter Video id : ')
    comments = get_video_comments(service, part='snippet', videoId=videoId, textFormat='plainText')
    # BUG FIX: this print was dedented to module level in the original, which
    # raises NameError on import (`comments` is bound only inside this guard).
    print(len(comments), comments)

good luck

Solution 2

You can fetch all the comments using the nextPageToken. The YouTube v3 API has become somewhat messy, but don't worry — I think this is what you are looking for.

YOUTUBE_COMMENT_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'
def get_video_comment(self):
    """Fetch and print all comment threads (with replies) for a video.

    Command-line driven: reads --videourl, --key and optional --max from
    sys.argv, extracts the video id from the URL's `v=` query parameter,
    then pages through the commentThreads endpoint via self.openURL,
    printing every top-level comment and each of its replies.
    """

    def load_comments(page):
        """Print every top-level comment and all of its replies on one API page."""
        for item in page["items"]:
            comment = item["snippet"]["topLevelComment"]
            author = comment["snippet"]["authorDisplayName"]
            text = comment["snippet"]["textDisplay"]
            print("Comment by {}: {}".format(author, text))
            for reply in item.get('replies', {}).get('comments', []):
                rauthor = reply['snippet']['authorDisplayName']
                rtext = reply["snippet"]["textDisplay"]
                # BUG FIX: print inside the loop so *every* reply is shown;
                # the original printed only the last reply of each thread.
                print("\n\tReply by {}: {}".format(rauthor, rtext), "\n")

    parser = argparse.ArgumentParser()
    mxRes = 20  # Default maxResults per page when --max is not given.
    vid = str()
    parser.add_argument("--c", help="calls comment function by keyword function", action='store_true')
    parser.add_argument("--max", help="number of comments to return")
    parser.add_argument("--videourl", help="Required URL for which comments to return")
    parser.add_argument("--key", help="Required API key")
    args = parser.parse_args()
    if not args.max:
        args.max = mxRes
    if not args.videourl:
        exit("Please specify video URL using the --videourl=parameter.")
    if not args.key:
        exit("Please specify API key using the --key=parameter.")
    try:
        # e.g. https://www.youtube.com/watch?v=vedLpKXzZqE -> vedLpKXzZqE
        video_id = urlparse(str(args.videourl))
        q = parse_qs(video_id.query)
        vid = q["v"][0]
    except (KeyError, IndexError, ValueError):
        # Narrowed from a bare `except:` — only URL-shape problems are expected here.
        print("Invalid YouTube URL")
    parms = {
                'part': 'snippet,replies',
                'maxResults': args.max,
                'videoId': vid,
                'key': args.key
            }
    try:
        matches = self.openURL(YOUTUBE_COMMENT_URL, parms)
        mat = json.loads(matches)
        nextPageToken = mat.get("nextPageToken")
        print("\nPage : 1")
        print("------------------------------------------------------------------")
        load_comments(mat)
        page_no = 2
        while nextPageToken:
            parms.update({'pageToken': nextPageToken})
            matches = self.openURL(YOUTUBE_COMMENT_URL, parms)
            mat = json.loads(matches)
            nextPageToken = mat.get("nextPageToken")
            print("\nPage : ", page_no)
            print("------------------------------------------------------------------")
            load_comments(mat)
            page_no += 1
    except KeyboardInterrupt:
        print("User Aborted the Operation")
    except Exception:
        # NOTE(review): best-effort catch-all kept from the original, but no
        # longer swallows SystemExit/KeyboardInterrupt via a bare `except:`.
        print("Cannot Open URL or Fetch comments at a moment")

Find the full source code for other utilities at GitHub

This script can fetch comments (along with replies), perform search and return videos, channels and playlist in categorized form and also returns country based search results.

Hope this helps.

Share:
18,931
Author by

mshcruz

Updated on June 18, 2022

Comments

  • mshcruz 6 months

    I've been trying to get comments (both threads and replies) from a given video on YouTube using Python (as an exercise to learn the language).

    Based on the examples given at the official website (https://developers.google.com/youtube/v3/docs/commentThreads/list), I was able to get some of the comments, but not all of them. I tried to add some code to deal with multiple pages, but I am having troubles to get the comments for videos with only a single page.

    For example, https://www.youtube.com/watch?v=Gd_L7DVKTA8 has 17 comments (including replies), but I'm only able to obtain 7 threads and 2 replies. Interestingly, I get the same results (only 7 threads) using the API Explorer available at the link above.

    My code is as follows:

    #!/usr/bin/python
    # Usage:
    # python scraper.py --videoid='<video_id>'
    from apiclient.errors import HttpError
    from oauth2client.tools import argparser
    from apiclient.discovery import build
    YOUTUBE_API_SERVICE_NAME = "youtube"
    YOUTUBE_API_VERSION = "v3"
    DEVELOPER_KEY = 'key'
    def get_comment_threads(youtube, video_id, comments):
      """Collect every top-level comment thread for *video_id*.

      Appends each thread's top-level display text to *comments* (mutated in
      place) and returns the list of raw thread resources. Pages through the
      API via nextPageToken until exhausted.
      """
      # BUG FIX: the original body mixed 3-space and 2-space indentation
      # (an IndentationError as pasted); it also duplicated the fetch +
      # collect logic for the first page versus subsequent pages.
      threads = []
      page_token = None
      while True:
        results = youtube.commentThreads().list(
          part="snippet",
          videoId=video_id,
          pageToken=page_token,
          textFormat="plainText",
        ).execute()
        for item in results["items"]:
          threads.append(item)
          comment = item["snippet"]["topLevelComment"]
          text = comment["snippet"]["textDisplay"]
          comments.append(text)
        page_token = results.get("nextPageToken")
        if page_token is None:
          break
      # print() call form works on both Python 2 and Python 3 for one argument.
      print("Total threads: %d" % len(threads))
      return threads
    def get_comments(youtube, parent_id, comments):
      """Fetch the replies to one comment thread and record their text.

      Appends each reply's plain-text body to *comments* (mutated in place)
      and returns the raw list of comment resources from the API response.
      """
      response = youtube.comments().list(
        part="snippet",
        parentId=parent_id,
        textFormat="plainText"
      ).execute()
      replies = response["items"]
      comments.extend(reply["snippet"]["textDisplay"] for reply in replies)
      return replies
    if __name__ == "__main__":
      argparser.add_argument("--videoid", help="Required; ID for video for which the comment will be inserted.")
      args = argparser.parse_args()
      youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
      try:
        # Collect top-level threads first, then the replies of each thread.
        comments = []
        video_comment_threads = get_comment_threads(youtube, args.videoid, comments)
        for thread in video_comment_threads:
          get_comments(youtube, thread["id"], comments)
        # BUG FIX: open in binary mode and use a with-block — the original
        # mixed bytes with a str newline (a TypeError on Python 3) and leaked
        # the file handle when an HttpError was raised mid-write.
        with open("output.txt", "wb") as output_file:
          for comment in comments:
            output_file.write(comment.encode("utf-8") + b"\n")
        print("Total comments: %d" % len(comments))
      except HttpError as e:
        # Python 3 syntax; the original `except HttpError, e:` is Python 2 only.
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
    

    Thanks in advance for any suggestions!