19.3 YouTube
- YouTube Ads
- YouTube Analytics and Reporting APIs: All YouTube Analytics and YouTube Reporting API requests must be authorized by the channel or content owner that owns the requested data
19.3.1 OAuth
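Go to the Google API Console (https://console.developers.google.com/) to create the OAuth credentials (app ID and app secret) that give you access to the API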
library(tuber)
# OAuth client credentials (placeholders to be replaced with your own values).
app_id <- "YOUR APP ID"
app_secret <- "YOUR APP SECRET"

# Authorise the session with the credentials above.
yt_oauth(app_id = app_id, app_secret = app_secret)

# Request the stats of a single video, identified by its id.
get_stats(video_id = "N708P-A45D0")
Note for Ubuntu users
Get info about a video
Get caption of a video
Search Videos
Get Comments of a video
Get All the Comments Including Replies
Get statistics of all the videos in a channel
# Content details of the channel; they list the channel's related playlists.
a <- list_channel_resources(
  filter = c(channel_id = "UCT5Cx1l4IS3wHkJXNyuj4TA"),
  part = "contentDetails"
)

# Id of the "uploads" playlist of the first returned item.
related_playlists <- a[["items"]][[1]][["contentDetails"]][["relatedPlaylists"]]
playlist_id <- related_playlists[["uploads"]]

# Items of that playlist.
vids <- get_playlist_items(filter = c(playlist_id = playlist_id))

# Plain vector of video ids.
vid_ids <- as.vector(vids[["contentDetails.videoId"]])

# Thin wrapper around get_stats() so it can be mapped over the ids.
get_all_stats <- function(id) {
  get_stats(id)
}

# Collect the stats for every id, convert each result to a data frame
# and stack the pieces row-wise.
res <- lapply(vid_ids, get_all_stats)
res_df <- do.call(rbind, lapply(res, data.frame))
head(res_df)
19.3.2 API
require(curl)
require(jsonlite)
library(kableExtra)
library(dplyr)
library(ggplot2)
library(plotly)
library(reshape2)
# YouTube Data API key used by the request helpers below (placeholder value).
API_key <- "YOUR API KEY"
#' Fetch snippet and statistics for one YouTube video
#'
#' Requests the YouTube Data API v3 `videos` endpoint with
#' `part=snippet,statistics` and flattens the parsed response into a data frame.
#'
#' @param video_id Id of the video, pasted into the `id` query parameter.
#' @param API_key API key, pasted into the `key` query parameter.
#' @return A data frame with the channel title (`name`), the columns of the
#'   `statistics` part, and the video's `title`, `date` (`publishedAt`) and
#'   `descrip` (`description`).
getstats_video <- function(video_id, API_key) {
  url <- paste0(
    "https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id=",
    video_id, "&key=", API_key
  )
  result <- fromJSON(txt = url)
  snippet <- result$items$snippet
  data.frame(
    name = snippet$channelTitle,
    result$items$statistics,
    title = snippet$title,
    date = snippet$publishedAt,
    descrip = snippet$description
  )
}
#' List the content details of a playlist's items
#'
#' Queries the YouTube Data API v3 `playlistItems` endpoint with
#' `part=contentDetails`.
#'
#' @param id Playlist id, sent as `playlistId`.
#' @param API_key API key, sent as `key`.
#' @param topn Value sent as `maxResults` (default 15).
#' @return A data frame built from the `contentDetails` of the returned items.
get_playlist_canal <- function(id, API_key, topn = 15) {
  endpoint <- paste0(
    "https://www.googleapis.com/youtube/v3/playlistItems",
    "?part=contentDetails&playlistId="
  )
  request_url <- paste0(endpoint, id, "&key=", API_key, "&maxResults=", topn)
  response <- fromJSON(txt = request_url)
  data.frame(response$items$contentDetails)
}
#' Fetch title, statistics and related playlists of a channel
#'
#' Queries the YouTube Data API v3 `channels` endpoint with the `snippet`,
#' `contentDetails` and `statistics` parts.
#'
#' @param id Channel id, sent as `id`.
#' @param API_key API key, sent as `key`.
#' @return A data frame with the channel title (`name`), the columns of the
#'   `statistics` part, and the related playlists passed to `data.frame()`
#'   under the name `pl_list_id`.
getstats_canal <- function(id, API_key) {
  request_url <- paste0(
    "https://www.googleapis.com/youtube/v3/channels?part=snippet%2CcontentDetails%2Cstatistics&id=",
    id, "&key=", API_key
  )
  channels <- fromJSON(txt = request_url)$items
  data.frame(
    name = channels$snippet$title,
    channels$statistics,
    pl_list_id = channels$contentDetails$relatedPlaylists
  )
}
#' Collect per-video statistics for the items of several playlists
#'
#' @param ids Playlist ids; each one is passed to `get_playlist_canal()`.
#' @param API_key API key forwarded to `get_playlist_canal()` and
#'   `getstats_video()`.
#' @param topn Number of items requested per playlist (default 5).
#' @return A data frame with one row per video for which `getstats_video()`
#'   returned data, with the video id appended as column `vid_id`.
getall_channels <- function(ids, API_key, topn = 5) {
  videos <- lapply(ids, FUN = get_playlist_canal, API_key = API_key, topn = topn) %>%
    bind_rows()
  video_ids <- videos[, 1]

  # Attach the id to each per-video result *before* binding. Assigning the full
  # id vector afterwards fails as soon as one lookup comes back empty, because
  # the row counts then no longer match.
  stats <- lapply(video_ids, function(video_id) {
    video_stats <- getstats_video(video_id, API_key = API_key)
    if (nrow(video_stats) == 0) {
      return(NULL)
    }
    video_stats$vid_id <- video_id
    video_stats
  })

  # bind_rows() drops the NULL entries left by empty lookups.
  bind_rows(stats)
}
Statistics per Channel
# Channel-level statistics for every channel id in comp_data$cha_id
# (comp_data is not defined in this section — it must exist beforehand).
can_st <- lapply(comp_data$cha_id, FUN = getstats_canal, API_key = API_key)
can_st <- bind_rows(can_st)
can_st$viewCount <- as.numeric(can_st$viewCount)
can_st[, 1:6] %>%
  kable() %>%
  kable_styling()

# Express the view counts in millions, rounded to two decimals, for plotting.
can_st$viewCount <- round(can_st$viewCount / 1000000, 2)

# NOTE(review): stat = "sum" adds a size aesthetic, which is why the size guide
# is switched off; geom_col() may be what was intended — confirm.
p1 <- can_st %>%
  ggplot(aes(x = reorder(name, viewCount), y = viewCount, fill = name)) +
  geom_bar(stat = "sum") +
  guides(size = "none") +
  coord_flip() +
  scale_fill_manual(values = c("red", "darkblue", "yellow2")) +
  geom_text(
    inherit.aes = TRUE,
    aes(label = paste(viewCount, "M")),
    nudge_y = 0,
    angle = 90
  ) +
  labs(x = "Total Visualizations(Millions)", y = "Visualizations", fill = "") +
  theme(legend.position = "top") +
  mytheme3 # theme object defined outside this section

# Interactive version of the plot with a horizontal legend.
ggplotly(p1, tooltip = c("name", "viewCount")) %>%
  layout(legend = list(orientation = "h", x = 0.01, y = -0.1, autosize = FALSE))
Information on Individual Videos per Channel
# Statistic to plot; alternatives are "favoriteCount" or "commentCount".
# NOTE(review): the YouTube Data API appears to return dislikeCount only to the
# video owner since December 2021, so with a plain API key this statistic may
# be absent and the plot empty — confirm and pick another statistic if needed.
var_to_see <- "dislikeCount"
info <- getall_channels(
  ids = can_st$pl_list_id.uploads,
  API_key = API_key,
  topn = 20
)

# Select columns by name instead of by position: when a statistic is missing
# from the response, positional indices shift and `date` falls out of the
# selection, which breaks melt().
count_cols <- grep("Count$", names(info), value = TRUE)
datacond <- melt(
  info[, c("name", count_cols, "date")],
  id.vars = c("name", "date")
)
datacond$date <- as.Date(datacond$date)
datacond$value <- as.numeric(datacond$value)

# Bars of the chosen statistic per publication date, from the earliest video
# date up to today, with monthly axis breaks.
ggplot(
  filter(datacond, variable == var_to_see),
  aes(x = date, y = value, fill = name)
) +
  geom_bar(stat = "sum") +
  labs(x = var_to_see, y = "", fill = "") +
  guides(size = "none") +
  scale_fill_manual(values = c("red", "darkblue", "yellow2")) +
  scale_x_date(
    limits = c(min(datacond$date), as.Date(Sys.time())),
    date_breaks = "month",
    date_labels = "%b %y"
  ) +
  theme(legend.position = "top") +
  mytheme2 # theme object defined outside this section
19.3.3 Python
# from pytube import YouTube
# YouTube('https://youtube.com/watch?v=2lAe1cqCOXo').streams.first().download()
import requests
import json
# Request the "top_rated" standard feed, asking for the JSON-C representation.
# NOTE(review): gdata.youtube.com looks like the retired YouTube Data API v2
# host — this request probably no longer returns JSON; confirm before use.
r = requests.get("http://gdata.youtube.com/feeds/api/standardfeeds/top_rated?v=2&alt=jsonc")
#r.text
# Parse the response body as JSON and display the parsed object.
data = json.loads(r.text)
data
# Disabled example of walking the parsed items (uses Python 2 print statements):
# for item in data['data']['items']:
# print "Video Title: %s" % (item['title'])
# print "Video Category: %s" % (item['category'])
# print "Video ID: %s" % (item['id'])
# print "Video Rating: %f" % (item['rating'])
# print "Embed URL: %s" % (item['player']['default'])
Using BeautifulSoup
from requests_html import HTMLSession
from bs4 import BeautifulSoup as bs # importing BeautifulSoup
# sample youtube video url
video_url = "https://www.youtube.com/watch?v=jNQXAC9IVRw"
# init an HTML session
session = HTMLSession()
# get the html content
response = session.get(video_url)
# execute the page's JavaScript, waiting one second after the render
response.html.render(sleep=1)
# create bs object to parse HTML
soup = bs(response.html.html, "html.parser")
# write all HTML code into a file; the context manager closes the handle
# (the bare open(...).write(...) call left it open)
with open("video.html", "w", encoding='utf8') as html_file:
    html_file.write(response.html.html)
Generate random video
import json
import urllib.request
import string
import random
count = 50
API_KEY = 'your_key'

# Random 3-character search term (uppercase letters and digits). It gets its own
# name so that the imported `random` module is not overwritten by a string.
query = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(3))
urlData = "https://www.googleapis.com/youtube/v3/search?key={}&maxResults={}&part=snippet&type=video&q={}".format(API_KEY, count, query)

# Read the response inside a context manager so the connection is closed.
with urllib.request.urlopen(urlData) as webURL:
    data = webURL.read()
    encoding = webURL.info().get_content_charset('utf-8')
results = json.loads(data.decode(encoding))

# Print the id of every returned video; `item` keeps the raw `data` bytes intact.
for item in results['items']:
    videoId = item['id']['videoId']
    print(videoId)
    # store your ids
Random used by YouTube API, test
# -*- coding: utf-8 -*-
# Sample Python code for youtube.search.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python
import os
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
scopes = ["https://www.googleapis.com/auth/youtube.force-ssl"]


def main():
    """Authorise via the installed-app OAuth flow and print a search response.

    Builds a YouTube Data API v3 client from the client-secrets file, runs
    `search.list` for the query "surfing" (25 results, `snippet` part) and
    prints the raw response.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "YOUR_CLIENT_SECRET_FILE.json"

    # Get credentials and create an API client
    flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
        client_secrets_file, scopes)
    # run_console() used the out-of-band flow and was removed from
    # google-auth-oauthlib 1.0; run_local_server() is its replacement.
    # port=0 lets the OS pick a free port for the local redirect listener.
    credentials = flow.run_local_server(port=0)
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, credentials=credentials)

    request = youtube.search().list(
        part="snippet",
        maxResults=25,
        q="surfing"
    )
    response = request.execute()

    print(response)


if __name__ == "__main__":
    main()
Another way, but this is a dirty crawler, not for production
import re, urllib
from random import randint
def random_str(str_size):
    """Return a string of `str_size` random lowercase ASCII letters.

    One `randint(0, 25)` draw is made per character, in order. A size of 0
    yields the empty string.
    """
    # range() replaces the Python 2-only xrange(); join() avoids building the
    # string by repeated concatenation.
    return "".join(chr(ord('a') + randint(0, 25)) for _ in range(str_size))
def find_watch(text, pos):
    """Locate the next "watch?v=" fragment in `text` at or after `pos`.

    Returns `(fragment, start)`, where the fragment runs from the match up to,
    but excluding, the character just before the next space. Returns
    `(None, None)` when there is no match, no following space, or the space is
    more than 200 characters past the match (a crude sanity heuristic).
    """
    begin = text.find("watch?v=", pos)
    if begin >= 0:
        stop = text.find(" ", begin)
        if 0 <= stop <= begin + 200:
            return text[begin:stop - 1], begin
    return None, None
def find_instance_links():
    """Search YouTube for a random 3-letter query and return one watch link.

    The result page is scanned with find_watch(); fragments containing ";" are
    skipped and duplicates collapse into a single entry. One fragment is then
    drawn at random from the second half of the collected ones.

    Returns the chosen "watch?v=..." fragment, or None when the page yielded
    no usable fragment.
    """
    # Local import: on Python 3 a bare `import urllib` does not load the
    # `urllib.request` submodule, and `urllib.urlopen` no longer exists.
    import urllib.request

    base_url = 'https://www.youtube.com/results?search_query='
    url = base_url + random_str(3)
    #print(url)
    # Close the connection deterministically and decode the bytes, because
    # find_watch() searches with str patterns.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf-8", errors="replace")

    links = {}  # dict used as an insertion-ordered set of fragments
    pos = 0
    while True:
        link, pos = find_watch(page, pos)
        if link is None or pos is None:
            break
        pos += 1
        #print(link)
        if ";" in link:
            continue
        links[link] = 1

    candidates = list(links)
    if not candidates:
        # randint(0, -1) would raise ValueError on an empty result set.
        return None
    # Integer division: randint() rejects the float that `/` yields on Python 3.
    selected = randint(len(candidates) // 2, len(candidates) - 1)
    return candidates[selected]
# sleep() was called below without ever being imported.
from time import sleep

# Fetch and print 1000 links, pausing between requests.
for i in range(1000):
    sleep(randint(7, 20))  # pause randomly between 7 and 20 seconds
    link = find_instance_links()
    print(link)