def verify_key(self):
    """
    Checks if the API key is valid.

    Sends one GET request to the ``playlists`` endpoint, built from
    ``self.api_version`` and ``self.key``, and inspects the HTTP status of
    the response.  Connection-level errors raised by ``requests.get``
    itself are not caught here and propagate to the caller.

    :returns: True if the API key is valid, False if the key is not valid.
    :rtype: bool
    """
    http_endpoint = ("https://www.googleapis.com/youtube/v{}/playlists"
                     "?part=id&id=UC_x5XG1OV2P6uZZ5FSM9Ttw&"
                     "key={}&maxResults=2".format(self.api_version, self.key))
    response = requests.get(http_endpoint)
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Only a bad HTTP status means "invalid key"; a bare ``except``
        # would also hide KeyboardInterrupt and programming errors.
        return False
    return True

def create_session(self, max_retries=2, backoff_factor=.5,
                   status_forcelist=[500, 502, 503, 504], **kwargs):
    """
    Creates a requests session to retry API calls when any
    `status_forcelist` codes are returned.  The session is stored on
    ``self.session``.

    :param max_retries: How many times to retry an HTTP request (API call)
        when a `status_forcelist` code is returned
    :type max_retries: int
    :param backoff_factor: How long to wait between retrying API calls.
        Scales exponentially.
    :type backoff_factor: float
    :param status_forcelist: Retry when any of these http response codes
        are returned.
    :type status_forcelist: list
    :param kwargs: extra keyword arguments forwarded to ``Retry``.
    """
    session = requests.Session()
    retries = Retry(total=max_retries,
                    backoff_factor=backoff_factor,
                    status_forcelist=status_forcelist,
                    **kwargs)
    # Every endpoint used by this client is https://, so the retry policy
    # must be mounted for that scheme as well; mounting it on http:// only
    # leaves all real API calls without retries.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, HTTPAdapter(max_retries=retries))
    self.session = session

def get_channel_id_from_user(self, username, **kwargs):
    """
    Get a channel_id from a YouTube username. These are the unique
    identifiers for all YouTube "users".
    IE. "Munchies" -> "UCaLfMkkHhSA_LaCta0BzyhQ".

    Read the docs: https://developers.google.com/youtube/v3/docs/channels/list

    :param username: the username for a YouTube channel
    :type username: str
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: YouTube Channel ID for a given username, or None when the
        response contains no items.
    :rtype: str
    """
    base_url = ("https://www.googleapis.com/youtube/v{}/channels"
                "?part=id"
                "&forUsername={}&key={}".format(self.api_version,
                                                username, self.key))
    extras = ''.join('&{}={}'.format(name, value)
                     for name, value in kwargs.items())
    payload = _load_response(self.session.get(base_url + extras))

    matches = payload.get('items')
    if not matches:
        return None
    return matches[0]['id']

def get_channel_metadata_gen(self, channel_id, parser=P.parse_channel_metadata,
                             part=["id", "snippet", "contentDetails",
                                   "statistics", "topicDetails",
                                   "brandingSettings"],
                             **kwargs):
    """
    Yields parsed channel metadata for a list (or pandas Series) of
    channel_ids.

    The ids are split with ``_chunker(channel_id, 50)`` (presumably batches
    of at most 50 ids -- confirm against the helper) and one request is
    made per chunk.  If ``channel_id`` is neither a list nor a Series the
    generator yields nothing.

    Read the docs: https://developers.google.com/youtube/v3/docs/channels/list

    :param channel_id: channel id(s)
    :type channel_id: list or pandas.Series
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: yields the YouTube channel metadata; ``parser(None)`` is
        yielded once for a chunk whose response has no items.
    :rtype: dict
    """
    parser = parser if parser else P.raw_json
    part = ','.join(part)
    if isinstance(channel_id, (list, pd.Series)):
        for chunk in _chunker(channel_id, 50):
            id_input = ','.join(chunk)
            # The API version segment needs its "v" prefix (".../youtube/v3/..."),
            # exactly like every other endpoint built by this client.
            http_endpoint = ("https://www.googleapis.com/youtube/v{}/channels?"
                             "part={}&id={}&key={}&maxResults=50".format(
                                 self.api_version, part, id_input, self.key))
            for k, v in kwargs.items():
                http_endpoint += '&{}={}'.format(k, v)
            response = self.session.get(http_endpoint)
            response_json = _load_response(response)
            if response_json.get('items'):
                for item in response_json['items']:
                    yield parser(item)
            else:
                yield parser(None)

def get_channel_metadata(self, channel_id, parser=P.parse_channel_metadata,
                         part=["id", "snippet", "contentDetails",
                               "statistics", "topicDetails",
                               "brandingSettings"],
                         **kwargs):
    """
    Gets channel metadata given a channel_id, or a list of channel_ids.

    Read the docs: https://developers.google.com/youtube/v3/docs/channels/list

    :param channel_id: the channel id(s)
    :type channel_id: str or list
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: for a single id, the parsed first item (or an empty list when
        the response has no items); for a list or Series, a list of
        everything yielded by :meth:`get_channel_metadata_gen`.
    :rtype: dict or list
    :raises TypeError: if ``channel_id`` is not a str, list or pandas Series.
    """
    parser = parser if parser else P.raw_json

    # Single id: one request, keep only the first returned item.
    if isinstance(channel_id, str):
        joined_part = ','.join(part)
        url = ("https://www.googleapis.com/youtube/v{}/channels?"
               "part={}&id={}&key={}&maxResults=50".format(
                   self.api_version, joined_part, channel_id, self.key))
        url += ''.join('&{}={}'.format(name, value)
                       for name, value in kwargs.items())
        payload = _load_response(self.session.get(url))
        if payload.get('items'):
            return parser(payload['items'][0])
        return []

    # Many ids: delegate the batching to the generator variant.
    if isinstance(channel_id, (list, pd.Series)):
        return list(self.get_channel_metadata_gen(
            channel_id, parser=parser, part=part, **kwargs))

    raise TypeError("Could not process the type entered!")

def get_video_metadata_gen(self, video_id, parser=P.parse_video_metadata,
                           part=['statistics', 'snippet'], **kwargs):
    """
    Given a list of `video_id` yields metrics (views, likes, comments) and
    metadata (description, category) for each video.

    The ids are split with ``_chunker(video_id, 50)`` (presumably batches of
    at most 50 ids -- confirm against the helper) and one request is made
    per chunk.

    Read the docs: https://developers.google.com/youtube/v3/docs/videos/list

    :param video_id: The IDs of videos IE: ["kNbhUWLH_yY"], these can be
        found at the end of YouTube urls and by parsing links using
        :meth:`youtube_api.youtube_api_utils.strip_youtube_id`.
    :type video_id: list of str or pandas.Series
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: yields metadata from the inputted ``video_id``s;
        ``parser(None)`` is yielded once for a chunk whose response has no
        items.
    :rtype: dict
    :raises TypeError: (on first iteration) if ``video_id`` is not a list
        or pandas Series.
    """
    part = ','.join(part)
    parser = parser if parser else P.raw_json
    if not isinstance(video_id, (list, pd.Series)):
        # The original spelled the exception name wrong, which surfaced as
        # a NameError instead of the intended message.
        raise TypeError('This function only takes iterables!')

    for chunk in _chunker(video_id, 50):
        id_input = ','.join(chunk)
        http_endpoint = ("https://www.googleapis.com/youtube/v{}/videos"
                         "?part={}"
                         "&id={}&key={}&maxResults=50".format(
                             self.api_version, part, id_input, self.key))
        for k, v in kwargs.items():
            http_endpoint += '&{}={}'.format(k, v)
        response = self.session.get(http_endpoint)
        response_json = _load_response(response)
        if response_json.get('items'):
            for item in response_json['items']:
                yield parser(item)
        else:
            yield parser(None)

def get_video_metadata(self, video_id, parser=P.parse_video_metadata,
                       part=['statistics', 'snippet'], **kwargs):
    """
    Given a single or list of `video_id` returns metrics (views, likes,
    comments) and metadata (description, category).

    Read the docs: https://developers.google.com/youtube/v3/docs/videos/list

    :param video_id: the ID of a video IE: ['kNbhUWLH_yY'], this can be
        found at the end of YouTube urls and by parsing links using
        :meth:`youtube_api.youtube_api_utils.strip_youtube_id`.
    :type video_id: str or list of str
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: for a single id, the parsed first item (or an empty list when
        the response has no items); for a list or Series, a list of
        everything yielded by :meth:`get_video_metadata_gen`.
    :rtype: dict or list
    :raises TypeError: if ``video_id`` is not a str, list or pandas Series.
    """
    parser = parser if parser else P.raw_json

    # Single id: one request, keep only the first returned item.
    if isinstance(video_id, str):
        joined_part = ','.join(part)
        url = ("https://www.googleapis.com/youtube/v{}/videos"
               "?part={}"
               "&id={}&key={}&maxResults=2".format(
                   self.api_version, joined_part, video_id, self.key))
        url += ''.join('&{}={}'.format(name, value)
                       for name, value in kwargs.items())
        payload = _load_response(self.session.get(url))
        if payload.get('items'):
            return parser(payload['items'][0])
        return []

    # Many ids: delegate the batching to the generator variant.
    if isinstance(video_id, (list, pd.Series)):
        return list(self.get_video_metadata_gen(
            video_id, parser=parser, part=part, **kwargs))

    raise TypeError("Could not process the type entered!")

def get_playlists(self, channel_id, next_page_token=False,
                  parser=P.parse_playlist_metadata,
                  part=['id', 'snippet', 'contentDetails'], **kwargs):
    """
    Returns the playlists that `channel_id` created. Note that playlists
    can contain videos from any users.

    Pages through the results, following ``nextPageToken`` until a
    response no longer carries one.

    Read the docs: https://developers.google.com/youtube/v3/docs/playlists/list

    :param channel_id: a channel_id IE: "UCn8zNIfYAQNdrFRrr8oibKw"
    :type channel_id: str
    :param next_page_token: a token to continue from a previously stopped
        query IE: "CDIQAA"
    :type next_page_token: str
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: playlist info for the playlists of ``channel_id``.
    :rtype: list of dict
    """
    parser = parser if parser else P.raw_json
    part = ','.join(part)

    # Everything except the page token is identical for every page.
    base_endpoint = ("https://www.googleapis.com/youtube/v{}/playlists"
                     "?part={}&channelId={}&key={}&maxResults=50".format(
                         self.api_version, part, channel_id, self.key))
    for k, v in kwargs.items():
        base_endpoint += '&{}={}'.format(k, v)

    playlists = []
    while True:
        http_endpoint = base_endpoint
        if next_page_token:
            http_endpoint += "&pageToken={}".format(next_page_token)

        response = self.session.get(http_endpoint)
        response_json = _load_response(response)

        for item in response_json.get('items') or []:
            playlists.append(parser(item))

        # Stop as soon as there is no further page, whether or not this
        # page had items; otherwise an empty response (e.g. a channel
        # without playlists) could repeat the same request forever.
        next_page_token = response_json.get('nextPageToken')
        if not next_page_token:
            break

    return playlists

def get_videos_from_playlist_id(self, playlist_id, next_page_token=None,
                                published_after=datetime.datetime(1990, 1, 1),
                                parser=P.parse_video_url, part=['snippet'],
                                **kwargs):
    """
    Given a `playlist_id`, returns `video_ids` associated with that
    playlist.

    Note that user uploads for any given channel are from a playlist named
    "upload playlist id". You can get this value using
    :meth:`youtube_api.youtube_api.get_channel_metadata` or
    :meth:`youtube_api.youtube_api_utils.get_upload_playlist_id`.
    The playlist ID for uploads is always the channel_id with "UU" subbed
    for "UC".

    Collection stops at the first item whose publish date is on or before
    ``published_after``, or when there are no more pages.

    Read the docs: https://developers.google.com/youtube/v3/docs/playlistItems

    :param playlist_id: the playlist_id IE: "UUaLfMkkHhSA_LaCta0BzyhQ"
    :type playlist_id: str
    :param next_page_token: a token to continue from a previously stopped
        query IE: "CDIQAA"
    :type next_page_token: str
    :param published_after: a date for the minimum publish date for videos
        from a playlist_id.
    :type published_after: datetime
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
        Each item's ``snippet`` is read to obtain its publish date.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: video ids associated with ``playlist_id``.
    :rtype: list of dict
    """
    parser = parser if parser else P.raw_json
    part = ','.join(part)

    # Everything except the page token is identical for every page.
    base_endpoint = ("https://www.googleapis.com/youtube/v{}/playlistItems"
                     "?part={}&playlistId={}&maxResults=50&key={}".format(
                         self.api_version, part, playlist_id, self.key))
    for k, v in kwargs.items():
        base_endpoint += '&{}={}'.format(k, v)

    videos = []
    while True:
        http_endpoint = base_endpoint
        if next_page_token:
            http_endpoint += "&pageToken={}".format(next_page_token)

        response = self.session.get(http_endpoint)
        response_json = _load_response(response)

        for item in response_json.get('items') or []:
            publish_date = parse_yt_datetime(
                item['snippet'].get('publishedAt'))
            if publish_date <= published_after:
                # Reached the cutoff: nothing further is collected.
                return videos
            videos.append(parser(item))

        # Stop whenever there is no further page, even if this page was
        # empty, so an empty playlist cannot loop forever.
        next_page_token = response_json.get('nextPageToken')
        if not next_page_token:
            return videos

def get_featured_channels_gen(self, channel_id,
                              parser=P.parse_featured_channels,
                              part=["id", "brandingSettings"], **kwargs):
    """
    Given a `channel_id` yields a dictionary
    {channel_id : [list, of, channel_ids]} of featured channels.

    Optionally, can take a list (or pandas Series) of channel IDs, which
    are split with ``_chunker(channel_id, 50)`` and requested one chunk at
    a time.

    Read the docs: https://developers.google.com/youtube/v3/docs/channels/list

    :param channel_id: channel_ids IE: ['UCn8zNIfYAQNdrFRrr8oibKw']
    :type channel_id: str or list of str
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: yields metadata for featured channels. For list input,
        ``parser(None)`` is yielded once for a chunk whose response has no
        items; for a single id with no items nothing is yielded.
    :rtype: dict
    """
    parser = parser if parser else P.raw_json
    part = ','.join(part)

    # Series are accepted alongside lists, as in the other *_gen methods.
    batched = isinstance(channel_id, (list, pd.Series))
    if batched:
        id_inputs = (','.join(chunk) for chunk in _chunker(channel_id, 50))
    else:
        id_inputs = [channel_id]

    for id_input in id_inputs:
        http_endpoint = ("https://www.googleapis.com/youtube/v{}/channels"
                         "?part={}&id={}&key={}".format(
                             self.api_version, part, id_input, self.key))
        for k, v in kwargs.items():
            http_endpoint += '&{}={}'.format(k, v)
        response = self.session.get(http_endpoint)
        response_json = _load_response(response)

        # ``.get`` in both modes: a response without 'items' must not raise
        # KeyError for the single-id case.
        items = response_json.get('items')
        if items:
            for item in items:
                yield parser(item)
        elif batched:
            yield parser(None)

def get_featured_channels(self, channel_id, parser=P.parse_featured_channels,
                          **kwargs):
    """
    Given a `channel_id` returns a dictionary
    {channel_id : [list, of, channel_ids]} of featured channels.

    Optionally, can take a list of channel IDs, and returns a list of
    dictionaries. This is the eager counterpart of
    :meth:`get_featured_channels_gen`: it collects everything that
    generator yields.

    Read the docs: https://developers.google.com/youtube/v3/docs/channels/list

    :param channel_id: channel_ids IE: ['UCn8zNIfYAQNdrFRrr8oibKw']
    :type channel_id: str or list of str
    :param parser: the function to parse the json document
    :type parser: :mod:`youtube_api.parsers module`
    :param kwargs: forwarded unchanged to
        :meth:`get_featured_channels_gen` (this is how ``part`` can be
        supplied).
    :returns: metadata for featured channels from ``channel_id``.
    :rtype: list of dict
    """
    channel_stream = self.get_featured_channels_gen(
        channel_id, parser=parser, **kwargs)
    return list(channel_stream)

def get_video_comments(self, video_id, get_replies=True, max_results=None,
                       next_page_token=False,
                       parser=P.parse_comment_metadata, part=['snippet'],
                       **kwargs):
    """
    Returns comments and replies to comments for a given video.

    Top-level comment threads are collected first, page by page.  If
    ``get_replies`` is true, one additional request is then made for each
    collected comment whose parsed ``reply_count`` is greater than zero,
    and the returned replies are appended after the top-level comments.
    Only a single request (``maxResults=100``) is made per parent comment.

    Read the docs: https://developers.google.com/youtube/v3/docs/commentThreads/list

    :param video_id: a video_id IE: "eqwPlwHSL_M"
    :type video_id: str
    :param get_replies: whether or not to get replies to comments
    :type get_replies: bool
    :param max_results: stop and return once this many entries (comments
        plus replies) have been collected; a falsy value means no limit.
    :type max_results: int or None
    :param next_page_token: a token to continue from a previously stopped
        query
    :type next_page_token: str
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``. The reply pass reads ``reply_count`` and
        ``comment_id`` from each parsed comment via ``.get``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string
        of every request.
    :returns: comments and responses to comments of the given ``video_id``.
    :rtype: list of dict
    """
    parser = parser if parser else P.raw_json
    part = ','.join(part)
    extra_params = ''.join('&{}={}'.format(k, v) for k, v in kwargs.items())
    comments = []

    # Pass 1: top-level comment threads, following nextPageToken.
    threads_endpoint = ("https://www.googleapis.com/youtube/v{}/commentThreads?"
                        "part={}&textFormat=plainText&maxResults=100&"
                        "videoId={}&key={}".format(
                            self.api_version, part, video_id, self.key))
    threads_endpoint += extra_params
    while True:
        http_endpoint = threads_endpoint
        if next_page_token:
            http_endpoint += "&pageToken={}".format(next_page_token)

        response = self.session.get(http_endpoint)
        response_json = _load_response(response)

        for item in response_json.get('items') or []:
            if max_results and len(comments) >= max_results:
                return comments
            comments.append(parser(item))

        next_page_token = response_json.get('nextPageToken')
        if not next_page_token:
            break

    # Pass 2: replies.  Iterate over a snapshot so that appending replies
    # to ``comments`` does not modify the list being iterated.
    if get_replies:
        for comment in list(comments):
            reply_count = comment.get('reply_count')
            if not (reply_count and reply_count > 0):
                continue
            comment_id = comment.get('comment_id')
            http_endpoint = ("https://www.googleapis.com/youtube/v{}/comments?"
                             "part={}&textFormat=plainText&maxResults=100&"
                             "parentId={}&key={}".format(
                                 self.api_version, part, comment_id, self.key))
            http_endpoint += extra_params

            response = self.session.get(http_endpoint)
            response_json = _load_response(response)

            for item in response_json.get('items') or []:
                if max_results and len(comments) >= max_results:
                    return comments
                comments.append(parser(item))

    return comments

def get_captions(self, video_id, lang_code='en', parser=P.parse_caption_track,
                 **kwargs):
    """
    Grabs captions given a video id using the PyTube and BeautifulSoup
    Packages. Note that this is NOT from the API.

    :param video_id: a video_id IE: "eqwPlwHSL_M", or an iterable of them
    :type video_id: str
    :param lang_code: language to get captions in
    :type lang_code: str
    :param parser: accepted for interface compatibility; the visible code
        does not apply it to the result.
    :type parser: :mod:`youtube_api.parsers module`
    :param kwargs: forwarded to the ``YouTube`` constructor.
    :returns: the captions from a given ``video_id``: a dict with keys
        ``caption``, ``video_id`` and ``collection_date`` for a single id,
        or a list of such dicts otherwise.
    :rtype: dict
    """
    def _caption_record(single_id):
        """Fetch the caption track for one video and wrap it in a dict."""
        video = YouTube(get_url_from_video_id(single_id), **kwargs)
        track = video.captions.get_by_language_code(lang_code)
        # ``caption`` is None when no track exists for ``lang_code``.
        text = _text_from_html(track.xml_captions) if track else None
        return {
            'caption': text,
            'video_id': single_id,
            'collection_date': datetime.datetime.now(),
        }

    if isinstance(video_id, str):
        return _caption_record(video_id)
    return [_caption_record(v_id) for v_id in video_id]

def search(self, q=None, channel_id=None, max_results=5,
           order_by="relevance", next_page_token=None,
           published_after=datetime.datetime(2000, 1, 1),
           published_before=datetime.datetime(3000, 1, 1),
           location=None, location_radius='1km', region_code=None,
           safe_search=None, relevance_language=None, event_type=None,
           topic_id=None, video_duration=None, search_type="video",
           parser=P.parse_rec_video_metadata, part=['snippet'], **kwargs):
    """
    Search YouTube for either videos, channels for keywords. Only returns
    up to 500 videos per search. For an exhaustive search, take advantage
    of the ``published_after`` and ``published_before`` params.

    Read the docs: https://developers.google.com/youtube/v3/docs/search/list

    :param q: regex pattern to search using | for or, && for and, and - for
        not. IE boat|fishing is boat or fishing. A list is joined with
        ``|``.
    :type q: list or str
    :param channel_id: restrict results to this channel.
    :type channel_id: str
    :param max_results: max number of videos returned by a search query. A
        falsy value disables the limit.
    :type max_results: int
    :param order_by: Return search results ordered by either
        ``relevance``, ``date``, ``rating``, ``title``, ``videoCount``,
        ``viewCount``.
    :type order_by: str
    :param next_page_token: A token to continue from a previously stopped
        query IE:CDIQAA
    :type next_page_token: str
    :param published_after: Only show videos uploaded after datetime
    :type published_after: datetime
    :param published_before: Only show videos uploaded before datetime
    :type published_before: datetime
    :param location: Coordinates of video uploaded in location.
    :type location: tuple
    :param location_radius: The radius from the ``location`` param to
        include in the search.
    :type location_radius: str
    :param region_code: search results for videos that can be viewed in the
        specified country. The parameter value is an ISO 3166-1 alpha-2
        country code.
    :type region_code: str
    :param safe_search: whether or not to include restricted content,
        options are "moderate", "strict", "none" or None.
    :type safe_search: str or None
    :param relevance_language: Instructs the API to return search results
        that are most relevant to the specified language.
    :type relevance_language: str
    :param event_type: whether the video is "live", "completed", or
        "upcoming".
    :type event_type: str
    :param topic_id: only contain resources associated with the specified
        topic. The value identifies a Freebase topic ID.
    :type topic_id: str
    :param video_duration: filter on video durations "any", "long",
        "medium", "short".
    :type video_duration: str
    :param search_type: return results on a "video", "channel", or
        "playlist" search.
    :type search_type: str
    :param parser: the function to parse the json document. A falsy value
        selects ``P.raw_json``.
    :type parser: :mod:`youtube_api.parsers module`
    :param part: The part parameter specifies a comma-separated list of one
        or more resource properties that the API response will include.
        Different parameters cost different quota costs from the API.
    :type part: list
    :param kwargs: extra ``name=value`` pairs appended to the query string.
    :returns: incomplete video metadata of videos returned by search query.
    :rtype: list of dict
    :raises Exception: if ``search_type`` is invalid, or if
        ``published_after`` / ``published_before`` is not a date/datetime.
    :raises ValueError: if ``safe_search``, ``event_type`` or
        ``video_duration`` is not one of the accepted values.
    """
    if search_type not in ["video", "channel", "playlist"]:
        raise Exception("The value you have entered for `type` is not valid!")

    parser = parser if parser else P.raw_json
    part = ','.join(part)

    # Only the page token changes between pages, so validate the arguments
    # and build the rest of the URL once, before any request is sent.
    base_endpoint = ("https://www.googleapis.com/youtube/v{}/search?"
                     "part={}&type={}&maxResults=50"
                     "&order={}&key={}".format(
                         self.api_version, part, search_type, order_by,
                         self.key))
    if q:
        if isinstance(q, list):
            q = '|'.join(q)
        base_endpoint += "&q={}".format(q)

    if published_after:
        if not isinstance(published_after, datetime.date):
            raise Exception("published_after must be a datetime, not a {}".format(
                type(published_after)))
        # Instance ``strftime`` also works for plain ``date`` objects, which
        # the isinstance check above lets through.
        base_endpoint += "&publishedAfter={}".format(
            published_after.strftime("%Y-%m-%dT%H:%M:%SZ"))

    if published_before:
        if not isinstance(published_before, datetime.date):
            raise Exception("published_before must be a datetime, not a {}".format(
                type(published_before)))
        base_endpoint += "&publishedBefore={}".format(
            published_before.strftime("%Y-%m-%dT%H:%M:%SZ"))

    if channel_id:
        base_endpoint += "&channelId={}".format(channel_id)

    if location:
        if isinstance(location, tuple):
            # (lat, lng) -> "lat,lng", URL-quoted.
            location = urllib.parse.quote_plus(
                str(location).strip('()').replace(' ', ''))
        base_endpoint += "&location={}&locationRadius={}".format(
            location, location_radius)

    if region_code:
        base_endpoint += "&regionCode={}".format(region_code)

    # Raising a plain string is itself a TypeError in Python 3, so these
    # validations raise real exceptions carrying the same messages.
    if safe_search:
        if safe_search not in ['moderate', 'strict', 'none']:
            raise ValueError("Not proper safe_search.")
        base_endpoint += '&safeSearch={}'.format(safe_search)

    if relevance_language:
        base_endpoint += '&relevanceLanguage={}'.format(relevance_language)

    if event_type:
        if event_type not in ['completed', 'live', 'upcoming']:
            raise ValueError("Not proper event_type!")
        base_endpoint += '&eventType={}'.format(event_type)

    if topic_id:
        base_endpoint += '&topicId={}'.format(topic_id)

    if video_duration:
        if video_duration not in ['short', 'long', 'medium', 'any']:
            raise ValueError("Not proper video_duration")
        base_endpoint += '&videoDuration={}'.format(video_duration)

    for k, v in kwargs.items():
        base_endpoint += '&{}={}'.format(k, v)

    videos = []
    while True:
        http_endpoint = base_endpoint
        if next_page_token:
            http_endpoint += "&pageToken={}".format(next_page_token)

        response = self.session.get(http_endpoint)
        response_json = _load_response(response)

        items = response_json.get('items')
        if not items:
            break
        for item in items:
            videos.append(parser(item))

        if max_results and len(videos) >= max_results:
            videos = videos[:max_results]
            break

        next_page_token = response_json.get('nextPageToken')
        if not next_page_token:
            break
        # Short pause between consecutive page requests.
        time.sleep(.1)

    return videos