In [1]:
from le_utils.constants import languages

# A language object can be looked up by its language code.
language_obj = languages.getlang('en')
language_obj
Out[1]:
In [2]:
# A language object can also be looked up by its language name.
# NOTE: when this was written, the new le_utils version had not shipped yet
# (presumably the version that provides this lookup -- TODO confirm).
language_obj = languages.getlang_by_name('English')
language_obj
Out[2]:
In [3]:
# All `language` attributes (on the channel, nodes, and files) need to use the language code.
language_obj.code
Out[3]:
In [4]:
from le_utils.constants.languages import getlang_by_native_name
# Look up a language object using its native name (the name as written in that language).
lang_obj = getlang_by_native_name('français')
# Show the language object first, then the internal language code it carries.
print(lang_obj)
print(lang_obj.code)
The above language code is an internal representation that uses two-letter codes and sometimes includes locale information, e.g., pt-BR
for Brazilian Portuguese. Sometimes the internal code representation for a language is the three-letter version, e.g., zul
for Zulu.
In [ ]:
In [5]:
from ricecooker.chefs import SushiChef
from ricecooker.classes.nodes import ChannelNode, TopicNode, DocumentNode
from ricecooker.classes.files import DocumentFile
from le_utils.constants import languages
from le_utils.constants import licenses
class MySushiChef(SushiChef):
    """
    A sushi chef that creates a channel with content in English, Spanish, and French.

    The channel gets one topic per language, and each topic holds a single
    PDF document node. Every node sets its ``language`` to the internal code
    obtained from ``languages.getlang(...).code``.
    """

    # One row per language, in the order the topics are added to the channel:
    # (code passed to getlang, prefix used in source_ids, language name used in
    #  document titles/descriptions, topic title, path of the sample PDF).
    LANGUAGE_SAMPLES = (
        ('en', 'en', 'English', 'New Topic in English', 'samplefiles/documents/doc_EN.pdf'),
        ('es-MX', 'es', 'Spanish', 'Topic in Spanish', 'samplefiles/documents/doc_ES.pdf'),
        ('fr', 'fr', 'French', 'Topic in French', 'samplefiles/documents/doc_FR.pdf'),
    )

    def get_channel(self, **kwargs):
        """Build and return the ChannelNode for this chef, with English as its language."""
        channel = ChannelNode(
            source_domain='testing.org',
            source_id='lang_test_chanl',
            title='Languages test channel',
            thumbnail='http://themes.mysitemyway.com/_shared/images/flags.png',
            # global language for the channel (will apply as the default option
            # to all content items in this channel)
            language=languages.getlang('en').code,
        )
        return channel

    def _build_language_topic(self, lang_lookup, id_prefix, lang_name, topic_title, pdf_path):
        """Return a TopicNode in one language that contains a single DocumentNode."""
        # The same internal language code is used for the topic and its document.
        lang_code = languages.getlang(lang_lookup).code
        topic = TopicNode(
            source_id='<{}_topic_id>'.format(id_prefix),
            title=topic_title,
            language=lang_code,
        )
        doc_node = DocumentNode(
            source_id='<{}_doc_id>'.format(id_prefix),
            title='Some doc in {}'.format(lang_name),
            description='This is a sample document node in {}'.format(lang_name),
            files=[DocumentFile(path=pdf_path)],
            license=licenses.PUBLIC_DOMAIN,
            language=lang_code,
        )
        topic.add_child(doc_node)
        return topic

    def construct_channel(self, **kwargs):
        """Create the channel and add one topic (with one document) per sample language."""
        channel = self.get_channel(**kwargs)
        for sample in self.LANGUAGE_SAMPLES:
            channel.add_child(self._build_language_topic(*sample))
        return channel
Run your chef by creating an instance of the chef class and calling its run
method:
In [6]:
import os

mychef = MySushiChef()

# Read the token from the STUDIO_TOKEN environment variable so that a real
# token never has to be saved inside the notebook; the placeholder value is
# only used when the variable is not set.
args = {
    'token': os.environ.get('STUDIO_TOKEN', 'YOURTOKENHERE9139139f3a23232'),
    'reset': True,
    'verbose': True,
    'publish': True,
}
options = {}
mychef.run(args, options)
Congratulations, you put three languages on the internet!
In [ ]:
In [ ]:
In [7]:
import youtube_dl

# youtube_dl client configured with the subtitle-related options used in this notebook.
ydl = youtube_dl.YoutubeDL(params={
    'quiet': True,
    'no_warnings': True,
    'writesubtitles': True,
    'allsubtitles': True,
})

# Fetch the video's info without downloading it (download=False), then print
# the keys of its "subtitles" entry.
youtube_id = 'FN12ty5ztAs'
info = ydl.extract_info(youtube_id, download=False)
subtitle_languages = info["subtitles"].keys()
print(subtitle_languages)
In [ ]:
In [8]:
from ricecooker.chefs import SushiChef
from ricecooker.classes import licenses
from ricecooker.classes.nodes import ChannelNode, TopicNode, VideoNode
from ricecooker.classes.files import YouTubeVideoFile, YouTubeSubtitleFile
from ricecooker.classes.files import is_youtube_subtitle_file_supported_language
import youtube_dl
# Module-level youtube_dl client; the chef class below uses it to read video info.
ydl = youtube_dl.YoutubeDL(params={
    'quiet': True,
    'no_warnings': True,
    'writesubtitles': True,
    'allsubtitles': True,
})

# License object with the necessary info; it is attached to the video node below.
TE_LICENSE = licenses.SpecialPermissionsLicense(
    copyright_holder='Touchable Earth Foundation (New Zealand)',
    description='Permission granted by Touchable Earth to distribute through Kolibri.',
)
class YoutubeVideoWithSubtitlesSushiChef(SushiChef):
    """
    A sushi chef that creates a channel with a single YouTube video and attaches
    a subtitle file for each subtitle language that is available for the video
    and accepted by ``is_youtube_subtitle_file_supported_language``.
    """
    channel_info = {
        'CHANNEL_SOURCE_DOMAIN': 'learningequality.org', # change me!
        'CHANNEL_SOURCE_ID': 'sample_youtube_video_with_subs', # change me!
        'CHANNEL_TITLE': 'Youtube subtitles downloading chef',
        'CHANNEL_LANGUAGE': 'en',
        'CHANNEL_THUMBNAIL': 'http://themes.mysitemyway.com/_shared/images/flags.png',
        'CHANNEL_DESCRIPTION': 'This is a test channel to make sure youtube subtitle languages lookup works'
    }

    def construct_channel(self, **kwargs):
        """Create the channel, add the sample video, and attach supported subtitles."""
        # create channel
        channel = self.get_channel(**kwargs)

        # get all subtitles available for a sample video (info only, no download)
        youtube_id = 'FN12ty5ztAs'
        info = ydl.extract_info(youtube_id, download=False)
        # Tolerate a missing or None "subtitles" entry instead of raising, so a
        # video without subtitles still produces a channel with the bare video.
        subtitle_languages = (info.get("subtitles") or {}).keys()
        print('Found subtitle_languages = ', subtitle_languages)

        # create video node
        video_node = VideoNode(
            source_id=youtube_id,
            title='Youtube video',
            license=TE_LICENSE,
            derive_thumbnail=True,
            files=[YouTubeVideoFile(youtube_id=youtube_id)],
        )

        # add subtitles in whichever languages are available; unsupported
        # language codes are reported and skipped
        for lang_code in subtitle_languages:
            if is_youtube_subtitle_file_supported_language(lang_code):
                video_node.add_file(
                    YouTubeSubtitleFile(
                        youtube_id=youtube_id,
                        language=lang_code
                    )
                )
            else:
                print('Unsupported subtitle language code:', lang_code)

        channel.add_child(video_node)
        return channel
In [ ]:
import os

chef = YoutubeVideoWithSubtitlesSushiChef()

# Read the token from the STUDIO_TOKEN environment variable so that a real
# token never has to be saved inside the notebook; the placeholder value is
# only used when the variable is not set.
args = {
    'token': os.environ.get('STUDIO_TOKEN', 'YOURTOKENHERE9139139f3a23232'),
    'reset': True,
    'verbose': True,
    'publish': True,
}
options = {}
chef.run(args, options)
In [ ]: