Skip to content

Commit

Permalink
feat: Video editor supports transcripts [FC-0076] (openedx#36058)
Browse files Browse the repository at this point in the history
* Add error handler on save video to avoid creating sjson
* Support transcripts without edx_video_id in definition_to_xml
* When copying a video from a library to a course: Create a new edx_video_id
* Save transcripts as static assets in a video in a library when adding a new transcript.
* Delete transcripts as static assets in a video in a library when deleting transcripts.
* Support downloading a transcript of a video in a library.
* Support replacing a transcript of a video in a library.
* Support updating transcripts of a video in a library.
* Refactor the code of downloading YouTube transcripts to enable this feature in libraries.
* Support copy from a library to a course and a course to a library.
  • Loading branch information
ChrisChV authored Feb 21, 2025
1 parent 6c6fd84 commit b6489e7
Show file tree
Hide file tree
Showing 12 changed files with 569 additions and 179 deletions.
69 changes: 66 additions & 3 deletions cms/djangoapps/contentstore/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import re

from attrs import frozen, Factory
from django.core.files.base import ContentFile
from django.conf import settings
from django.contrib.auth import get_user_model
from django.utils.translation import gettext as _
Expand All @@ -23,6 +24,11 @@
from xmodule.exceptions import NotFoundError
from xmodule.modulestore.django import modulestore
from xmodule.xml_block import XmlMixin
from xmodule.video_block.transcripts_utils import Transcript, build_components_import_path
from edxval.api import (
create_external_video,
create_or_update_video_transcript,
)

from cms.djangoapps.models.settings.course_grading import CourseGradingModel
from cms.lib.xblock.upstream_sync import UpstreamLink, UpstreamLinkException, fetch_customizable_fields
Expand Down Expand Up @@ -274,8 +280,14 @@ def _insert_static_files_into_downstream_xblock(
course_key=downstream_xblock.context_key,
staged_content_id=staged_content_id,
static_files=static_files,
usage_key=downstream_xblock.scope_ids.usage_id,
usage_key=downstream_xblock.usage_key,
)
if downstream_xblock.usage_key.block_type == 'video':
_import_transcripts(
downstream_xblock,
staged_content_id=staged_content_id,
static_files=static_files,
)

# Rewrite the OLX's static asset references to point to the new
# locations for those assets. See _import_files_into_course for more
Expand Down Expand Up @@ -331,6 +343,13 @@ def import_staged_content_from_user_clipboard(parent_key: UsageKey, request) ->
tags=user_clipboard.content.tags,
)

usage_key = new_xblock.usage_key
if usage_key.block_type == 'video':
# The edx_video_id must always be new so as not
# to interfere with the data of the copied block
new_xblock.edx_video_id = create_external_video(display_name='external video')
store.update_item(new_xblock, request.user.id)

notices = _insert_static_files_into_downstream_xblock(new_xblock, user_clipboard.content.id, request)

return new_xblock, notices
Expand Down Expand Up @@ -630,8 +649,8 @@ def _import_file_into_course(
# we're not going to attempt to change.
if clipboard_file_path.startswith('static/'):
# If it's in this form, it came from a library and assumes component-local assets
file_path = clipboard_file_path.lstrip('static/')
import_path = f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}"
file_path = clipboard_file_path.removeprefix('static/')
import_path = build_components_import_path(usage_key, file_path)
filename = pathlib.Path(file_path).name
new_key = course_key.make_asset_key("asset", import_path.replace("/", "_"))
else:
Expand Down Expand Up @@ -672,6 +691,50 @@ def _import_file_into_course(
return False, {}


def _import_transcripts(
    block: XBlock,
    staged_content_id: int,
    static_files: list[content_staging_api.StagedContentFileData],
):
    """
    Store the staged transcript files of a video block in VAL.

    Every entry of ``static_files`` is read from the staged content identified
    by ``staged_content_id``. When the base name of a file equals one of the
    values in ``block.transcripts``, its content is converted from SRT to SJSON
    and saved with ``create_or_update_video_transcript`` under
    ``block.edx_video_id`` and the matching language code. Files that match no
    transcript entry are read but otherwise ignored.

    Raises:
        NotFoundError: if the staged data of any listed file is missing.
    """
    for staged_file in static_files:
        staged_path = staged_file.filename
        content = content_staging_api.get_staged_content_static_file_data(
            staged_content_id,
            staged_path,
        )
        if content is None:
            raise NotFoundError(staged_file.source_key)

        # Paths starting with 'static/' came from a library (component-local
        # assets); anything else came from a course and is used unchanged.
        # removeprefix() leaves the string untouched when the prefix is absent.
        basename = pathlib.Path(staged_path.removeprefix('static/')).name

        # Find the first language whose transcript file name matches this file.
        language_code = None
        for lang, transcript_name in block.transcripts.items():
            if transcript_name == basename:
                language_code = lang
                break

        if not language_code:
            # Not one of this block's transcripts; nothing to store.
            continue

        sjson_bytes = Transcript.convert(
            content=content,
            input_format=Transcript.SRT,
            output_format=Transcript.SJSON,
        ).encode()
        transcript_metadata = {
            'file_format': Transcript.SJSON,
            'language_code': language_code,
        }
        create_or_update_video_transcript(
            video_id=block.edx_video_id,
            language_code=language_code,
            metadata=transcript_metadata,
            file_data=ContentFile(sjson_bytes),
        )


def is_item_in_course_tree(item):
"""
Check that the item is in the course tree.
Expand Down
4 changes: 4 additions & 0 deletions cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
DeveloperErrorViewMixin,
view_auth_classes,
)
from xmodule.video_block.transcripts_utils import clear_transcripts
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError

Expand Down Expand Up @@ -224,6 +225,9 @@ def post(self, request: _AuthenticatedRequest, usage_key_string: str) -> Respons
"""
downstream = _load_accessible_block(request.user, usage_key_string, require_write_access=True)
try:
if downstream.usage_key.block_type == "video":
# Delete all transcripts so we can copy new ones from upstream
clear_transcripts(downstream)
upstream = sync_from_upstream(downstream, request.user)
static_file_notices = import_static_assets_for_library_sync(downstream, upstream, request)
except UpstreamLinkException as exc:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,8 @@ def call_api(self, usage_key_string):
@patch.object(UpstreamLink, "get_for_block", _get_upstream_link_good_and_syncable)
@patch.object(downstreams_views, "sync_from_upstream")
@patch.object(downstreams_views, "import_static_assets_for_library_sync", return_value=StaticFileNotices())
def test_200(self, mock_sync_from_upstream, mock_import_staged_content):
@patch.object(downstreams_views, "clear_transcripts")
def test_200(self, mock_sync_from_upstream, mock_import_staged_content, mock_clear_transcripts):
"""
Does the happy path work?
"""
Expand All @@ -273,6 +274,7 @@ def test_200(self, mock_sync_from_upstream, mock_import_staged_content):
assert response.status_code == 200
assert mock_sync_from_upstream.call_count == 1
assert mock_import_staged_content.call_count == 1
assert mock_clear_transcripts.call_count == 1


class DeleteDownstreamSyncViewtest(_DownstreamSyncViewTestMixin, SharedModuleStoreTestCase):
Expand Down
40 changes: 39 additions & 1 deletion cms/djangoapps/contentstore/views/tests/test_transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from django.urls import reverse
from edxval.api import create_video
from opaque_keys.edx.keys import UsageKey
from organizations.tests.factories import OrganizationFactory

from cms.djangoapps.contentstore.tests.utils import CourseTestCase, setup_caption_responses
from openedx.core.djangoapps.contentserver.caching import del_cached_content
from openedx.core.djangoapps.content_libraries import api as lib_api
from xmodule.contentstore.content import StaticContent # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.exceptions import NotFoundError # lint-amnesty, pylint: disable=wrong-import-order
Expand All @@ -27,8 +29,10 @@
GetTranscriptsFromYouTubeException,
Transcript,
get_video_transcript_content,
remove_subs_from_store
get_transcript,
remove_subs_from_store,
)
from openedx.core.djangoapps.xblock import api as xblock_api

TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
Expand Down Expand Up @@ -92,6 +96,21 @@ def setUp(self):
resp = self.client.ajax_post('/xblock/', data)
self.assertEqual(resp.status_code, 200)

self.library = lib_api.create_library(
org=OrganizationFactory.create(short_name="org1"),
slug="lib",
title="Library",
)
self.library_block_metadata = lib_api.create_library_block(
self.library.key,
"video",
"video-transcript",
)
self.library_block = xblock_api.load_block(
self.library_block_metadata.usage_key,
self.user,
)

self.video_usage_key = self._get_usage_key(resp)
self.item = modulestore().get_item(self.video_usage_key)
# hI10vDNYz4M - valid Youtube ID with transcripts.
Expand Down Expand Up @@ -702,6 +721,25 @@ def test_replace_transcript_success(self, edx_video_id):
expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT)
self.assertDictEqual(actual_sjson_content, expected_sjson_content)

def test_replace_transcript_library_content_success(self):
    """
    Verify that replacing transcripts from YouTube succeeds for a library video block.
    """
    usage_key = self.library_block_metadata.usage_key

    # Request the transcript replacement and check that it reports success.
    response = self.replace_transcript(usage_key, self.youtube_id)
    self.assert_response(response, expected_status_code=200, expected_message='Success')

    # Load the block again so the assertions see the saved state.
    reloaded_block = xblock_api.load_block(usage_key, self.user)

    # The stored English transcript must equal the expected SJSON content.
    transcript_data = get_transcript(reloaded_block, 'en', Transcript.SJSON)
    self.assertDictEqual(
        json.loads(transcript_data[0]),
        json.loads(SJSON_TRANSCRIPT_CONTENT),
    )

def test_replace_transcript_fails_without_data(self):
"""
Verify that replace transcript fails if we do not provide video data in request.
Expand Down
Loading

0 comments on commit b6489e7

Please sign in to comment.