feat: Video editor supports transcripts [FC-0076] (openedx#36058)

* Add error handler on save video to avoid creating sjson
* Support transcripts without edx_video_id in definition_to_xml
* When copying a video from a library to a course: Create a new edx_video_id
* Save transcripts as static assets in a video in a library when adding a new transcript.
* Delete transcripts as static assets in a video in a library when deleting transcripts.
* Support download transcript in a video in a library.
* Support replace transcript in a video in a library.
* Support updating transcripts in video in a library.
* Refactor the code of downloading YouTube transcripts to enable this feature in libraries.
* Support copy from a library to a course and a course to a library.
mitodl · Feb 21, 2025 · b6489e7 · b6489e7
1 parent 6c6fd84
commit b6489e7
Show file tree

Hide file tree

Showing 12 changed files with 569 additions and 179 deletions.
diff --git a/cms/djangoapps/contentstore/helpers.py b/cms/djangoapps/contentstore/helpers.py
@@ -10,6 +10,7 @@
 import re
 
 from attrs import frozen, Factory
+from django.core.files.base import ContentFile
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from django.utils.translation import gettext as _
@@ -23,6 +24,11 @@
 from xmodule.exceptions import NotFoundError
 from xmodule.modulestore.django import modulestore
 from xmodule.xml_block import XmlMixin
+from xmodule.video_block.transcripts_utils import Transcript, build_components_import_path
+from edxval.api import (
+    create_external_video,
+    create_or_update_video_transcript,
+)
 
 from cms.djangoapps.models.settings.course_grading import CourseGradingModel
 from cms.lib.xblock.upstream_sync import UpstreamLink, UpstreamLinkException, fetch_customizable_fields
@@ -274,8 +280,14 @@ def _insert_static_files_into_downstream_xblock(
         course_key=downstream_xblock.context_key,
         staged_content_id=staged_content_id,
         static_files=static_files,
-        usage_key=downstream_xblock.scope_ids.usage_id,
+        usage_key=downstream_xblock.usage_key,
     )
+    if downstream_xblock.usage_key.block_type == 'video':
+        _import_transcripts(
+            downstream_xblock,
+            staged_content_id=staged_content_id,
+            static_files=static_files,
+        )
 
     # Rewrite the OLX's static asset references to point to the new
     # locations for those assets. See _import_files_into_course for more
@@ -331,6 +343,13 @@ def import_staged_content_from_user_clipboard(parent_key: UsageKey, request) ->
             tags=user_clipboard.content.tags,
         )
 
+        usage_key = new_xblock.usage_key
+        if usage_key.block_type == 'video':
+            # The edx_video_id must always be new so as not
+            # to interfere with the data of the copied block
+            new_xblock.edx_video_id = create_external_video(display_name='external video')
+            store.update_item(new_xblock, request.user.id)
+
         notices = _insert_static_files_into_downstream_xblock(new_xblock, user_clipboard.content.id, request)
 
     return new_xblock, notices
@@ -630,8 +649,8 @@ def _import_file_into_course(
     # we're not going to attempt to change.
     if clipboard_file_path.startswith('static/'):
         # If it's in this form, it came from a library and assumes component-local assets
-        file_path = clipboard_file_path.lstrip('static/')
-        import_path = f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}"
+        file_path = clipboard_file_path.removeprefix('static/')
+        import_path = build_components_import_path(usage_key, file_path)
         filename = pathlib.Path(file_path).name
         new_key = course_key.make_asset_key("asset", import_path.replace("/", "_"))
     else:
@@ -672,6 +691,50 @@ def _import_file_into_course(
         return False, {}
 
 
+def _import_transcripts(
+    block: XBlock,
+    staged_content_id: int,
+    static_files: list[content_staging_api.StagedContentFileData],
+):
+    """
+    Convert staged files named in block.transcripts from SRT to SJSON and save them to VAL under block.edx_video_id.
+    """
+    for file_data_obj in static_files:
+        clipboard_file_path = file_data_obj.filename
+        data = content_staging_api.get_staged_content_static_file_data(
+            staged_content_id,
+            clipboard_file_path
+        )
+        if data is None:
+            raise NotFoundError(file_data_obj.source_key)  # raised for any listed file, transcript or not
+
+        if clipboard_file_path.startswith('static/'):
+            # A 'static/' prefix means the file came from a library (component-local assets); strip it
+            file_path = clipboard_file_path.removeprefix('static/')
+        else:
+            # Otherwise it came from a course; use the path as-is
+            file_path = clipboard_file_path
+
+        filename = pathlib.Path(file_path).name
+        # Language whose transcript file name equals this file's base name; None if the block lists no such file
+        language_code = next((k for k, v in block.transcripts.items() if v == filename), None)
+        if language_code:
+            sjson_subs = Transcript.convert(
+                content=data,
+                input_format=Transcript.SRT,
+                output_format=Transcript.SJSON
+            ).encode()  # bytes for the ContentFile below
+            create_or_update_video_transcript(
+                video_id=block.edx_video_id,
+                language_code=language_code,
+                metadata={
+                    'file_format': Transcript.SJSON,
+                    'language_code': language_code
+                },
+                file_data=ContentFile(sjson_subs),
+            )
+
+
 def is_item_in_course_tree(item):
     """
     Check that the item is in the course tree.

diff --git a/cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py b/cms/djangoapps/contentstore/rest_api/v2/views/downstreams.py
@@ -88,6 +88,7 @@
     DeveloperErrorViewMixin,
     view_auth_classes,
 )
+from xmodule.video_block.transcripts_utils import clear_transcripts
 from xmodule.modulestore.django import modulestore
 from xmodule.modulestore.exceptions import ItemNotFoundError
 
@@ -224,6 +225,9 @@ def post(self, request: _AuthenticatedRequest, usage_key_string: str) -> Respons
         """
         downstream = _load_accessible_block(request.user, usage_key_string, require_write_access=True)
         try:
+            if downstream.usage_key.block_type == "video":
+                # Delete all transcripts so we can copy new ones from upstream
+                clear_transcripts(downstream)
             upstream = sync_from_upstream(downstream, request.user)
             static_file_notices = import_static_assets_for_library_sync(downstream, upstream, request)
         except UpstreamLinkException as exc:

diff --git a/cms/djangoapps/contentstore/rest_api/v2/views/tests/test_downstreams.py b/cms/djangoapps/contentstore/rest_api/v2/views/tests/test_downstreams.py
@@ -264,7 +264,8 @@ def call_api(self, usage_key_string):
     @patch.object(UpstreamLink, "get_for_block", _get_upstream_link_good_and_syncable)
     @patch.object(downstreams_views, "sync_from_upstream")
     @patch.object(downstreams_views, "import_static_assets_for_library_sync", return_value=StaticFileNotices())
-    def test_200(self, mock_sync_from_upstream, mock_import_staged_content):
+    @patch.object(downstreams_views, "clear_transcripts")
+    def test_200(self, mock_sync_from_upstream, mock_import_staged_content, mock_clear_transcripts):
         """
         Does the happy path work?
         """
@@ -273,6 +274,7 @@ def test_200(self, mock_sync_from_upstream, mock_import_staged_content):
         assert response.status_code == 200
         assert mock_sync_from_upstream.call_count == 1
         assert mock_import_staged_content.call_count == 1
+        assert mock_clear_transcripts.call_count == 1
 
 
 class DeleteDownstreamSyncViewtest(_DownstreamSyncViewTestMixin, SharedModuleStoreTestCase):

diff --git a/cms/djangoapps/contentstore/views/tests/test_transcripts.py b/cms/djangoapps/contentstore/views/tests/test_transcripts.py
@@ -15,9 +15,11 @@
 from django.urls import reverse
 from edxval.api import create_video
 from opaque_keys.edx.keys import UsageKey
+from organizations.tests.factories import OrganizationFactory
 
 from cms.djangoapps.contentstore.tests.utils import CourseTestCase, setup_caption_responses
 from openedx.core.djangoapps.contentserver.caching import del_cached_content
+from openedx.core.djangoapps.content_libraries import api as lib_api
 from xmodule.contentstore.content import StaticContent  # lint-amnesty, pylint: disable=wrong-import-order
 from xmodule.contentstore.django import contentstore  # lint-amnesty, pylint: disable=wrong-import-order
 from xmodule.exceptions import NotFoundError  # lint-amnesty, pylint: disable=wrong-import-order
@@ -27,8 +29,10 @@
     GetTranscriptsFromYouTubeException,
     Transcript,
     get_video_transcript_content,
-    remove_subs_from_store
+    get_transcript,
+    remove_subs_from_store,
 )
+from openedx.core.djangoapps.xblock import api as xblock_api
 
 TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
 TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
@@ -92,6 +96,21 @@ def setUp(self):
         resp = self.client.ajax_post('/xblock/', data)
         self.assertEqual(resp.status_code, 200)
 
+        self.library = lib_api.create_library(
+            org=OrganizationFactory.create(short_name="org1"),
+            slug="lib",
+            title="Library",
+        )
+        self.library_block_metadata = lib_api.create_library_block(
+            self.library.key,
+            "video",
+            "video-transcript",
+        )
+        self.library_block = xblock_api.load_block(
+            self.library_block_metadata.usage_key,
+            self.user,
+        )
+
         self.video_usage_key = self._get_usage_key(resp)
         self.item = modulestore().get_item(self.video_usage_key)
         # hI10vDNYz4M - valid Youtube ID with transcripts.
@@ -702,6 +721,25 @@ def test_replace_transcript_success(self, edx_video_id):
         expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT)
         self.assertDictEqual(actual_sjson_content, expected_sjson_content)
 
+    def test_replace_transcript_library_content_success(self):
+        # Replace the transcripts of the library video block with those from YouTube (self.youtube_id)
+        response = self.replace_transcript(self.library_block_metadata.usage_key, self.youtube_id)
+
+        # The endpoint must answer 200 with the 'Success' message
+        self.assert_response(response, expected_status_code=200, expected_message='Success')
+
+        # Reload the library block after the request
+        updated_block = xblock_api.load_block(
+            self.library_block_metadata.usage_key,
+            self.user,
+        )
+
+        # The block's 'en' transcript, read back as SJSON, must equal the expected fixture content
+        transcript = get_transcript(updated_block, 'en', Transcript.SJSON)
+        actual_sjson_content = json.loads(transcript[0])
+        expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT)
+        self.assertDictEqual(actual_sjson_content, expected_sjson_content)
+
     def test_replace_transcript_fails_without_data(self):
         """
         Verify that replace transcript fails if we do not provide video data in request.