-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStreamTranscriptCleaner_RetainTimestamps.html
More file actions
115 lines (95 loc) · 5.1 KB
/
Copy pathStreamTranscriptCleaner_RetainTimestamps.html
File metadata and controls
115 lines (95 loc) · 5.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Encoding declaration comes first so the parser sees it before any other content. -->
  <meta charset="utf-8">
  <!-- Lets the page lay out at device width on small screens; zooming stays enabled. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Microsoft Stream transcript extractor/VTT file cleaner</title>
  <meta name="description" content="Tool for Microsoft Stream to extract just the text from an autogenerated transcript VTT file.">
  <style>
    /* Page-wide typography and outer spacing. */
    body, html {
      font-family: Segoe UI,SegoeUI,Segoe WP,Helvetica Neue,Helvetica,Tahoma,Arial,sans-serif;
      font-weight: 400;
      text-rendering: optimizeLegibility;
      -webkit-font-smoothing: antialiased;
      padding: 1.5rem;
      padding-top: .25rem;
    }
    /* Elements the script generates per transcript: the copy button and the transcript text. */
    .cleanVTTText,.copyButton {
      margin-left: 1.5rem;
      margin-right: 1.5rem;
    }
    .copyButton {
      margin-bottom: 0.5rem;
    }
  </style>
</head>
<body>
<h1>Microsoft Stream transcript VTT file cleaner</h1>
<a href="https://docs.microsoft.com/stream">Microsoft Stream</a> automatically generates a WebVTT file from what was spoken in a video and uses it for a <a href="https://docs.microsoft.com/stream/portal-use-deep-search">video's transcript</a> and closed captions.<br>
At times you may want to get a copy of a video's transcript without any time codes, metadata, and extra lines. This utility will extract the transcript text from <a href="https://docs.microsoft.com/stream/portal-add-subtitles-captions#download-subtitles-or-captions">downloaded VTT files</a> generated by Microsoft Stream allowing you to copy and paste the text elsewhere.
<br>
<small><i>Tool last updated: 12/02/2021</i></small>
<h2>Instructions</h2>
<ol>
<li>As an owner of a video in Microsoft Stream, go to the <b>Update video details</b> screen</li>
<li>On the details page for the video in Microsoft Stream, in the <b>Options</b> section, under <b>Captions</b>, click <b>Download file</b></li>
<li>Come back to this transcript cleaner tool and click <b>Choose Files</b></li>
<li>Pick the VTT file(s) you downloaded from Microsoft Stream</li>
<li>The transcript portion of the VTT file(s) selected will be output below</li>
<li>Click the <b>Copy transcript</b> button above a transcript to copy it to the clipboard</li>
</ol>
<h2>Select the transcript VTT files to clean </h2>
<input type="file" id="files" accept=".vtt" multiple />
<script>
/**
 * Run the clean-up substitutions over the raw text of a VTT file.
 *
 * The substitutions are applied in a fixed order because each one operates on
 * the output of the previous one. The patterns and their order are unchanged
 * from the original inline implementation.
 *
 * @param {string} text Raw contents of a .vtt file.
 * @returns {string} The text after all substitutions have been applied.
 */
function cleanVttText(text) {
  return text
    // First "WEBVTT" marker together with the single CR or LF right after it.
    .replace(/WEBVTT[\r\n]/,"")
    // First "NOTE duration:" line and first "NOTE language:" line.
    .replace(/NOTE duration:.*[\r\n]/,"")
    .replace(/NOTE language:.*[\r\n]/,"")
    // Every "NOTE Confidence:" / "NOTE recognizability" note, up to the last digit on its line.
    .replace(/NOTE Confidence:.+\d/g,"")
    .replace(/NOTE recognizability.+\d/g,"")
    // A line break followed by a line made of five hyphen-separated parts
    // (presumably the GUID-style cue identifiers - confirm against a sample file).
    .replace(/[\r\n].+-.+-.+-.+-.+/g,"")
    // NOTE(review): this only matches a " --> " line whose start is a single
    // character, so ordinary "hh:mm:ss.mmm --> ..." timing lines are left in
    // the output. That looks intentional given the file name - confirm.
    .replace(/.[\r\n]. --> .+[\r\n]/g,"")
    // Each LF that is directly followed by a character becomes a space.
    .replace(/[\n](.)/g," $1")
    // Whitespace runs at the start or end of any line collapse to one newline.
    .replace(/^\s+|\s+$/gm,"\n");
}

/**
 * When files are picked, read them one after another with a single FileReader
 * and append a heading, a copy button and the cleaned transcript for each file
 * to the end of the page.
 */
document.querySelector('#files').addEventListener('change', (e) => {
  const files = e.target.files;

  // Nothing to do when the list is empty; readAsText(undefined) would throw.
  if (!files || files.length === 0) {
    return;
  }

  let i = 0;
  const reader = new FileReader();

  // Advance to the next file, or report completion after the last one.
  const readNext = () => {
    i += 1;
    if (i < files.length) {
      reader.readAsText(files[i]);
    } else {
      console.log('done');
    }
  };

  reader.onload = (e) => {
    const fileName = files[i].name;
    const text = cleanVttText(e.target.result);
    console.log(text);

    // File names and transcript text come from the user's files, so they are
    // inserted with textContent: nothing in them is ever parsed as markup.
    const heading = document.createElement('h3');
    heading.textContent = "Transcript for '" + fileName + "'";
    document.body.appendChild(heading);

    const copyButton = document.createElement('button');
    copyButton.type = "button";
    copyButton.className = "copyButton";
    copyButton.textContent = "Copy transcript";
    // text and fileName are per-file constants, so each button copies its own transcript.
    copyButton.addEventListener('click', () => { copyToClip(text, fileName); });
    document.body.appendChild(copyButton);

    const div = document.createElement('div');
    div.className = "cleanVTTText";
    div.textContent = text;
    document.body.appendChild(div);

    console.log(fileName);
    readNext();
  };

  // A file that cannot be read must not stall the files queued after it.
  reader.onerror = () => {
    console.error("Could not read '" + files[i].name + "'", reader.error);
    readNext();
  };

  reader.readAsText(files[i]);
}, false);
/**
 * Copy a transcript to the clipboard and tell the user whether it worked.
 *
 * A temporary "copy" listener supplies the clipboard payload, and
 * document.execCommand("copy") triggers it. The payload is offered both as
 * plain text and as HTML.
 *
 * @param {string} str Transcript text to place on the clipboard.
 * @param {string} fileName Name of the source file, shown in the alert.
 */
function copyToClip(str,fileName) {
  // The transcript is plain text. Escape markup characters for the HTML
  // flavour so a rich-text paste shows them literally instead of parsing them.
  const html = str
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

  function listener(e) {
    e.clipboardData.setData("text/html", html);
    e.clipboardData.setData("text/plain", str);
    // Stop the browser from overwriting the payload with the current selection.
    e.preventDefault();
  }

  let copied = false;
  document.addEventListener("copy", listener);
  try {
    // execCommand returns false when the command is unsupported or disabled.
    copied = document.execCommand("copy");
  } catch (err) {
    copied = false;
  } finally {
    // Always detach, so a later manual copy on the page is not hijacked.
    document.removeEventListener("copy", listener);
  }

  if (copied) {
    alert("Copied transcript to clipboard:\n'"+fileName+"'");
  } else {
    alert("Could not copy transcript to clipboard:\n'"+fileName+"'");
  }
}
</script>
</body>
</html>