Transcript converter
This commit is contained in:
commit
ee4fc5d9c9
72
import-transcript.js
Normal file
72
import-transcript.js
Normal file
@ -0,0 +1,72 @@
|
||||
|
||||
|
||||
const fs = require('fs')
|
||||
|
||||
/**
 * Converts SubRip (.srt) subtitle text into a transcript string.
 *
 * Each cue (index line, `start --> end` line, one or more text lines) becomes
 * `==<start>==<text>==<end>==`, where the text lines are joined with single
 * spaces. Cues are separated by a blank line in the output. Blocks that have
 * fewer than three lines, or whose second line is not a `start --> end` pair,
 * are skipped.
 *
 * When a cue's text starts with `<name>:` for one of the given speaker names
 * (compared case-insensitively), that prefix is rewritten to `**<name>**:`
 * using the spelling supplied in `names`. A warning is printed with
 * `console.warn` for every supplied name that never introduces a cue.
 *
 * @param {string} srtContent - Raw contents of the .srt file.
 * @param {string[]} [names=[]] - Speaker names; surrounding whitespace is
 *   trimmed and empty entries are ignored.
 * @returns {string} The converted transcript ('' when no valid cue is found).
 */
function importTranscript(srtContent, names = []) {
  // Names are user input: escape regex metacharacters so e.g. "Mr. X (host)"
  // is matched literally instead of being interpreted as a pattern.
  const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  // .srt files frequently use CRLF line endings and may start with a BOM;
  // normalise both so cue splitting and timestamps are not polluted by '\r'.
  const normalized = srtContent
    .replace(/^\uFEFF/, '')
    .replace(/\r\n?/g, '\n')
    .trim()

  // Cues are separated by one or more blank (or whitespace-only) lines.
  const entries = normalized.split(/\n(?:[ \t]*\n)+/)

  // Build each speaker's prefix pattern once rather than once per cue.
  // Empty names are dropped: ''.split(',') yields [''], which would otherwise
  // produce a pattern matching any leading ':' and a bogus warning.
  const speakers = [...new Set(names.map(name => name.trim()).filter(Boolean))]
    .map(name => ({ name, prefix: new RegExp(`^${escapeRegExp(name)}:`, 'i') }))

  const foundSpeakers = new Set()

  const mdEntries = entries.map(entry => {
    const lines = entry.split('\n')
    if (lines.length < 3) return null

    const timeParts = lines[1].split(' --> ').map(part => part.trim())
    if (timeParts.length !== 2) return null

    let content = lines.slice(2).join(' ').trim()
    for (const { name, prefix } of speakers) {
      if (prefix.test(content)) {
        // Replace exactly the matched prefix (not the first case-sensitive
        // occurrence of the name anywhere in the text). A replacer function
        // keeps '$' sequences inside the name from being expanded.
        content = content.replace(prefix, () => `**${name}**:`)
        foundSpeakers.add(name)
        break
      }
    }

    return `==${timeParts[0]}==${content}==${timeParts[1]}==`
  }).filter(Boolean)

  for (const { name } of speakers) {
    if (!foundSpeakers.has(name)) {
      console.warn(`Warning: Speaker ${name} wasn't found. Did you misspell their name?`)
    }
  }

  return mdEntries.join('\n\n')
}
|
||||
|
||||
// Command-line entry point.
//
// Recognised arguments:
//   --input,  -i <file>   .srt file to convert (required)
//   --output, -o <file>   file to write the transcript to; when omitted the
//                         transcript is printed to stdout
//   --speakers <list>     comma-separated speaker names
// Unrecognised arguments are ignored.
let srtFileName
let mdFileName
let names = ''

for (let i = 2; i < process.argv.length; i++) {
  switch (process.argv[i]) {
    case '--input':
    case '-i':
      srtFileName = process.argv[++i]
      break
    case '--output':
    case '-o':
      mdFileName = process.argv[++i]
      break
    case '--speakers':
      // Fall back to '' when the flag is the last argument, so the later
      // names.split(',') does not throw on undefined.
      names = process.argv[++i] || ''
      break
  }
}

if (!srtFileName) {
  // Usage goes to stderr: stdout carries the transcript when no --output is
  // given, so it must not receive help text if the output is redirected.
  console.error("This utility converts valid .srt files to NDC compatible transcripts. Usage: node import-transcript.js --input <input.srt> --output <output.md> --speakers 'Name1,Name2,...'")
  process.exit(1)
}

let srtContent
try {
  srtContent = fs.readFileSync(srtFileName, 'utf8')
} catch (err) {
  // Report a missing or unreadable input file as a short message rather than
  // an uncaught exception with a stack trace.
  console.error(`Could not read ${srtFileName}: ${err.message}`)
  process.exit(1)
}

// ''.split(',') yields [''], so drop blank entries before they are treated as
// speaker names.
const nameList = names.split(',').map(name => name.trim()).filter(Boolean)
const mdContent = importTranscript(srtContent, nameList)

if (mdFileName) {
  fs.writeFileSync(mdFileName, mdContent, 'utf8')
  console.log(`Converted content written to ${mdFileName}`)
} else {
  console.log(mdContent)
}
|
Loading…
Reference in New Issue
Block a user