jumpcutter initial checkin
This commit is contained in:
parent
06052565f8
commit
bc78af7436
|
@ -0,0 +1,209 @@
|
||||||
|
from contextlib import closing
|
||||||
|
from PIL import Image
|
||||||
|
import subprocess
|
||||||
|
from audiotsm import phasevocoder
|
||||||
|
from audiotsm.io.wav import WavReader, WavWriter
|
||||||
|
from scipy.io import wavfile
|
||||||
|
import numpy as np
|
||||||
|
import re
|
||||||
|
import math
|
||||||
|
from shutil import copyfile, rmtree
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
from pytube import YouTube
|
||||||
|
import PySimpleGUI as sg
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def downloadFile(url):
    """Download a YouTube video and return the path of the saved file.

    The first stream offered by pytube for ``url`` is downloaded. Spaces in
    the downloaded file's name are replaced with underscores because the
    ffmpeg command lines built later in this script concatenate the path
    without quoting it.

    Only the file name is rewritten; the directory part of the path is left
    untouched so the rename never points into a folder that does not exist.

    Args:
        url: address of the video to download.

    Returns:
        The path of the (possibly renamed) downloaded file.
    """
    name = YouTube(url).streams.first().download()
    folder, base = os.path.split(name)
    newname = os.path.join(folder, base.replace(' ', '_'))
    if newname != name:
        os.rename(name, newname)
    return newname
|
||||||
|
|
||||||
|
def getMaxVolume(s):
    """Return the largest absolute sample value found in ``s`` as a float.

    The maximum and the minimum are converted to Python floats before the
    minimum is negated, so the most negative value of an integer sample
    type cannot overflow.
    """
    highest = float(np.max(s))
    lowest = float(np.min(s))
    return max(highest, -lowest)
|
||||||
|
|
||||||
|
def copyFrame(inputFrame, outputFrame):
    """Copy one extracted frame image to its slot in the output sequence.

    Source and destination live in ``TEMP_FOLDER`` and are named
    ``frameNNNNNN.jpg`` / ``newFrameNNNNNN.jpg``, where the number is the
    given index plus one.

    Args:
        inputFrame: zero-based index of the source frame, or ``None`` when
            the caller has no known frame to fall back to.
        outputFrame: zero-based index of the destination frame.

    Returns:
        True if the frame was copied, False if there was nothing to copy
        (``inputFrame`` is None or the source image does not exist).
    """
    # The caller may pass its "last existing frame" before any frame has
    # been found; treat that as "nothing to copy" instead of crashing.
    if inputFrame is None:
        return False

    src = "{}/frame{:06d}.jpg".format(TEMP_FOLDER, inputFrame + 1)
    dst = "{}/newFrame{:06d}.jpg".format(TEMP_FOLDER, outputFrame + 1)
    if not os.path.isfile(src):
        return False

    copyfile(src, dst)
    # Progress report every 20 frames.
    if outputFrame % 20 == 19:
        print(str(outputFrame+1)+" time-altered frames saved.")
    return True
|
||||||
|
|
||||||
|
def inputToOutputFilename(filename):
    """Derive the default output name by inserting ``_ALTERED`` before the extension.

    ``os.path.splitext`` is used so that a name without an extension simply
    gets the suffix appended, and dots in directory names are not mistaken
    for the start of the extension.

    Args:
        filename: path of the input file.

    Returns:
        ``filename`` with ``_ALTERED`` placed between its root and extension.
    """
    root, extension = os.path.splitext(filename)
    return root + "_ALTERED" + extension
|
||||||
|
|
||||||
|
def createPath(s):
    """Create the directory ``s``.

    An already existing directory is treated as an error rather than being
    reused.

    Args:
        s: path of the directory to create.

    Raises:
        AssertionError: if the directory cannot be created. The exception is
            raised explicitly (not via ``assert``) so the check still runs
            under ``python -O``; the underlying OSError is chained to it.
    """
    try:
        os.mkdir(s)
    except OSError as err:
        raise AssertionError(
            "Creation of the directory %s failed. (The TEMP folder may already exist. Delete or rename it, and try again.)" % s
        ) from err
|
||||||
|
|
||||||
|
def deletePath(s): # Dangerous! Watch out!
    """Recursively delete the directory ``s`` and everything inside it.

    Failures are reported on stdout instead of being raised, so a failed
    cleanup does not abort the caller.

    Args:
        s: path of the directory tree to remove.
    """
    try:
        rmtree(s,ignore_errors=False)
    except OSError as err:
        print ("Deletion of the directory %s failed" % s)
        # Print the caught exception (not the OSError class) so the actual
        # reason for the failure is visible.
        print(err)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line interface. The options are declared as a table and registered
# in a loop; the order below is the order they appear in --help.
parser = argparse.ArgumentParser(description='Modifies a video file to play at different speeds when there is sound vs. silence.')

_ARGUMENT_SPECS = (
    ('--input_file', dict(type=str, help='the video file you want modified')),
    ('--url', dict(type=str, help='A youtube url to download and process')),
    ('--output_file', dict(type=str, default="", help="the output file. (optional. if not included, it'll just modify the input file name)")),
    ('--silent_threshold', dict(type=float, default=0.03, help="the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)")),
    ('--sounded_speed', dict(type=float, default=1.00, help="the speed that sounded (spoken) frames should be played at. Typically 1.")),
    ('--silent_speed', dict(type=float, default=5.00, help="the speed that silent frames should be played at. 999999 for jumpcutting.")),
    ('--frame_margin', dict(type=float, default=1, help="some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable.")),
    ('--sample_rate', dict(type=int, default=44100, help="sample rate of the input and output videos")),
    ('--frame_rate', dict(type=float, default=30, help="frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work.")),
    ('--frame_quality', dict(type=int, default=3, help="quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default.")),
)

for _flag, _options in _ARGUMENT_SPECS:
    parser.add_argument(_flag, **_options)

args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Settings taken from the command line.
frameRate = args.frame_rate  # fallback; replaced later if ffmpeg reports a rate
SAMPLE_RATE = args.sample_rate
SILENT_THRESHOLD = args.silent_threshold
FRAME_SPREADAGE = args.frame_margin
# Indexed by a chunk's "sounded" flag: 0 -> silent speed, 1 -> sounded speed.
NEW_SPEED = [args.silent_speed, args.sounded_speed]
URL = args.url
FRAME_QUALITY = args.frame_quality

# A URL takes precedence over a local input file.
if URL is not None:
    INPUT_FILE = downloadFile(URL)
else:
    INPUT_FILE = args.input_file

# Validate explicitly instead of with `assert`, which is removed under -O.
if INPUT_FILE is None:
    parser.error("why u put no input file, that dum")

if args.output_file:
    OUTPUT_FILE = args.output_file
else:
    OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

TEMP_FOLDER = "TEMP"
AUDIO_FADE_ENVELOPE_SIZE = 400 # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)

createPath(TEMP_FOLDER)
|
||||||
|
|
||||||
|
# The ffmpeg commands are passed as argument lists (no shell), so input paths
# containing spaces or shell metacharacters reach ffmpeg intact.

# 1. Dump every video frame as a numbered JPEG into the temp folder.
subprocess.call([
    "ffmpeg", "-i", INPUT_FILE,
    "-qscale:v", str(FRAME_QUALITY),
    TEMP_FOLDER+"/frame%06d.jpg",
    "-hide_banner",
])

# 2. Extract the audio track as a 2-channel WAV at the requested sample rate.
subprocess.call([
    "ffmpeg", "-i", INPUT_FILE,
    "-ab", "160k", "-ac", "2",
    "-ar", str(SAMPLE_RATE),
    "-vn", TEMP_FOLDER+"/audio.wav",
])

# 3. Save ffmpeg's description of the input so the frame rate can be parsed
#    from it later. The probe must target INPUT_FILE: nothing is ever copied
#    to TEMP/input.mp4, so probing that path could never report a stream.
#    stderr is merged into stdout (the former `2>&1`), and the file is closed
#    before it is read back.
with open(TEMP_FOLDER+"/params.txt", "w") as f:
    subprocess.call(["ffmpeg", "-i", INPUT_FILE], stdout=f, stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Load the extracted audio and remember its overall peak level.
sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
audioSampleCount = audioData.shape[0]
maxAudioVolume = getMaxVolume(audioData)

# Read back the saved ffmpeg description of the input file. Undecodable
# bytes are replaced rather than aborting the run.
with open(TEMP_FOLDER+"/params.txt", 'r', errors='replace') as f:
    pre_params = f.read()
params = pre_params.split('\n')

# Look for the "<number> fps" entry on a video stream line. The pattern
# accepts fractional rates (e.g. "29.97 fps") and requires at least one
# digit, so float() is never handed an empty string. If several lines match,
# the last one wins; if none match, the --frame_rate value is kept.
for line in params:
    m = re.search(r'Stream #.*Video.* ([0-9]+(?:\.[0-9]+)?) fps', line)
    if m is not None:
        frameRate = float(m.group(1))
|
||||||
|
|
||||||
|
samplesPerFrame = sampleRate/frameRate

audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))

# hasLoudAudio[i] is 1 when the peak of frame i's audio, relative to the
# peak of the whole track, reaches SILENT_THRESHOLD.
hasLoudAudio = np.zeros((audioFrameCount))

for i in range(audioFrameCount):
    start = int(i*samplesPerFrame)
    end = min(int((i+1)*samplesPerFrame),audioSampleCount)
    audiochunks = audioData[start:end]
    if maxAudioVolume > 0:
        maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
    else:
        # Completely silent track: avoid dividing by zero.
        maxchunksVolume = 0.0
    if maxchunksVolume >= SILENT_THRESHOLD:
        hasLoudAudio[i] = 1

# Build the list of chunks as [firstFrame, endFrame (exclusive), flag], where
# flag is 1 for "sounded" and 0 for "silent". A frame counts as sounded when
# any frame within FRAME_SPREADAGE frames of it is loud. The leading
# [0,0,0] entry is a sentinel that only supplies the first start index.
chunks = [[0,0,0]]
shouldIncludeFrame = np.zeros((audioFrameCount))
for i in range(audioFrameCount):
    start = int(max(0,i-FRAME_SPREADAGE))
    end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
    shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
    if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
        chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])

# Close the final chunk. It ends at the last frame, so it carries the last
# frame's flag (using the frame before it mislabels the chunk when the flip
# happens on the very last frame). Nothing is added when there are no frames.
if audioFrameCount > 0:
    chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[-1]])
chunks = chunks[1:]
|
||||||
|
|
||||||
|
# Re-time every chunk: the audio is passed through audiotsm's phase vocoder
# at the chunk's speed, and the matching video frames are picked by stepping
# through the input frames at that same speed.
outputPieces = []          # processed audio of each chunk, joined once at the end
outputPointer = 0          # number of output samples produced so far
lastExistingFrame = None   # most recent input frame that was found on disk

sFile = TEMP_FOLDER+"/tempStart.wav"
eFile = TEMP_FOLDER+"/tempEnd.wav"

# Linear 0 -> 1 ramp used to fade each chunk in and out. It is kept as a
# column so it broadcasts over however many channels the audio has.
fadeIn = (np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]

for chunk in chunks:
    chunkSpeed = NEW_SPEED[int(chunk[2])]
    audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]

    # Round-trip through WAV files because the time-stretcher reads and
    # writes through WavReader / WavWriter.
    wavfile.write(sFile,SAMPLE_RATE,audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=chunkSpeed)
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer+leng

    # Scale by the track's peak, then smooth out the transition's audio by
    # quickly fading in/out.
    piece = alteredAudioData/maxAudioVolume
    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        piece[:] = 0 # audio is less than 0.01 sec, let's just remove it.
    else:
        piece[:AUDIO_FADE_ENVELOPE_SIZE] *= fadeIn
        piece[leng-AUDIO_FADE_ENVELOPE_SIZE:] *= 1-fadeIn
    outputPieces.append(piece)

    # Emit the video frames covering this chunk's span of output audio.
    startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
    endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
    for outputFrame in range(startOutputFrame, endOutputFrame):
        inputFrame = int(chunk[0]+chunkSpeed*(outputFrame-startOutputFrame))
        if copyFrame(inputFrame,outputFrame):
            lastExistingFrame = inputFrame
        elif lastExistingFrame is not None:
            # The wanted frame does not exist; repeat the last one that did.
            copyFrame(lastExistingFrame,outputFrame)

    outputPointer = endPointer

# Join the pieces once instead of re-copying the whole output per chunk.
if outputPieces:
    outputAudioData = np.concatenate(outputPieces)
else:
    outputAudioData = np.zeros((0,audioData.shape[1]))

wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)
|
||||||
|
|
||||||
|
# Disabled in the original (was wrapped in a string literal): pad the end of
# the output with copies of the last input frame.
#
# outputFrame = math.ceil(outputPointer/samplesPerFrame)
# for endGap in range(outputFrame,audioFrameCount):
#     copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)

# Combine the re-timed frames and audio into the output file. The command is
# an argument list (no shell) so an output path with spaces is passed intact.
subprocess.call([
    "ffmpeg", "-framerate", str(frameRate),
    "-i", TEMP_FOLDER+"/newFrame%06d.jpg",
    "-i", TEMP_FOLDER+"/audioNew.wav",
    "-strict", "-2",
    OUTPUT_FILE,
])

# Remove the extracted frames and intermediate audio.
deletePath(TEMP_FOLDER)
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
import PySimpleGUI as sg
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
"""
|
||||||
|
Jumpcutter GUI
|
||||||
|
|
||||||
|
This is a front-end GUI for a command line tool named jumpcutter.
|
||||||
|
|
||||||
|
jumpcutter is a command line based tool written by Carykh. You'll find the repo here:
|
||||||
|
https://github.com/carykh/jumpcutter
|
||||||
|
|
||||||
|
The design of this GUI was made in a way that should not have required any changes to the
|
||||||
|
jumpcutter.py file. However, there appears to be a bug in the original code. The sample rate
|
||||||
|
argument was specified as a float, but this later causes a crash in the program, so a single
|
||||||
|
change was made to line 68, changing the parameter from a float to an int. You can get around
|
||||||
|
this change by not specifying a default value in this GUI. Rather than specifying 44100, leave it blank
|
||||||
|
which will cause the parameter to be skipped.
|
||||||
|
|
||||||
|
This kind of GUI can be applied to a large number of other command-line programs.
|
||||||
|
|
||||||
|
NOTE - it has not yet been tested on Linux. It's only been tested on Windows. Hoping to get it
|
||||||
|
tested out on Linux shortly.
|
||||||
|
|
||||||
|
Copyright 2020 PySimpleGUI.org
|
||||||
|
"""
|
||||||
|
|
||||||
|
def build_parameter_string(values):
    """Turn the GUI's values into a jumpcutter command-line argument string.

    Each recognised key whose value is not the empty string contributes
    ``"<flag> <value> "`` (note the trailing space), in the iteration order
    of ``values``. Keys that do not map to a jumpcutter flag are ignored.

    Because the result is later run through a shell, a value containing
    whitespace or a common shell metacharacter is wrapped in double quotes
    (with embedded double quotes backslash-escaped), so file paths with
    spaces and URLs with ``&`` stay a single argument.

    Args:
        values: mapping of GUI element keys to their current values.

    Returns:
        The argument string; empty if nothing is set.
    """
    values_to_parm = {'-FILE-' : '--input_file',
                      '-URL-' : '--url',
                      '-OUT FILE-' : '--output_file',
                      '-SILENT THRESHOLD-' : '--silent_threshold',
                      '-SOUNDED SPEED-' : '--sounded_speed',
                      '-SILENT SPEED-' : '--silent_speed',
                      '-FRAME MARGIN-' : '--frame_margin',
                      '-SAMPLE RATE-' : '--sample_rate',
                      '-FRAME RATE-' : '--frame_rate',
                      '-FRAME QUALITY-' : '--frame_quality',
                      }
    shell_specials = '&|<>^;()"'
    pieces = []
    for key in values:
        if key not in values_to_parm:
            continue
        value = values[key]
        if value == '':
            continue
        text = str(value)
        if any(ch.isspace() or ch in shell_specials for ch in text):
            text = '"' + text.replace('"', '\\"') + '"'
        pieces.append(f"{values_to_parm[key]} {text} ")
    return ''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Show the Jump Cutter window and run jumpcutter.py when Start is pressed.

    The window has one input row per jumpcutter option plus a multiline
    element to which stdout, stderr and cprint output are rerouted. Pressing
    Start builds the argument string from the current values and runs
    ``jumpcutter.py`` (looked up relative to the current working directory)
    with the ``python`` found by the shell. The loop ends when the window is
    closed or Exit is pressed.
    """
    def FText(text, in_key=None, default=None, tooltip=None, input_size=(20,1)):
        """
        A "Fixed-sized Text Input".  Returns a row with a Text and an Input element.
        """
        return [sg.Text(text, size=(20, 1), justification='r', tooltip=tooltip), sg.Input(default_text=default, key=in_key, size=input_size)]

    layout = [
        [sg.Text('Jump Cutter', font='Any 20')],
        FText('Input File', '-FILE-', '', 'the video file you want modified', input_size=(40,1)) + [sg.FileBrowse()],
        FText('URL', '-URL-', '', 'A youtube url to download and process', input_size=(40,1)),
        FText('Output File', '-OUT FILE-', '', "the output file. (optional. if not included, it'll just modify the input file name)", input_size=(40,1)) + [sg.FileSaveAs()],
        FText('Silent Threshold', '-SILENT THRESHOLD-', 0.03,
              "the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)"),
        FText('Sounded Speed', '-SOUNDED SPEED-', 1.00, "the speed that sounded (spoken) frames should be played at. Typically 1."),
        FText('Silent Speed', '-SILENT SPEED-', 5.00, "the speed that silent frames should be played at. 999999 for jumpcutting."),
        FText('Frame Margin', '-FRAME MARGIN-', 1,
              "some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable."),
        FText('Sample Rate', '-SAMPLE RATE-', '44100', "sample rate of the input and output videos"),
        FText('Frame Rate', '-FRAME RATE-', 30,
              "frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work."),
        FText('Frame Quality', '-FRAME QUALITY-', 3, "quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default."),
        [sg.MLine(size=(90,10), reroute_stdout=True, reroute_stderr=True, reroute_cprint=True, write_only=True, font='Courier 10', autoscroll=True, key='-ML-')],
        [sg.Button('Start'), sg.Button('Exit')],
    ]

    window = sg.Window('Jump Cutter', layout)

    while True:
        event, values = window.read()
        if event in (sg.WIN_CLOSED, 'Exit'):
            break
        if event == 'Start':
            parms = build_parameter_string(values)
            print('Your parameters = ', parms)
            # No ".\" prefix: a backslash path only works in a Windows shell,
            # while a bare relative name works in both Windows and POSIX shells.
            runCommand(cmd='python jumpcutter.py ' + parms, window=window)
            sg.cprint('*'*20+'DONE'+'*'*20, background_color='red', text_color='white')
    window.close()
|
||||||
|
|
||||||
|
|
||||||
|
def runCommand(cmd, timeout=None, window=None):
    """ run shell command

    The command's stdout and stderr are merged and read line by line. Each
    line is decoded, stripped of trailing whitespace, printed, and collected;
    the window (if given) is refreshed after every line so the output shows
    up while the command is still running.

    @param cmd: command to execute
    @param timeout: timeout passed to the final wait on the process, which
        happens only after all of its output has been read
    @param window: the PySimpleGUI window that the output is going to (needed to do refresh on)
    @return: (return code from command, command output with lines joined by newlines)
    """
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    lines = []
    try:
        for raw in p.stdout:
            # Undecodable bytes are shown as backslash escapes.
            line = raw.decode(errors='backslashreplace').rstrip()
            lines.append(line)
            print(line)
            if window:
                window.refresh()
    finally:
        # Release the pipe even if printing or refreshing fails.
        p.stdout.close()

    retval = p.wait(timeout)
    # Join with newlines: the lines were stripped, so plain concatenation
    # would run them together.
    return (retval, '\n'.join(lines))
|
||||||
|
|
||||||
|
# Start the GUI only when this file is run as a script, not when imported.
if __name__ == '__main__':
    # Optional theme override, left disabled:
    # sg.theme('Dark Grey 11')
    main()
|
Loading…
Reference in New Issue