from contextlib import closing
from PIL import Image
import subprocess
from audiotsm import phasevocoder
from import WavReader, WavWriter
from import wavfile
import numpy as np
import re
import math
from shutil import copyfile, rmtree
import os
import argparse
from pytube import YouTube
import PySimpleGUI as sg
def downloadFile(url):
    """Download the first available stream of a YouTube video.

    The downloaded file is renamed on disk so that its file name contains no
    spaces: the rest of the script splices the path unquoted into ffmpeg
    shell command strings, where a space would split the argument.

    Args:
        url: address of the YouTube video to fetch.

    Returns:
        Path of the downloaded (and possibly renamed) file.
    """
    name = YouTube(url).streams.first().download()
    # Only the file name is sanitised; rewriting spaces inside the directory
    # part would yield a path that does not exist.
    folder, basename = os.path.split(name)
    newname = os.path.join(folder, basename.replace(' ', '_'))
    if newname != name:
        # os.replace overwrites a leftover file from an earlier run on every
        # platform instead of failing.
        os.replace(name, newname)
    return newname
def getMaxVolume(s):
    """Return the peak magnitude of the samples in ``s`` as a float.

    The peak is the larger of the highest sample and the negated lowest
    sample, taken over every element of ``s``.
    """
    highest = float(np.max(s))
    lowest = float(np.min(s))
    if -lowest > highest:
        return -lowest
    return highest
def copyFrame(inputFrame,outputFrame):
    """Copy one extracted frame image to its slot in the output sequence.

    Both indices are zero-based; the files on disk are numbered from one,
    hence the ``+1`` when building the names.

    Returns:
        True when the frame was copied, False when the source image does
        not exist (nothing is copied in that case).
    """
    source_path = TEMP_FOLDER+"/frame{:06d}".format(inputFrame+1)+".jpg"
    target_path = TEMP_FOLDER+"/newFrame{:06d}".format(outputFrame+1)+".jpg"
    if os.path.isfile(source_path):
        copyfile(source_path, target_path)
        # Report progress once every 20 output frames.
        if outputFrame%20 == 19:
            print(str(outputFrame+1)+" time-altered frames saved.")
        return True
    return False
def inputToOutputFilename(filename):
    """Derive the default output path by inserting ``_ALTERED`` before the extension.

    ``video.mp4`` becomes ``video_ALTERED.mp4``. A name without an extension
    simply gets ``_ALTERED`` appended, and dots inside directory names are
    not mistaken for the extension separator.

    Args:
        filename: path of the input video.

    Returns:
        The path to use for the altered video.
    """
    # splitext only looks at the last path component, unlike a raw rfind(".")
    # which also returned -1 (and mangled the name) when no dot was present.
    stem, extension = os.path.splitext(filename)
    return stem+"_ALTERED"+extension
def createPath(s):
    """Create the directory ``s``.

    Args:
        s: path of the directory to create.

    Raises:
        AssertionError: if the directory cannot be created, for example
            because it already exists. The original OSError is chained as
            the cause.
    """
    try:
        os.mkdir(s)
    except OSError as err:
        # Raised explicitly (rather than `assert False`) so the failure is
        # still reported when Python runs with -O, and with the path filled in.
        raise AssertionError("Creation of the directory %s failed. (The TEMP folder may already exist. Delete or rename it, and try again.)" % s) from err
def deletePath(s): # Dangerous! Watch out!
    """Recursively delete the directory ``s`` and everything inside it.

    Best effort: a failure is reported on stdout and otherwise ignored, so
    the caller is never interrupted by a cleanup problem.

    Args:
        s: path of the directory tree to remove.
    """
    try:
        rmtree(s)
    except OSError:
        print ("Deletion of the directory %s failed" % s)
# Command-line interface: every option is optional and falls back to the default given here.
parser = argparse.ArgumentParser(
    description='Modifies a video file to play at different speeds when there is sound vs. silence.',
)

# Where the video comes from and where the result goes.
parser.add_argument(
    '--input_file',
    type=str,
    help='the video file you want modified',
)
parser.add_argument(
    '--url',
    type=str,
    help='A youtube url to download and process',
)
parser.add_argument(
    '--output_file',
    type=str,
    default="",
    help="the output file. (optional. if not included, it'll just modify the input file name)",
)

# How silence is detected and how fast each kind of section is played.
parser.add_argument(
    '--silent_threshold',
    type=float,
    default=0.03,
    help="the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)",
)
parser.add_argument(
    '--sounded_speed',
    type=float,
    default=1.00,
    help="the speed that sounded (spoken) frames should be played at. Typically 1.",
)
parser.add_argument(
    '--silent_speed',
    type=float,
    default=5.00,
    help="the speed that silent frames should be played at. 999999 for jumpcutting.",
)
parser.add_argument(
    '--frame_margin',
    type=float,
    default=1,
    help="some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable.",
)

# Media parameters.
parser.add_argument(
    '--sample_rate',
    type=int,
    default=44100,
    help="sample rate of the input and output videos",
)
parser.add_argument(
    '--frame_rate',
    type=float,
    default=30,
    help="frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work.",
)
parser.add_argument(
    '--frame_quality',
    type=int,
    default=3,
    help="quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default.",
)
args = parser.parse_args()

# Copy the parsed options into the module-level settings used by the rest of the script.
frameRate = args.frame_rate
SAMPLE_RATE = args.sample_rate
SILENT_THRESHOLD = args.silent_threshold
FRAME_SPREADAGE = args.frame_margin
# Order matters: index 0 is the speed for silent sections, index 1 the speed for sounded ones.
NEW_SPEED = [args.silent_speed, args.sounded_speed]

# A URL takes precedence: download it and use the downloaded file as the input.
# Otherwise fall back to --input_file (without the else, the download was always overwritten).
if args.url is not None:
    INPUT_FILE = downloadFile(args.url)
else:
    INPUT_FILE = args.input_file
URL = args.url
FRAME_QUALITY = args.frame_quality

assert INPUT_FILE is not None, "why u put no input file, that dum"

# An explicit --output_file wins; otherwise derive the name from the input file.
if len(args.output_file) >= 1:
    OUTPUT_FILE = args.output_file
else:
    OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)

AUDIO_FADE_ENVELOPE_SIZE = 400 # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)
# --- Stage: split the input video into frame images and an audio track with ffmpeg ---
# NOTE(review): every statement in this stage is syntactically incomplete as shown. The
# trailing `, shell=True)` / `, shell=True, stdout=f)` fragments have no opening call;
# presumably each command string was passed to a subprocess call that is missing here.
# Restore those calls before running.
# NOTE(review): INPUT_FILE and TEMP_FOLDER are spliced into the shell strings unquoted,
# so paths containing spaces or shell metacharacters will break the commands.
# Dump the video frames as numbered JPEGs (frame%06d.jpg) into TEMP_FOLDER at quality FRAME_QUALITY.
command = "ffmpeg -i "+INPUT_FILE+" -qscale:v "+str(FRAME_QUALITY)+" "+TEMP_FOLDER+"/frame%06d.jpg -hide_banner", shell=True)
# Extract the audio only (-vn) as 2-channel audio at SAMPLE_RATE into TEMP_FOLDER/audio.wav.
command = "ffmpeg -i "+INPUT_FILE+" -ab 160k -ac 2 -ar "+str(SAMPLE_RATE)+" -vn "+TEMP_FOLDER+"/audio.wav", shell=True)
# ffmpeg invoked with an input and no output, stderr merged into stdout (2>&1).
# NOTE(review): this reads TEMP_FOLDER/input.mp4, which nothing visible here creates — confirm the intended path.
command = "ffmpeg -i "+TEMP_FOLDER+"/input.mp4 2>&1"
# params.txt is opened for writing; the dangling `stdout=f` suggests the command output was
# redirected into it — TODO confirm. The handle is not closed anywhere in the visible code.
f = open(TEMP_FOLDER+"/params.txt", "w"), shell=True, stdout=f)
# NOTE(review): right-hand side truncated; presumably loads TEMP_FOLDER+"/audio.wav" and
# yields (sampleRate, audioData) — confirm.
sampleRate, audioData ="/audio.wav")
# --- Stage: decide, for every video frame, whether its slice of audio is loud ---
# Number of audio samples (length of the first axis) and the peak volume of the whole track,
# which is used below to normalise per-frame peaks.
audioSampleCount = audioData.shape[0]
maxAudioVolume = getMaxVolume(audioData)
# Re-open the ffmpeg report to look for the video frame rate.
# NOTE(review): the next assignment is truncated; presumably it reads the whole file — confirm.
# The handle is not closed anywhere in the visible code.
f = open(TEMP_FOLDER+"/params.txt", 'r+')
pre_params =
params = pre_params.split('\n')
# Look through the report line by line for a video stream line that states an "<N> fps" value.
# NOTE(review): the call that produces `m` and the expression inside float( are truncated;
# presumably a regex search whose captured number overrides frameRate — confirm.
# The pattern only captures digits, so a fractional rate would not be captured in full.
for line in params:
m ='Stream #.*Video.* ([0-9]*) fps',line)
if m is not None:
frameRate = float(
# Audio samples per video frame, and the number of frames needed to cover the whole audio track.
samplesPerFrame = sampleRate/frameRate
audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))
# hasLoudAudio[i] is set to 1 when frame i's peak volume, relative to the track's peak,
# reaches SILENT_THRESHOLD; it stays 0 otherwise.
hasLoudAudio = np.zeros((audioFrameCount))
for i in range(audioFrameCount):
start = int(i*samplesPerFrame)
# Clamp so the last frame does not index past the end of the audio.
end = min(int((i+1)*samplesPerFrame),audioSampleCount)
audiochunks = audioData[start:end]
# NOTE(review): divides by maxAudioVolume, which is 0 for a completely silent track.
maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
if maxchunksVolume >= SILENT_THRESHOLD:
hasLoudAudio[i] = 1
# --- Stage: group consecutive frames into chunks that share the same include/skip decision ---
# Seed entry; it is dropped again by the final `chunks = chunks[1:]`.
# NOTE(review): later code reads chunk[0] and chunk[1] as frame indices and chunk[2] as an
# index into NEW_SPEED, so each entry presumably is [startFrame, endFrame, flag] — confirm.
chunks = [[0,0,0]]
shouldIncludeFrame = np.zeros((audioFrameCount))
for i in range(audioFrameCount):
# A frame is included when any frame within FRAME_SPREADAGE frames of it (itself included) is loud.
start = int(max(0,i-FRAME_SPREADAGE))
end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
# NOTE(review): no body is visible for this test; presumably a new chunk is appended to
# `chunks` whenever the decision flips. As shown, `chunks` ends up empty — confirm.
if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
chunks = chunks[1:]
# --- Stage: re-time every chunk's audio, copy the matching frames, then assemble the output ---
# Output audio starts empty with the same number of columns as the input audio.
outputAudioData = np.zeros((0,audioData.shape[1]))
# outputPointer is the index in outputAudioData where the next chunk's audio begins.
outputPointer = 0
lastExistingFrame = None
for chunk in chunks:
# Slice of input audio covered by this chunk (frame indices converted to sample indices).
audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]
# Scratch files for the time-stretch step.
# NOTE(review): nothing visible writes audioChunk to sFile before it is read — presumably a
# missing write call; confirm.
sFile = TEMP_FOLDER+"/tempStart.wav"
eFile = TEMP_FOLDER+"/tempEnd.wav"
with WavReader(sFile) as reader:
with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
# Phase vocoder configured with the speed selected by the chunk's flag (chunk[2]).
# NOTE(review): the next two lines are truncated (unbalanced `, writer)` and an empty
# right-hand side); presumably the vocoder is run from reader to writer and the result is
# read back from eFile — confirm.
tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])]), writer)
_, alteredAudioData =
leng = alteredAudioData.shape[0]
endPointer = outputPointer+leng
# Append the re-timed audio, scaled by the input's peak volume.
outputAudioData = np.concatenate((outputAudioData,alteredAudioData/maxAudioVolume))
#outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume
# smooth out transition's audio by quickly fading in/out
# NOTE(review): the condition selecting between "zero out a very short chunk" and "apply the
# fade envelope" is not visible, and `premask` is never defined in the visible code. As
# shown, both would run unconditionally — confirm the intended branching.
outputAudioData[outputPointer:endPointer] = 0 # audio is less than 0.01 sec, let's just remove it.
mask = np.repeat(premask[:, np.newaxis],2,axis=1) # make the fade-envelope mask stereo
# Multiply the first AUDIO_FADE_ENVELOPE_SIZE samples by mask and the last ones by 1-mask.
outputAudioData[outputPointer:outputPointer+AUDIO_FADE_ENVELOPE_SIZE] *= mask
outputAudioData[endPointer-AUDIO_FADE_ENVELOPE_SIZE:endPointer] *= 1-mask
# Range of output frames covered by the audio just appended.
startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
for outputFrame in range(startOutputFrame, endOutputFrame):
# Pick the input frame by stepping through the chunk at the chunk's playback speed.
inputFrame = int(chunk[0]+NEW_SPEED[int(chunk[2])]*(outputFrame-startOutputFrame))
didItWork = copyFrame(inputFrame,outputFrame)
# Remember the last input frame that actually existed on disk.
# NOTE(review): no handling is visible for the case where the copy failed; presumably
# lastExistingFrame is reused as a fallback — confirm.
if didItWork:
lastExistingFrame = inputFrame
outputPointer = endPointer
# NOTE(review): outputAudioData is never written to TEMP_FOLDER/audioNew.wav in the visible
# code although the final command reads that file, and the endGap loop below has no visible
# body — confirm both.
outputFrame = math.ceil(outputPointer/samplesPerFrame)
for endGap in range(outputFrame,audioFrameCount):
# Combine the re-timed frames and audio into OUTPUT_FILE.
# NOTE(review): truncated like the earlier ffmpeg commands (dangling `, shell=True)`).
command = "ffmpeg -framerate "+str(frameRate)+" -i "+TEMP_FOLDER+"/newFrame%06d.jpg -i "+TEMP_FOLDER+"/audioNew.wav -strict -2 "+OUTPUT_FILE, shell=True)

import PySimpleGUI as sg
import subprocess
import sys
"""
Jumpcutter GUI

This is a front-end GUI for a command-line tool named jumpcutter.

jumpcutter is a command-line tool written by Carykh; see the jumpcutter repository for the original source.

The GUI was designed so that it should not require any changes to the jumpcutter script. However, there
appears to be a bug in the original code: the sample rate argument was specified as a float, which later
causes a crash in the program, so a single change was made to line 68, changing the parameter from a
float to an int. You can avoid needing this change by not supplying a sample rate in this GUI: rather
than specifying 44100, leave the field blank, which causes the parameter to be skipped.

This kind of GUI can be applied to a large number of other command-line programs.

NOTE - it has not yet been tested on Linux. It has only been tested on Windows. Hoping to get it
tested on Linux shortly.

Copyright 2020
"""
def build_parameter_string(values):
    """Translate the GUI's values dictionary into a command-line argument string.

    Each recognised element key is mapped to its command-line flag. Keys that
    are not in the map (for example browse buttons) and fields left blank are
    skipped, so blank fields fall back to the command-line tool's defaults.

    Args:
        values: mapping of element key to the value entered in the window.

    Returns:
        A string of ``--flag value `` pairs (each followed by one space), in
        the iteration order of ``values``; empty when nothing applies.
    """
    values_to_parm = {
        '-FILE-': '--input_file',
        '-URL-': '--url',
        '-OUT FILE-': '--output_file',
        '-SILENT THRESHOLD-': '--silent_threshold',
        '-SOUNDED SPEED-': '--sounded_speed',
        '-SILENT SPEED-': '--silent_speed',
        '-FRAME MARGIN-': '--frame_margin',
        '-SAMPLE RATE-': '--sample_rate',
        '-FRAME RATE-': '--frame_rate',
        '-FRAME QUALITY-': '--frame_quality',
    }
    # NOTE(review): values are inserted unquoted, so a path containing spaces
    # will be split into several arguments by the shell.
    return ''.join(
        f"{values_to_parm[key]} {value} "
        for key, value in values.items()
        if key in values_to_parm and value != ''
    )
def main():
# Builds the Jump Cutter window and handles its events: 'Start' turns the entered values
# into a parameter string and runs the command-line tool; 'Exit' or closing the window
# is the quit event.
# NOTE(review): this function is incomplete as shown (see the notes below); indentation
# and several statements are missing.
# FText is a local helper that returns one layout row: a right-justified 20-character
# label followed by an input field.
# NOTE(review): the description line under the def is a docstring whose quotes are missing.
def FText(text, in_key=None, default=None, tooltip=None, input_size=(20,1)):
A "Fixed-sized Text Input". Returns a row with a Text and an Input element.
return [sg.Text(text, size=(20, 1), justification='r', tooltip=tooltip), sg.Input(default_text=default, key=in_key, size=input_size)]
# One row per command-line option; the keys are the ones build_parameter_string maps to flags.
# NOTE(review): the closing bracket of this list is not visible.
layout = [
[sg.Text('Jump Cutter', font='Any 20')],
FText('Input File', '-FILE-', '', 'the video file you want modified', input_size=(40,1)) + [sg.FileBrowse()],
FText('URL', '-URL-', '', 'A youtube url to download and process', input_size=(40,1)),
FText('Output File', '-OUT FILE-', '', "the output file. (optional. if not included, it'll just modify the input file name)", input_size=(40,1)) + [sg.FileSaveAs()],
FText('Silent Threshold', '-SILENT THRESHOLD-', 0.03,
"the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)"),
FText('Sounded Speed', '-SOUNDED SPEED-', 1.00, "the speed that sounded (spoken) frames should be played at. Typically 1."),
FText('Silent Speed', '-SILENT SPEED-', 5.00, "the speed that silent frames should be played at. 999999 for jumpcutting."),
FText('Frame Margin', '-FRAME MARGIN-', 1,
"some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable."),
FText('Sample Rate', '-SAMPLE RATE-', '44100', "sample rate of the input and output videos"),
FText('Frame Rate', '-FRAME RATE-', 30,
"frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work."),
FText('Frame Quality', '-FRAME QUALITY-', 3, "quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default."),
# Multiline element that stdout, stderr and cprint output are rerouted to.
[sg.MLine(size=(90,10), reroute_stdout=True, reroute_stderr=True, reroute_cprint=True, write_only=True, font='Courier 10', autoscroll=True, key='-ML-')],
[sg.Button('Start'), sg.Button('Exit')],
window = sg.Window('Jump Cutter', layout)
# Event loop.
# NOTE(review): the right-hand side of `event, values =` is truncated (presumably a read of
# the window), and the quit test has no visible body (presumably it leaves the loop) — confirm.
while True:
event, values =
if event in (sg.WIN_CLOSED, 'Exit'):
if event == 'Start':
parms = build_parameter_string(values)
print('Your parameters = ', parms)
# NOTE(review): the script path inside the command literal looks truncated (`.\ ` followed by
# nothing), and the `.\` prefix is Windows-specific — confirm the intended script name.
runCommand(cmd=r'python .\ ' + parms, window=window)
sg.cprint('*'*20+'DONE'+'*'*20, background_color='red', text_color='white')
def runCommand(cmd, timeout=None, window=None):
    """ run shell command

    stderr is merged into stdout. The output is read line by line; after
    each line the window (if given) is refreshed so the GUI stays responsive
    while the command runs.

    @param cmd: command to execute
    @param timeout: seconds to wait for the process to exit once all of its
        output has been read (None waits indefinitely)
    @param window: the PySimpleGUI window that the output is going to (needed to do refresh on)
    @return: (return code from command, command output); the output lines
        have trailing whitespace stripped and are joined with newlines
    @raise subprocess.TimeoutExpired: the process did not exit within
        ``timeout``; it is killed before the exception propagates
    """
    collected = []
    # The context manager closes the pipe even if decoding or refresh raises.
    with subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
        for raw_line in p.stdout:
            # Undecodable bytes are shown as backslash escapes instead of raising.
            collected.append(raw_line.decode(errors='backslashreplace').rstrip())
            # NOTE(review): lines are only collected, not echoed; if live output
            # in the window is intended, a print of the line belongs here.
            if window:
                window.refresh()
        try:
            retval = p.wait(timeout)
        except subprocess.TimeoutExpired:
            # Without the kill, leaving the with-block would wait forever.
            p.kill()
            raise
    return (retval, '\n'.join(collected))
if __name__ == '__main__':
# sg.theme('Dark Grey 11')