jumpcutter initial checkin

2020-09-11 06:49:22 -04:00 · 2020-09-11 06:49:22 -04:00 · bc78af7436
commit bc78af7436
parent 06052565f8
2 changed files with 318 additions and 0 deletions
--- a/UserCreatedPrograms/jumpcutter/jumpcutter.py
+++ b/UserCreatedPrograms/jumpcutter/jumpcutter.py
@ -0,0 +1,209 @@
 from contextlib import closing
 from PIL import Image
 import subprocess
 from audiotsm import phasevocoder
 from audiotsm.io.wav import WavReader, WavWriter
 from scipy.io import wavfile
 import numpy as np
 import re
 import math
 from shutil import copyfile, rmtree
 import os
 import argparse
 from pytube import YouTube
 import PySimpleGUI as sg
 def downloadFile(url):
    """Download a YouTube video and give the file a space-free name.

    The first stream listed for ``url`` is downloaded, then the file is
    renamed so that every space in its *file name* becomes an underscore.

    Args:
        url: address of the YouTube video to fetch.

    Returns:
        Path of the renamed file.
    """
    name = YouTube(url).streams.first().download()
    # Only the final path component is rewritten. If download() hands back a
    # path that includes a directory with spaces in it, replacing spaces in
    # the whole string would point os.rename at a directory that does not exist.
    folder, base = os.path.split(name)
    newname = os.path.join(folder, base.replace(' ','_'))
    os.rename(name,newname)
    return newname
 def getMaxVolume(s):
    """Return the largest absolute sample value in ``s`` as a float.

    Both extremes are converted to float before the minimum is negated, so
    the comparison is done on Python floats rather than on the array's own
    integer type.
    """
    highest = float(np.max(s))
    lowest = float(np.min(s))
    # Mirror the most negative sample so both extremes compare as magnitudes.
    mirrored = -lowest
    return mirrored if mirrored > highest else highest
 def copyFrame(inputFrame,outputFrame):
    """Copy one extracted frame image to its slot in the output sequence.

    Frame numbers are zero-based here, while the image files are numbered
    from one, hence the +1 when the file names are built.

    Returns:
        True when the source image existed and was copied, False otherwise.
    """
    source = "{}/frame{:06d}.jpg".format(TEMP_FOLDER, inputFrame+1)
    target = "{}/newFrame{:06d}.jpg".format(TEMP_FOLDER, outputFrame+1)
    if os.path.isfile(source):
        copyfile(source, target)
        # Progress report after every 20th output frame.
        if outputFrame%20 == 19:
            print(str(outputFrame+1)+" time-altered frames saved.")
        return True
    return False
 def inputToOutputFilename(filename):
    """Build the default output name by inserting "_ALTERED" before the extension.

    The extension is found with os.path.splitext, so a name without any
    extension simply gets "_ALTERED" appended, and dots in directory names
    are left alone. (Searching for the last "." by hand returned -1 for
    extension-less names and scrambled the result.)

    Args:
        filename: path of the input video.

    Returns:
        The path with "_ALTERED" placed between its root and its extension.
    """
    root, extension = os.path.splitext(filename)
    return root+"_ALTERED"+extension
 def createPath(s):
    """Create the directory ``s``.

    Args:
        s: path of the directory to create.

    Raises:
        AssertionError: when os.mkdir fails, for example because the
            directory already exists. The original OSError is attached as
            the cause.
    """
    try:
        os.mkdir(s)
    except OSError as err:
        # Raised explicitly instead of through "assert False" so the failure
        # is still reported when Python runs with -O, and so the path is
        # actually substituted into the message.
        raise AssertionError("Creation of the directory %s failed. (The TEMP folder may already exist. Delete or rename it, and try again.)" % s) from err
 def deletePath(s): # Dangerous! Watch out!
    """Recursively delete the directory ``s``.

    A failure is reported on standard output and not re-raised, so the
    caller carries on either way.

    Args:
        s: path of the directory tree to remove.
    """
    try:
        rmtree(s,ignore_errors=False)
    except OSError as err:
        print ("Deletion of the directory %s failed" % s)
        # Show the error that actually occurred, not the OSError class itself.
        print(err)
 parser = argparse.ArgumentParser(description='Modifies a video file to play at different speeds when there is sound vs. silence.')
 # Every command line option as (flag, add_argument keywords), registered in
 # the order listed so the generated help keeps the same layout.
 for _flag, _options in (
    ('--input_file', dict(type=str, help='the video file you want modified')),
    ('--url', dict(type=str, help='A youtube url to download and process')),
    ('--output_file', dict(type=str, default="", help="the output file. (optional. if not included, it'll just modify the input file name)")),
    ('--silent_threshold', dict(type=float, default=0.03, help="the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)")),
    ('--sounded_speed', dict(type=float, default=1.00, help="the speed that sounded (spoken) frames should be played at. Typically 1.")),
    ('--silent_speed', dict(type=float, default=5.00, help="the speed that silent frames should be played at. 999999 for jumpcutting.")),
    ('--frame_margin', dict(type=float, default=1, help="some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable.")),
    ('--sample_rate', dict(type=int, default=44100, help="sample rate of the input and output videos")),
    ('--frame_rate', dict(type=float, default=30, help="frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work.")),
    ('--frame_quality', dict(type=int, default=3, help="quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default.")),
 ):
    parser.add_argument(_flag, **_options)
 args = parser.parse_args()
 # Settings taken from the command line.
 frameRate = args.frame_rate
 SAMPLE_RATE = args.sample_rate
 SILENT_THRESHOLD = args.silent_threshold
 FRAME_SPREADAGE = args.frame_margin
 # Indexed by a chunk's 0/1 flag: [speed for silent chunks, speed for sounded chunks].
 NEW_SPEED = [args.silent_speed, args.sounded_speed]
 URL = args.url
 # A URL, when given, is downloaded and used instead of --input_file.
 if URL is not None:
    INPUT_FILE = downloadFile(URL)
 else:
    INPUT_FILE = args.input_file
 FRAME_QUALITY = args.frame_quality
 # Raised explicitly rather than with an assert statement so the check still
 # runs when Python is started with -O.
 if INPUT_FILE is None:
    raise AssertionError("why u put no input file, that dum")
 if args.output_file:
    OUTPUT_FILE = args.output_file
 else:
    OUTPUT_FILE = inputToOutputFilename(INPUT_FILE)
 TEMP_FOLDER = "TEMP"
 AUDIO_FADE_ENVELOPE_SIZE = 400 # smooth out transition's audio by quickly fading in/out (arbitrary magic number whatever)
 createPath(TEMP_FOLDER)
 # The ffmpeg calls use argument lists instead of shell strings, so a file
 # name containing spaces or shell metacharacters reaches ffmpeg as one
 # unmodified argument.

 # 1) Write every video frame to TEMP as a numbered JPEG.
 command = ["ffmpeg", "-i", INPUT_FILE, "-qscale:v", str(FRAME_QUALITY), TEMP_FOLDER+"/frame%06d.jpg", "-hide_banner"]
 subprocess.call(command)
 # 2) Extract the audio track as a stereo WAV at the requested sample rate.
 command = ["ffmpeg", "-i", INPUT_FILE, "-ab", "160k", "-ac", "2", "-ar", str(SAMPLE_RATE), "-vn", TEMP_FOLDER+"/audio.wav"]
 subprocess.call(command)
 # 3) Save ffmpeg's description of the input for the frame-rate lookup.
 #    The probe targets INPUT_FILE: nothing visible in this script creates
 #    TEMP/input.mp4, so probing that path could never report a frame rate.
 #    stderr is merged into stdout, as the former "2>&1" redirect did.
 command = ["ffmpeg", "-i", INPUT_FILE]
 with open(TEMP_FOLDER+"/params.txt", "w") as f:
    subprocess.call(command, stdout=f, stderr=subprocess.STDOUT)
 sampleRate, audioData = wavfile.read(TEMP_FOLDER+"/audio.wav")
 audioSampleCount = audioData.shape[0]
 maxAudioVolume = getMaxVolume(audioData)
 # Read the saved ffmpeg output; the context manager closes the file, and
 # undecodable bytes are replaced rather than aborting the run.
 with open(TEMP_FOLDER+"/params.txt", 'r', errors='replace') as f:
    pre_params = f.read()
 params = pre_params.split('\n')
 for line in params:
    # Accept whole and fractional rates ("30 fps", "29.97 fps"). When a video
    # stream line is found, its rate replaces the --frame_rate value.
    m = re.search(r'Stream #.*Video.* ([0-9]+(?:\.[0-9]+)?) fps',line)
    if m is not None:
        frameRate = float(m.group(1))
 samplesPerFrame = sampleRate/frameRate
 audioFrameCount = int(math.ceil(audioSampleCount/samplesPerFrame))
 # hasLoudAudio[i] is 1 when frame i's peak volume, relative to the loudest
 # sample of the whole track, reaches SILENT_THRESHOLD.
 hasLoudAudio = np.zeros((audioFrameCount))
 for i in range(audioFrameCount):
    start = int(i*samplesPerFrame)
    end = min(int((i+1)*samplesPerFrame),audioSampleCount)
    audiochunks = audioData[start:end]
    maxchunksVolume = float(getMaxVolume(audiochunks))/maxAudioVolume
    if maxchunksVolume >= SILENT_THRESHOLD:
        hasLoudAudio[i] = 1
 # chunks holds [first frame, one-past-last frame, flag] runs of equal flag.
 # The [0,0,0] entry only seeds chunks[-1][1] and is dropped at the end.
 chunks = [[0,0,0]]
 shouldIncludeFrame = np.zeros((audioFrameCount))
 for i in range(audioFrameCount):
    # A frame counts as sounded when any frame within FRAME_SPREADAGE of it is loud.
    start = int(max(0,i-FRAME_SPREADAGE))
    end = int(min(audioFrameCount,i+1+FRAME_SPREADAGE))
    shouldIncludeFrame[i] = np.max(hasLoudAudio[start:end])
    if (i >= 1 and shouldIncludeFrame[i] != shouldIncludeFrame[i-1]): # Did we flip?
        chunks.append([chunks[-1][1],i,shouldIncludeFrame[i-1]])
 # The closing chunk runs up to the last frame, so it takes that frame's flag.
 # Using the second-to-last frame's flag was wrong whenever the flip happened
 # exactly on the final frame.
 chunks.append([chunks[-1][1],audioFrameCount,shouldIncludeFrame[-1]])
 chunks = chunks[1:]
 # Fade-in ramp running from 0 up to (but not reaching) 1 over
 # AUDIO_FADE_ENVELOPE_SIZE samples. The trailing length-1 axis lets it
 # broadcast over however many channels the audio has.
 mask = (np.arange(AUDIO_FADE_ENVELOPE_SIZE)/AUDIO_FADE_ENVELOPE_SIZE)[:, np.newaxis]
 # Processed chunks are collected and joined once after the loop; growing one
 # array with np.concatenate on every pass copies all earlier audio each time.
 # The empty leading array fixes the channel count and float dtype of the result.
 processedAudio = [np.zeros((0,audioData.shape[1]))]
 outputPointer = 0
 lastExistingFrame = None
 sFile = TEMP_FOLDER+"/tempStart.wav"
 eFile = TEMP_FOLDER+"/tempEnd.wav"
 for chunk in chunks:
    # chunk = [first input frame, one-past-last input frame, 0/1 sounded flag]
    speed = NEW_SPEED[int(chunk[2])]
    audioChunk = audioData[int(chunk[0]*samplesPerFrame):int(chunk[1]*samplesPerFrame)]
    # The chunk goes through temporary WAV files so the phase vocoder can
    # change its speed.
    wavfile.write(sFile,SAMPLE_RATE,audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=speed)
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer+leng
    # The division yields a new array, so the in-place edits below never
    # touch the data read from disk.
    scaledAudio = alteredAudioData/maxAudioVolume
    # smooth out transition's audio by quickly fading in/out
    if leng < AUDIO_FADE_ENVELOPE_SIZE:
        scaledAudio[:] = 0 # audio is less than 0.01 sec, let's just remove it.
    else:
        scaledAudio[:AUDIO_FADE_ENVELOPE_SIZE] *= mask
        scaledAudio[leng-AUDIO_FADE_ENVELOPE_SIZE:] *= 1-mask
    processedAudio.append(scaledAudio)
    startOutputFrame = int(math.ceil(outputPointer/samplesPerFrame))
    endOutputFrame = int(math.ceil(endPointer/samplesPerFrame))
    for outputFrame in range(startOutputFrame, endOutputFrame):
        inputFrame = int(chunk[0]+speed*(outputFrame-startOutputFrame))
        didItWork = copyFrame(inputFrame,outputFrame)
        if didItWork:
            lastExistingFrame = inputFrame
        elif lastExistingFrame is not None:
            # The source frame is missing: repeat the last one that existed.
            copyFrame(lastExistingFrame,outputFrame)
        else:
            # No frame has been found yet, so there is nothing to repeat.
            raise RuntimeError("No extracted video frame found in "+TEMP_FOLDER+". Frame extraction probably failed.")
    outputPointer = endPointer
 outputAudioData = np.concatenate(processedAudio)
 wavfile.write(TEMP_FOLDER+"/audioNew.wav",SAMPLE_RATE,outputAudioData)
 # Disabled code kept from the original script (a bare string, never executed).
 '''
 outputFrame = math.ceil(outputPointer/samplesPerFrame)
 for endGap in range(outputFrame,audioFrameCount):
    copyFrame(int(audioSampleCount/samplesPerFrame)-1,endGap)
 '''
 # Combine the re-timed frames and audio into the output video. An argument
 # list is used instead of a shell string so an OUTPUT_FILE containing spaces
 # or shell metacharacters is passed to ffmpeg unchanged.
 command = ["ffmpeg", "-framerate", str(frameRate), "-i", TEMP_FOLDER+"/newFrame%06d.jpg", "-i", TEMP_FOLDER+"/audioNew.wav", "-strict", "-2", OUTPUT_FILE]
 subprocess.call(command)
 # Remove the working folder with all intermediate frames and audio files.
 deletePath(TEMP_FOLDER)
--- a/UserCreatedPrograms/jumpcutter/jumpcutter_gui.py
+++ b/UserCreatedPrograms/jumpcutter/jumpcutter_gui.py
@ -0,0 +1,109 @@
 import PySimpleGUI as sg
 import subprocess
 import sys
 """
    Jumpcutter GUI
    This is a front-end GUI for a command line tool named jumpcutter.
    jumpcutter is a command line based tool written by Carykh.  You'll find the repo here:
    https://github.com/carykh/jumpcutter
    The design of this GUI was made in a way that should not have required any changes to the
    jumpcutter.py file.  However, there appears to be a bug in the original code. The sample rate
    argument was specified as a float, but this later causes a crash in the program, so a single
    change was made to line 68, changing the parameter from a float to an int.  You can get around
    this change by not specifying a default value in this GUI.  Rather than specifying 44100, leave it blank
    which will cause the parameter to be skipped.
    This kind of GUI can be applied to a large number of other command-line programs.
    NOTE - it has not yet been tested on Linux.  It's only been tested on Windows.  Hoping to get it
    tested out on Linux shortly.
    Copyright 2020 PySimpleGUI.org
 """
 def build_parameter_string(values):
    """Turn the window's values into a jumpcutter command line fragment.

    Keys with no matching option are ignored and empty values are left out.
    A value containing whitespace (typically a file path) is wrapped in
    double quotes so the shell passes it on as a single argument; a value
    that is already double-quoted is left as it is.

    Args:
        values: mapping of element key to the text entered for it.

    Returns:
        A string of "--option value " pairs, each followed by a space, in
        the order the keys appear in ``values``.
    """
    values_to_parm = {'-FILE-' : '--input_file',
                      '-URL-' : '--url',
                      '-OUT FILE-' : '--output_file',
                      '-SILENT THRESHOLD-' : '--silent_threshold',
                      '-SOUNDED SPEED-' : '--sounded_speed',
                      '-SILENT SPEED-' : '--silent_speed',
                      '-FRAME MARGIN-' : '--frame_margin',
                      '-SAMPLE RATE-' : '--sample_rate',
                      '-FRAME RATE-' : '--frame_rate',
                      '-FRAME QUALITY-' : '--frame_quality',
                      }
    parms = []
    for key, value in values.items():
        option = values_to_parm.get(key)
        if option is None or value == '':
            continue
        text = str(value)
        already_quoted = len(text) >= 2 and text[0] == '"' and text[-1] == '"'
        if any(ch.isspace() for ch in text) and not already_quoted:
            text = '"' + text.replace('"', '\\"') + '"'
        parms.append(f"{option} {text} ")
    return ''.join(parms)
 def main():
    """Show the Jump Cutter window and run jumpcutter.py on each Start click.

    The window has one input row per jumpcutter option plus a multiline
    element created with stdout, stderr and cprint rerouting enabled. The
    loop ends when the window is closed or Exit is clicked.
    """
    def FText(text, in_key=None, default=None, tooltip=None, input_size=(20,1)):
        """
        A "Fixed-sized Text Input".  Returns a row with a Text and an Input element.
        """
        return [sg.Text(text, size=(20, 1), justification='r', tooltip=tooltip), sg.Input(default_text=default, key=in_key, size=input_size)]
    layout = [
        [sg.Text('Jump Cutter', font='Any 20')],
        FText('Input File', '-FILE-', '', 'the video file you want modified', input_size=(40,1)) + [sg.FileBrowse()],
        FText('URL', '-URL-', '', 'A youtube url to download and process', input_size=(40,1)),
        FText('Output File', '-OUT FILE-', '', "the output file. (optional. if not included, it'll just modify the input file name)", input_size=(40,1)) + [sg.FileSaveAs()],
        FText('Silent Threshold', '-SILENT THRESHOLD-', 0.03,
              "the volume amount that frames' audio needs to surpass to be consider \"sounded\". It ranges from 0 (silence) to 1 (max volume)"),
        FText('Sounded Speed', '-SOUNDED SPEED-', 1.00, "the speed that sounded (spoken) frames should be played at. Typically 1."),
        FText('Silent Speed', '-SILENT SPEED-', 5.00, "the speed that silent frames should be played at. 999999 for jumpcutting."),
        FText('Frame Margin', '-FRAME MARGIN-', 1,
              "some silent frames adjacent to sounded frames are included to provide context. How many frames on either the side of speech should be included? That's this variable."),
        FText('Sample Rate', '-SAMPLE RATE-', '44100', "sample rate of the input and output videos"),
        FText('Frame Rate', '-FRAME RATE-', 30,
              "frame rate of the input and output videos. optional... I try to find it out myself, but it doesn't always work."),
        FText('Frame Quality', '-FRAME QUALITY-', 3, "quality of frames to be extracted from input video. 1 is highest, 31 is lowest, 3 is the default."),
        [sg.MLine(size=(90,10), reroute_stdout=True, reroute_stderr=True, reroute_cprint=True, write_only=True, font='Courier 10', autoscroll=True, key='-ML-')],
        [sg.Button('Start'), sg.Button('Exit')],
    ]
    window = sg.Window('Jump Cutter', layout)
    # Run jumpcutter.py with the interpreter that is running this GUI. The
    # path is quoted because it may contain spaces. The script is still looked
    # up in the current directory, but without the Windows-only ".\" prefix,
    # so the command also works in a POSIX shell.
    interpreter = ('"' + sys.executable + '"') if sys.executable else 'python'
    while True:
        event, values = window.read()
        if event in (sg.WIN_CLOSED, 'Exit'):
            break
        if event == 'Start':
            parms = build_parameter_string(values)
            print('Your parameters = ', parms)
            runCommand(cmd=interpreter + ' jumpcutter.py ' + parms, window=window)
            sg.cprint('*'*20+'DONE'+'*'*20, background_color='red', text_color='white')
    window.close()
 def runCommand(cmd, timeout=None, window=None):
    """ run shell command
    The command's stderr is merged into its stdout. Every output line is
    printed as soon as it arrives and the window, if given, is refreshed.
    @param cmd: command to execute
    @param timeout: timeout for the final wait on the command; reading its output
                    happens before that wait and is not limited by it
    @param window: the PySimpleGUI window that the output is going to (needed to do refresh on)
    @return: (return code from command, command output with lines joined by newlines)
    """
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    decode_errors = 'replace' if (sys.version_info) < (3, 5) else 'backslashreplace'
    lines = []
    try:
        for line in p.stdout:
            line = line.decode(errors=decode_errors).rstrip()
            lines.append(line)
            print(line)
            if window:
                window.refresh()
    finally:
        # Release the pipe even when printing or refreshing raises.
        p.stdout.close()
    retval = p.wait(timeout)
    # Each line had its line ending stripped above, so the separator is put
    # back here; plain concatenation would run all lines together.
    return (retval, '\n'.join(lines))
 # Start the GUI only when this file is executed directly, not when it is imported.
 if __name__ == '__main__':
    # sg.theme('Dark Grey 11')
    main()