Home

動画同期音楽生成

過去に行った研究

Pythonで実装

「猫の喧嘩、まさかの結末」

「トンビにカメパンぬすまれた」


詳細

ソースコード (Python)


import os
import time
import numpy as np
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from midi2audio import FluidSynth
from magenta.models.performance_rnn import performance_sequence_generator

    cap.release()

    # ダイナミクスの正規化
    normalized_dynamics = np.interp(dynamics, (np.min(dynamics), np.max(dynamics)), (0.2, 0.8))

    # グラフの描画
    plt.figure(figsize=(12, 6))
    plt.plot(range(0, len(normalized_dynamics) * skip_frames, skip_frames), normalized_dynamics)
    plt.title("analysis")
    plt.xlabel("frame")
    plt.ylabel("intensity")
    plt.grid(True)
    plt.show()

    return normalized_dynamics

# Music generation (Multiconditioned Performance with Dynamics)
def generate_music_with_multiconditioned_model(dynamics, sequence_generator, total_duration):
    """Generate a performance-RNN NoteSequence conditioned on video dynamics.

    Builds a primer sequence with one note per analyzed video segment —
    pitch and velocity scaled by that segment's intensity — then asks the
    Magenta generator to produce a sequence up to ``total_duration`` seconds.

    Args:
        dynamics: per-segment normalized intensity values (upstream analysis
            maps them into [0.2, 0.8] — TODO confirm against the analyzer).
        sequence_generator: an initialized Magenta performance sequence generator.
        total_duration: length of the source video in seconds.

    Returns:
        The NoteSequence returned by ``sequence_generator.generate``.

    Raises:
        ValueError: if ``dynamics`` is empty (video analysis produced no data).
    """
    if len(dynamics) == 0:
        raise ValueError("ダイナミクスのデータが空です。動画の解析に問題がある可能性があります。")

    # Primer sequence seeded from the video dynamics.
    primer_sequence = music_pb2.NoteSequence()
    primer_sequence.ticks_per_quarter = note_seq.STANDARD_PPQ

    # One note per segment: stronger motion -> higher pitch and velocity.
    segment_duration = total_duration / len(dynamics)
    for i, dynamic in enumerate(dynamics):
        start_time = i * segment_duration
        end_time = start_time + segment_duration
        pitch = int(60 + 20 * dynamic)  # motion-driven pitch around middle C
        primer_sequence.notes.add(
            pitch=pitch,
            start_time=start_time,
            end_time=end_time,
            velocity=int(63 + dynamic * 64)  # MIDI velocity, roughly 63..127
        )
    # End of the last segment; hoisted out of the loop (same final value).
    primer_sequence.total_time = end_time

    # Generation options: continue from where the primer ends.
    generator_options = generator_pb2.GeneratorOptions()

    # NOTE(review): the primer already spans total_duration, so this section
    # has zero width (start_time == end_time) — verify this is intended.
    generator_options.generate_sections.add(
        start_time=primer_sequence.total_time,
        end_time=total_duration
    )

    # Sampling parameters.
    generator_options.args['temperature'].float_value = 1.0  # randomness
    generator_options.args['density'].float_value = 0.8    # note density

    # Run the model.
    generated_sequence = sequence_generator.generate(primer_sequence, generator_options)

    return generated_sequence

def display_midi_content(midi_file_path):
    """Plot the piano roll of a MIDI file as a heatmap.

    Loads the file with pretty_midi, draws its piano roll (time frames on
    the x axis, MIDI pitch on the y axis, color = velocity) and shows the
    figure, with the y axis ticked at every octave.
    """
    # Load the MIDI file and extract its piano-roll matrix in one pass.
    roll = pretty_midi.PrettyMIDI(midi_file_path).get_piano_roll()

    # Figure setup.
    plt.figure(figsize=(12, 6))
    plt.imshow(roll, aspect='auto', origin='lower', cmap='Blues')
    plt.title('MIDI File Piano Roll')
    plt.xlabel('Time (steps)')
    plt.ylabel('Pitch')
    plt.colorbar(label='Velocity')

    # Tick every octave; MIDI note 60 is C4, so octave = note//12 - 1.
    octave_ticks = np.arange(0, 128, 12)
    pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    tick_labels = []
    for p in octave_ticks:
        tick_labels.append(f'{pitch_names[p%12]}{p//12-1}')
    plt.yticks(octave_ticks, tick_labels)

    plt.show()

# Save the music as MIDI and render it to WAV
def save_music_to_wav(note_sequence, output_midi, output_wav, soundfont_path):
    """Write ``note_sequence`` to a MIDI file, then synthesize it to WAV.

    The MIDI file is written first via note_seq, then rendered to audio
    with FluidSynth using the given SoundFont.
    """
    note_seq.sequence_proto_to_midi_file(note_sequence, output_midi)
    synth = FluidSynth(soundfont_path)
    synth.midi_to_audio(output_midi, output_wav)

# Mux the generated audio onto the video
def integrate_audio_with_video(video_path, audio_path, output_path):
    """Attach the generated audio track to the video and write the result.

    Args:
        video_path: source video file.
        audio_path: audio file to use as the new soundtrack.
        output_path: destination for the muxed video (H.264 / AAC).
    """
    video = VideoFileClip(video_path)
    audio = AudioFileClip(audio_path)
    try:
        new_audio = CompositeAudioClip([audio])
        video = video.set_audio(new_audio)
        video.write_videofile(output_path, codec='libx264', audio_codec='aac')
    finally:
        # Fix: the original leaked both clip readers (ffmpeg subprocesses /
        # file handles); always release them.
        audio.close()
        video.close()

# Main pipeline
def main():
    """Run the full pipeline: analyze video, generate music, save, and mux."""
    video_path = "/content/drive/MyDrive/Colab Notebooks/CatFight.mp4"
    output_midi_path = "/content/drive/MyDrive/Colab Notebooks/generated_music.mid"
    output_audio_path = "/content/drive/MyDrive/Colab Notebooks/generated_music.wav"
    output_video_path = "/content/drive/MyDrive/Colab Notebooks/output_video.mp4"
    soundfont_path = "/content/drive/MyDrive/Colab Notebooks/FluidR3_GM.sf2"
    model_bundle_path = "/content/drive/MyDrive/Colab Notebooks/multiconditioned_performance_with_dynamics.mag"

    # Load the Magenta model bundle and initialize the generator.
    bundle = sequence_generator_bundle.read_bundle_file(model_bundle_path)
    generator_map = performance_sequence_generator.get_generator_map()
    sequence_generator = generator_map['multiconditioned_performance_with_dynamics'](bundle=bundle)
    sequence_generator.initialize()

    # 1. Analyze the video's motion dynamics.
    dynamics = analyze_video_dynamics(video_path)

    # 2. Generate music matching the video's duration.
    #    The clip is opened only to read its duration, so close it promptly.
    video = VideoFileClip(video_path)
    try:
        total_duration = video.duration
    finally:
        video.close()
    generated_sequence = generate_music_with_multiconditioned_model(dynamics, sequence_generator, total_duration)

    # 3. Save the music as MIDI/WAV.
    save_music_to_wav(generated_sequence, output_midi_path, output_audio_path, soundfont_path)

    # Show the MIDI content. Bug fix: the original displayed the MIDI file
    # BEFORE saving it, reading a nonexistent (or stale) file.
    display_midi_content(output_midi_path)

    # 4. Mux the audio onto the video.
    integrate_audio_with_video(video_path, output_audio_path, output_video_path)

    print(f"生成された動画: {output_video_path}")

# Run the pipeline only when executed as a script (not on import).
if __name__ == "__main__":
    main()