過去に行った研究
Pythonで実装
「猫の喧嘩、まさかの結末」
「トンビにカメパンぬすまれた」
import os
import time

import cv2
import matplotlib.pyplot as plt
import note_seq
import numpy as np
import pretty_midi
from magenta.models.performance_rnn import performance_sequence_generator
from magenta.models.shared import sequence_generator_bundle
from midi2audio import FluidSynth
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from note_seq.protobuf import generator_pb2, music_pb2
cap.release()
# ダイナミクスの正規化
normalized_dynamics = np.interp(dynamics, (np.min(dynamics), np.max(dynamics)), (0.2, 0.8))
# グラフの描画
plt.figure(figsize=(12, 6))
plt.plot(range(0, len(normalized_dynamics) * skip_frames, skip_frames), normalized_dynamics)
plt.title("analysis")
plt.xlabel("frame")
plt.ylabel("intensity")
plt.grid(True)
plt.show()
return normalized_dynamics
# 音楽生成(Multiconditioned Performance with Dynamics)
def generate_music_with_multiconditioned_model(dynamics, sequence_generator, total_duration):
    """Generate a performance NoteSequence conditioned on video dynamics.

    A primer sequence is built by mapping each dynamics value to one note
    (higher motion -> higher pitch and velocity), then the Magenta
    multiconditioned performance model continues it to `total_duration`.

    Args:
        dynamics: per-segment motion intensities; assumed normalized to
            roughly [0.2, 0.8] by the video analysis step — TODO confirm.
        sequence_generator: an initialized Magenta sequence generator.
        total_duration: length of the source video in seconds.

    Returns:
        The generated `NoteSequence`.

    Raises:
        ValueError: if `dynamics` is empty.
    """
    if len(dynamics) == 0:
        raise ValueError("ダイナミクスのデータが空です。動画の解析に問題がある可能性があります。")

    # Build the primer: one note per analyzed segment.
    primer = music_pb2.NoteSequence()
    primer.ticks_per_quarter = note_seq.STANDARD_PPQ
    segment_length = total_duration / len(dynamics)
    last_note_end = 0.0
    for index, level in enumerate(dynamics):
        note_start = index * segment_length
        last_note_end = note_start + segment_length
        primer.notes.add(
            pitch=int(60 + 20 * level),          # motion-driven pitch around middle C
            start_time=note_start,
            end_time=last_note_end,
            velocity=int(63 + level * 64),       # motion-driven loudness
        )
    primer.total_time = last_note_end

    # Ask the model to continue from the end of the primer to the video's end.
    options = generator_pb2.GeneratorOptions()
    options.generate_sections.add(
        start_time=primer.total_time,
        end_time=total_duration,
    )
    options.args['temperature'].float_value = 1.0   # sampling randomness
    options.args['density'].float_value = 0.8       # note density

    return sequence_generator.generate(primer, options)
def display_midi_content(midi_file_path):
    """Render a piano-roll plot of the MIDI file at `midi_file_path`."""
    roll = pretty_midi.PrettyMIDI(midi_file_path).get_piano_roll()

    plt.figure(figsize=(12, 6))
    plt.imshow(roll, aspect='auto', origin='lower', cmap='Blues')
    plt.title('MIDI File Piano Roll')
    plt.xlabel('Time (steps)')
    plt.ylabel('Pitch')
    plt.colorbar(label='Velocity')

    # Label the y-axis at every C (MIDI note 60 == C4, hence the octave offset).
    note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    tick_rows = np.arange(0, 128, 12)
    plt.yticks(tick_rows, [f'{note_names[p % 12]}{p // 12 - 1}' for p in tick_rows])
    plt.show()
# 音楽をMIDI/WAV形式で保存
def save_music_to_wav(note_sequence, output_midi, output_wav, soundfont_path):
    """Serialize `note_sequence` to a MIDI file, then render it to WAV.

    Rendering uses FluidSynth with the SoundFont at `soundfont_path`.
    """
    note_seq.sequence_proto_to_midi_file(note_sequence, output_midi)
    FluidSynth(soundfont_path).midi_to_audio(output_midi, output_wav)
# 動画と音楽を統合
def integrate_audio_with_video(video_path, audio_path, output_path):
    """Replace the video's audio track with `audio_path` and write `output_path`.

    Encodes with H.264 video and AAC audio.

    Fix: the original never closed the moviepy clips, leaking the ffmpeg
    reader processes / file handles they hold; close them in `finally`.
    """
    video = VideoFileClip(video_path)
    audio = AudioFileClip(audio_path)
    try:
        video = video.set_audio(CompositeAudioClip([audio]))
        video.write_videofile(output_path, codec='libx264', audio_codec='aac')
    finally:
        audio.close()
        video.close()
# メイン処理
def main():
    """Run the full pipeline: analyze video motion, generate matching music,
    save it as MIDI/WAV, and mux the audio back into the video.

    Fix: the original called `display_midi_content(output_midi_path)` BEFORE
    `save_music_to_wav` had written that MIDI file, so it read a nonexistent
    (or stale) file. The display now happens after saving. The probe
    `VideoFileClip` is also closed once its duration has been read.
    """
    video_path = "/content/drive/MyDrive/Colab Notebooks/CatFight.mp4"
    output_midi_path = "/content/drive/MyDrive/Colab Notebooks/generated_music.mid"
    output_audio_path = "/content/drive/MyDrive/Colab Notebooks/generated_music.wav"
    output_video_path = "/content/drive/MyDrive/Colab Notebooks/output_video.mp4"
    soundfont_path = "/content/drive/MyDrive/Colab Notebooks/FluidR3_GM.sf2"
    model_bundle_path = "/content/drive/MyDrive/Colab Notebooks/multiconditioned_performance_with_dynamics.mag"

    # Load the pretrained Performance RNN bundle.
    bundle = sequence_generator_bundle.read_bundle_file(model_bundle_path)
    generator_map = performance_sequence_generator.get_generator_map()
    sequence_generator = generator_map['multiconditioned_performance_with_dynamics'](bundle=bundle)
    sequence_generator.initialize()

    # 1. Analyze the video's motion intensity.
    dynamics = analyze_video_dynamics(video_path)

    # 2. Generate music for the video's full duration.
    probe = VideoFileClip(video_path)
    try:
        total_duration = probe.duration
    finally:
        probe.close()
    generated_sequence = generate_music_with_multiconditioned_model(
        dynamics, sequence_generator, total_duration
    )

    # 3. Save the music (must happen before displaying the MIDI file).
    save_music_to_wav(generated_sequence, output_midi_path, output_audio_path, soundfont_path)
    display_midi_content(output_midi_path)

    # 4. Mux the generated audio into the video.
    integrate_audio_with_video(video_path, output_audio_path, output_video_path)
    print(f"生成された動画: {output_video_path}")
# Entry point: execute the pipeline only when run as a script, not on import.
if __name__ == "__main__":
    main()
© Haruya Matsushima