# config
# Training configuration (diffusion-pipe style) for a HunyuanVideo LoRA run.
output_dir = 'output/pv4'
# Path to the dataset definition file consumed by the trainer.
dataset = 'examples/pv_dataset.toml'

epochs = 1000
micro_batch_size_per_gpu = 1
pipeline_stages = 1
gradient_accumulation_steps = 4
gradient_clipping = 1.0
warmup_steps = 100

eval_every_n_epochs = 1
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1

save_every_n_epochs = 2
checkpoint_every_n_epochs = 2
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
video_clip_mode = 'single_middle'

[model]
type = 'hunyuan-video'
transformer_path = '/u/SD/models/HunyuanVideo/models/hunyuan/hunyuan_video_720_cfgdistill_fp8_e4m3fn.safetensors'
vae_path = '/u/SD/models/HunyuanVideo/models/hunyuan/hunyuan_video_vae_bf16.safetensors'
llm_path = '/u/SD/models/HunyuanVideo/models/llm/llava-llama-3-8b-text-encoder-tokenizer'
clip_path = '/u/SD/models/HunyuanVideo/models/clip/clip-vit-large-patch14'
dtype = 'bfloat16'
# Transformer weights kept in fp8 to reduce VRAM; other components run in bf16.
transformer_dtype = 'float8'
timestep_sample_method = 'logit_normal'

[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'
only_double_blocks = true

[optimizer]
type = 'adamw8bit'
lr = 8e-5
betas = [0.9, 0.99]
weight_decay = 0.01
eps = 1e-8

# dataset config
# NOTE(review): everything from here down appears to belong in the separate
# dataset file referenced by `dataset` above (examples/pv_dataset.toml).
# If left in this file, a TOML parser will attach the scalar keys below to
# the [optimizer] table, which is almost certainly not intended — confirm
# and split into the dataset file.
resolutions = [768]
enable_ar_bucket = true
min_ar = 0.5
max_ar = 2.0
num_ar_buckets = 8
# Clip lengths (in frames) to bucket videos into; 1 = single-frame (image).
frame_buckets = [1, 31]

[[directory]]
path = '/u/SD/training/video/p/imgs'
resolutions = [768]
num_repeats = 4

[[directory]]
path = '/u/SD/training/video/p/vids32-min/640x480'
ar_buckets = [[640, 480]]
resolutions = [[640, 480]]
frame_buckets = [31]
num_repeats = 1

[[directory]]
path = '/u/SD/training/video/p/vids32-min/1280x720'
# Source videos are 1280x720; trained at half resolution (640x360).
ar_buckets = [[640, 360]]
resolutions = [[640, 360]]
frame_buckets = [31]
num_repeats = 1

[[directory]]
path = '/u/SD/training/video/p/vids32-min/720x1280'
# Portrait counterpart of the 1280x720 directory above (360x640).
ar_buckets = [[360, 640]]
resolutions = [[360, 640]]
frame_buckets = [31]
num_repeats = 1