/*
 * Review preamble for this patch: long-term-reference (LTR) handling for the
 * radeonsi VCN AV1 encoder. Three files are touched.
 *
 * radeon_vcn_enc.c, radeon_vcn_enc_av1_get_param():
 *   - max_references is raised by RENCODE_VCN4_AV1_MAX_NUM_LTR.
 *   - ref_frame_idx[], ref_list[], recon_frame, ref_frame_ctrl_l0 and
 *     long_term_reference are copied from pic into enc_pic.
 *
 * radeon_vcn_enc.h, struct radeon_enc_pic:
 *   - adds the av1_mark_long_term_reference bit, temporal_seq_num, and the
 *     av1_ref_frame_idx / av1_ref_list / av1_recon_frame /
 *     av1_ref_frame_ctrl_l0 / av1_ref_frame_ctrl_l1 / av1_ltr_seq fields.
 *
 * radeon_vcn_enc_4_0.c:
 *   - radeon_enc_av1_alloc_curr_frame() takes mark_long_term and
 *     frame_signature, stores the signature in the frame entry, sets is_ltr,
 *     and for LTR frames assigns ltr_seq from av1_ltr_seq (post-incremented).
 *   - radeon_enc_av1_pre_scan_frames(): when the current picture is marked as
 *     LTR it counts the in-use LTR entries, tracks the one with the smallest
 *     ltr_seq, logs an error if av1_recon_frame equals an existing LTR
 *     signature, and releases the smallest-ltr_seq entry once the count
 *     exceeds RENCODE_VCN4_AV1_MAX_NUM_LTR. The same-temporal-id release now
 *     skips LTR entries.
 *   - radeon_enc_av1_search_requested_reference() (new): takes bits 6..8 of
 *     av1_ref_frame_ctrl_l0; if non-zero, uses (value - 1) to index
 *     av1_ref_frame_idx[], rejects results >= RENCDOE_AV1_NUM_REF_FRAMES,
 *     fetches the signature from av1_ref_list[], and searches frames[] for an
 *     in-use LTR entry with that signature. On a match it refreshes that
 *     entry's ltr_seq, writes the index to *idx and returns true.
 *   - radeon_enc_av1_obtain_ref0_frame(): first finds the in-use LTR entry
 *     with the largest ltr_seq, then scans frames[] from the highest index
 *     down for an in-use entry with temporal_id <= the requested one; if that
 *     entry is LTR the largest-ltr_seq LTR index is returned, otherwise the
 *     entry's own index. Returns 0 when nothing matches.
 *   - radeon_enc_reset_av1_dpb_frames(): per-field resets replaced by
 *     compound-literal assignments that also clear the new fields.
 *   - radeon_enc_av1_dpb_management(): key frames reset temporal_seq_num and
 *     av1_ltr_seq; other frames run the requested-reference search, reset
 *     temporal_seq_num to 0 when the picture is marked LTR or a requested LTR
 *     was found (otherwise increment it), derive temporal_id from
 *     temporal_seq_num instead of frame_id, and use the found index as
 *     reference_frame_index when the search succeeded.
 *
 * NOTE(review): pre_scan_frames() runs before alloc_curr_frame() in
 *   dpb_management(), so with "cnt > MAX" the newly allocated LTR can bring
 *   the total to MAX + 1 -- confirm whether ">=" was intended.
 * NOTE(review): av1_ref_frame_ctrl_l1 is added to the struct but is not
 *   assigned in any hunk visible here -- confirm it is set elsewhere.
 * NOTE(review): assert(frame_signature) is the only guard on the signature;
 *   presumably release builds define NDEBUG, in which case a NULL
 *   pic->recon_frame would be stored silently -- verify against callers.
 * NOTE(review): RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM is #defined inside a
 *   function body and never #undef'd, so it stays visible for the rest of
 *   the translation unit.
 * NOTE(review): (marked_ref_frame_idx - 1) can be as large as 6 and indexes
 *   av1_ref_frame_idx[] without an explicit bound check; this is only safe
 *   if RENCDOE_AV1_REFS_PER_FRAME >= 7 (its value is not visible here).
 */
diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c index 9d0aa41e342..86ab2457c93 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c @@ -769,7 +769,17 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, pic->seq.num_temporal_layers : RENCODE_MAX_NUM_TEMPORAL_LAYERS; /* 1, 2 layer needs 1 reference, and 3, 4 layer needs 2 references */ - enc->base.max_references = (enc_pic->num_temporal_layers + 1) / 2; + enc->base.max_references = (enc_pic->num_temporal_layers + 1) / 2 + + RENCODE_VCN4_AV1_MAX_NUM_LTR; + for (int i = 0; i < RENCDOE_AV1_REFS_PER_FRAME; i++) + enc_pic->av1_ref_frame_idx[i] = pic->ref_frame_idx[i]; + + for (int i = 0; i < RENCDOE_AV1_NUM_REF_FRAMES; i++) + enc_pic->av1_ref_list[i] = pic->ref_list[i]; + + enc_pic->av1_recon_frame = pic->recon_frame; + enc_pic->av1_ref_frame_ctrl_l0 = pic->ref_frame_ctrl_l0; + radeon_vcn_enc_quality_modes(enc, &pic->quality_modes); enc_pic->frame_id_numbers_present = pic->seq.seq_bits.frame_id_number_present_flag; enc_pic->enable_error_resilient_mode = pic->error_resilient_mode; @@ -785,6 +795,7 @@ static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, enc_pic->disable_screen_content_tools = !pic->allow_screen_content_tools; enc_pic->is_obu_frame = pic->enable_frame_obu; enc_pic->need_av1_seq = (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY); + enc_pic->av1_mark_long_term_reference = pic->long_term_reference; radeon_vcn_enc_av1_get_spec_misc_param(enc, pic); radeon_vcn_enc_av1_timing_info(enc, pic); diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h index 114315ac948..b3b18080207 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h @@ -124,6 +124,7 @@ struct radeon_enc_pic { uint32_t is_obu_frame:1; uint32_t stream_obu_frame:1; /* all frames have the same 
number of tiles */ uint32_t need_av1_seq:1; + uint32_t av1_mark_long_term_reference:1; }; uint32_t render_width; uint32_t render_height; @@ -131,6 +132,7 @@ struct radeon_enc_pic { enum pipe_av1_enc_frame_type last_frame_type; uint32_t display_frame_id; uint32_t frame_id; + uint32_t temporal_seq_num; uint32_t order_hint; uint32_t order_hint_bits; uint32_t refresh_frame_flags; @@ -146,6 +148,12 @@ struct radeon_enc_pic { uint32_t count_last_layer; rvcn_enc_av1_ref_frame_t frames[RENCDOE_AV1_NUM_REF_FRAMES]; rvcn_enc_av1_recon_slot_t recon_slots[RENCDOE_AV1_NUM_REF_FRAMES + 1]; + uint8_t av1_ref_frame_idx[RENCDOE_AV1_REFS_PER_FRAME]; + void *av1_ref_list[RENCDOE_AV1_NUM_REF_FRAMES]; + void *av1_recon_frame; + uint32_t av1_ref_frame_ctrl_l0; + uint32_t av1_ref_frame_ctrl_l1; + uint32_t av1_ltr_seq; }; rvcn_enc_session_info_t session_info; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c index 5255922a555..05a3aa2f055 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c @@ -191,10 +191,14 @@ static void redeon_enc_av1_release_recon_slot(struct radeon_encoder *enc, static uint32_t radeon_enc_av1_alloc_curr_frame(struct radeon_encoder *enc, uint32_t frame_id, uint32_t temporal_id, + uint32_t mark_long_term, /* mark it as long term reference */ + void *frame_signature, enum pipe_av1_enc_frame_type frame_type) { uint32_t i = 0; + assert(frame_signature); + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; if (!frame->in_use) { @@ -203,6 +207,10 @@ static uint32_t radeon_enc_av1_alloc_curr_frame(struct radeon_encoder *enc, frame->temporal_id = temporal_id; frame->slot_id = radeon_enc_av1_alloc_recon_slot(enc); frame->frame_type = frame_type; + frame->frame_signature = frame_signature; + frame->is_ltr = !!(mark_long_term); + if (frame->is_ltr) + frame->ltr_seq = 
enc->enc_pic.av1_ltr_seq++; break; } } @@ -251,6 +259,33 @@ static void radeon_enc_av1_pre_scan_frames(struct radeon_encoder *enc, { uint32_t i = 0; + /* checking long term frames if it reached the limit, it needs to + * release the oldest. */ + if (enc->enc_pic.av1_mark_long_term_reference) { + int cnt = 0; + uint32_t min_seq = (uint32_t)-1; + uint32_t min_seq_idx = 0; + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && frame->is_ltr) { + if (frame->ltr_seq < min_seq) { + min_seq = frame->ltr_seq; + min_seq_idx = i; + } + cnt++; + + /* this means some LTR ref buffer has been re-used. */ + if (enc->enc_pic.av1_recon_frame == frame->frame_signature) + RVID_ERR("recon duplicated! it could refer to a wrong frame!\n"); + } + } + /* release the frame with minimum ltr seq number (oldest), + * this check is happening on each frame, the total number + * of LTR is limited by RENCODE_VCN4_AV1_MAX_NUM_LTR.*/ + /* NOTE(review): cnt only counts LTR frames already in the DPB; the current frame is allocated later, so '>' may leave MAX + 1 LTR frames -- confirm '>=' is not intended. */ if (cnt > RENCODE_VCN4_AV1_MAX_NUM_LTR) + radeon_enc_av1_release_ref_frame(enc, min_seq_idx, false); + } + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) { rvcn_enc_av1_recon_slot_t *slot = &enc->enc_pic.recon_slots[i]; if (slot->in_use && slot->is_orphaned) { @@ -264,39 +299,102 @@ static void radeon_enc_av1_pre_scan_frames(struct radeon_encoder *enc, if (frame->in_use) { if (temporal_id < frame->temporal_id) radeon_enc_av1_release_ref_frame(enc, i, false); - else if (temporal_id == frame->temporal_id) + else if (temporal_id == frame->temporal_id && (!frame->is_ltr)) radeon_enc_av1_release_ref_frame(enc, i, true); } } } +static bool radeon_enc_av1_search_requested_reference( + struct radeon_encoder *enc, + uint32_t *idx) +{ + bool find = false; + /* Here is the assumption, the 3rd item of ref_frame_ctrl_l0 + indicates which slot it needs to find in ref_frame_idx[], and + from ref_frame_idx to find the requested reference frame + in ref_list[] */ + #define 
RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM (0x1c0) /* 111 000 000 */ + /* Bits 6..8 of ref_frame_ctrl_l0, shifted down; a value of 0 leaves find == false and *idx untouched. */ uint32_t marked_ref_frame_idx = (RENCODE_AV1_REF_CTRL_L0_THIRD_ITEM & + enc->enc_pic.av1_ref_frame_ctrl_l0) >> 6; + /* valid marked_ref_frame_idx > 0 */ + if (marked_ref_frame_idx) { + uint32_t requested_frame_idx = + enc->enc_pic.av1_ref_frame_idx[marked_ref_frame_idx - 1]; + void *request_signature = NULL; + + if (requested_frame_idx >= RENCDOE_AV1_NUM_REF_FRAMES) + goto end; + + request_signature = enc->enc_pic.av1_ref_list[requested_frame_idx]; + for (uint32_t i = 0; i < ARRAY_SIZE((enc->enc_pic.frames)); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && + frame->is_ltr && + (request_signature == frame->frame_signature)) { + find = true; + /* increase the frame seq number after found, when it + * reaches the maximum limit, this found one will not + * be released. */ + frame->ltr_seq = enc->enc_pic.av1_ltr_seq++; + *idx = i; + break; + } + } + } +end: + return find; +} + static uint32_t radeon_enc_av1_obtain_ref0_frame(struct radeon_encoder *enc, uint32_t temporal_id) { + /* when only ltr frames in DPB, it needs to use the biggest ltr_seq + * one (latest) for reference, instead of the first one met. */ uint32_t i = 0; + uint32_t ret_idx = 0; + uint32_t max_seq = 0; + uint32_t max_seq_idx = 0; + for (i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { + rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i]; + if (frame->in_use && frame->is_ltr && (frame->ltr_seq >= max_seq)) { + max_seq = frame->ltr_seq; + max_seq_idx = i; + } + } for (i = ARRAY_SIZE(enc->enc_pic.frames); i > 0; i--) { rvcn_enc_av1_ref_frame_t *frame = &enc->enc_pic.frames[i - 1]; - if (frame->in_use && frame->temporal_id <= temporal_id) + if (frame->in_use && frame->temporal_id <= temporal_id) { + if (frame->is_ltr) + ret_idx = max_seq_idx; + else + ret_idx = i - 1; + break; + } } - /* not find, ref = 0, or ref = i - 1 */ - return i == 0 ? 
i : i - 1; + return ret_idx; } static void radeon_enc_reset_av1_dpb_frames(struct radeon_encoder *enc) { - for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) { - enc->enc_pic.frames[i].in_use = false; - enc->enc_pic.frames[i].frame_id = 0; - enc->enc_pic.frames[i].temporal_id = 0; - enc->enc_pic.frames[i].slot_id = 0; - enc->enc_pic.frames[i].frame_type = 0; - } + for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.frames); i++) + enc->enc_pic.frames[i] = (rvcn_enc_av1_ref_frame_t) { + .in_use = false, + .is_ltr = false, + .ltr_seq = 0, + .frame_id = 0, + .temporal_id = 0, + .slot_id = 0, + .frame_type = 0, + .frame_signature = NULL, + }; - for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) { - enc->enc_pic.recon_slots[i].in_use = false; - enc->enc_pic.recon_slots[i].is_orphaned = false; - } + for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.recon_slots); i++) + enc->enc_pic.recon_slots[i] = (rvcn_enc_av1_recon_slot_t) { + .in_use = false, + .is_orphaned = false, + }; } static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) @@ -304,21 +402,35 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) struct radeon_enc_pic *pic = &enc->enc_pic; uint32_t current_slot; uint32_t ref_slot; + uint32_t request_idx; + bool find = false; if (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY) { pic->frame_id = 0; + pic->temporal_seq_num = 0; pic->temporal_id = 0; pic->reference_delta_frame_id = 0; pic->reference_frame_index = 0; pic->last_frame_type = PIPE_AV1_ENC_FRAME_TYPE_KEY; + pic->av1_ltr_seq = 0; current_slot = 0; ref_slot = 0; + request_idx = 0; radeon_enc_reset_av1_dpb_frames(enc); } else { - pic->temporal_id = radeon_enc_av1_calculate_temporal_id(pic->frame_id, + find = radeon_enc_av1_search_requested_reference(enc, &request_idx); + if (pic->av1_mark_long_term_reference || find) + pic->temporal_seq_num = 0; /*for ltr, always temporal_id = 0 */ + else + pic->temporal_seq_num++; + + pic->temporal_id = 
radeon_enc_av1_calculate_temporal_id(pic->temporal_seq_num, pic->num_temporal_layers - 1); - pic->reference_frame_index = - radeon_enc_av1_obtain_ref0_frame(enc, pic->temporal_id); + if (find) + pic->reference_frame_index = request_idx; + else + pic->reference_frame_index = + radeon_enc_av1_obtain_ref0_frame(enc, pic->temporal_id); ref_slot = pic->frames[pic->reference_frame_index].slot_id; pic->last_frame_type = pic->frames[pic->reference_frame_index].frame_type; radeon_enc_av1_pre_scan_frames(enc, pic->temporal_id); @@ -336,6 +448,8 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) pic->frames[pic->reference_frame_index].frame_id; current_slot = radeon_enc_av1_alloc_curr_frame(enc, pic->frame_id, pic->temporal_id, + pic->av1_mark_long_term_reference, + pic->av1_recon_frame, pic->frame_type); if (pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_KEY || pic->frame_type == PIPE_AV1_ENC_FRAME_TYPE_SWITCH ||