@@ -679,134 +679,212 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 }
 
 /**
- * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
  * @xdp_ring: XDP Tx ring
- * @budget: max number of frames to xmit
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ * @napi_budget: amount of descriptors that NAPI allows us to clean
  *
- * Returns true if cleanup/transmission is done.
+ * Returns count of cleaned descriptors
  */
-static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget)
+static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
 {
-	struct ice_tx_desc *tx_desc = NULL;
-	bool work_done = true;
-	struct xdp_desc desc;
-	dma_addr_t dma;
+	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+	int budget = napi_budget / tx_thresh;
+	u16 ntc = xdp_ring->next_to_clean;
+	u16 next_dd = xdp_ring->next_dd;
+	u16 cleared_dds = 0;
 
-	while (likely(budget-- > 0)) {
+	do {
+		struct ice_tx_desc *next_dd_desc;
+		u16 desc_cnt = xdp_ring->count;
 		struct ice_tx_buf *tx_buf;
+		u32 xsk_frames;
+		u16 i;
 
-		if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
-			xdp_ring->tx_stats.tx_busy++;
-			work_done = false;
-			break;
-		}
-
-		tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
-
-		if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
+		next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
+		if (!(next_dd_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
 			break;
 
-		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
-		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
-						 desc.len);
+		cleared_dds++;
+		xsk_frames = 0;
 
-		tx_buf->bytecount = desc.len;
+		for (i = 0; i < tx_thresh; i++) {
+			tx_buf = &xdp_ring->tx_buf[ntc];
 
-		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
-		tx_desc->buf_addr = cpu_to_le64(dma);
-		tx_desc->cmd_type_offset_bsz =
-			ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0);
+			if (tx_buf->raw_buf) {
+				ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+				tx_buf->raw_buf = NULL;
+			} else {
+				xsk_frames++;
+			}
 
-		xdp_ring->next_to_use++;
-		if (xdp_ring->next_to_use == xdp_ring->count)
-			xdp_ring->next_to_use = 0;
-	}
+			ntc++;
+			if (ntc >= xdp_ring->count)
+				ntc = 0;
+		}
+		if (xsk_frames)
+			xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+		next_dd_desc->cmd_type_offset_bsz = 0;
+		next_dd = next_dd + tx_thresh;
+		if (next_dd >= desc_cnt)
+			next_dd = tx_thresh - 1;
+	} while (budget--);
 
-	if (tx_desc) {
-		ice_xdp_ring_update_tail(xdp_ring);
-		xsk_tx_release(xdp_ring->xsk_pool);
-	}
+	xdp_ring->next_to_clean = ntc;
+	xdp_ring->next_dd = next_dd;
 
-	return budget > 0 && work_done;
+	return cleared_dds * tx_thresh;
 }
 
 /**
- * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
- * @xdp_ring: XDP Tx ring
- * @tx_buf: Tx buffer to clean
+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @total_bytes: bytes accumulator that will be used for stats update
  */
-static void
-ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
+			 unsigned int *total_bytes)
 {
-	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
-	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
-			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
-	dma_unmap_len_set(tx_buf, len, 0);
+	struct ice_tx_desc *tx_desc;
+	dma_addr_t dma;
+
+	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+	tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+						      0, desc->len, 0);
+
+	*total_bytes += desc->len;
 }
 
 /**
- * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
- * @xdp_ring: XDP Tx ring
- * @budget: NAPI budget
- *
- * Returns true if cleanup/tranmission is done.
+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @total_bytes: bytes accumulator that will be used for stats update
  */
-bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget)
+static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+			       unsigned int *total_bytes)
 {
-	int total_packets = 0, total_bytes = 0;
-	s16 ntc = xdp_ring->next_to_clean;
+	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+	u16 ntu = xdp_ring->next_to_use;
 	struct ice_tx_desc *tx_desc;
-	struct ice_tx_buf *tx_buf;
-	u32 xsk_frames = 0;
-	bool xmit_done;
+	u32 i;
 
-	tx_desc = ICE_TX_DESC(xdp_ring, ntc);
-	tx_buf = &xdp_ring->tx_buf[ntc];
-	ntc -= xdp_ring->count;
+	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+		dma_addr_t dma;
 
-	do {
-		if (!(tx_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
-			break;
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
 
-		total_bytes += tx_buf->bytecount;
-		total_packets++;
+		tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+							      0, descs[i].len, 0);
 
-		if (tx_buf->raw_buf) {
-			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
-			tx_buf->raw_buf = NULL;
-		} else {
-			xsk_frames++;
-		}
+		*total_bytes += descs[i].len;
+	}
 
-		tx_desc->cmd_type_offset_bsz = 0;
-		tx_buf++;
-		tx_desc++;
-		ntc++;
+	xdp_ring->next_to_use = ntu;
 
-		if (unlikely(!ntc)) {
-			ntc -= xdp_ring->count;
-			tx_buf = xdp_ring->tx_buf;
-			tx_desc = ICE_TX_DESC(xdp_ring, 0);
-		}
+	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+		tx_desc->cmd_type_offset_bsz |=
+			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+		xdp_ring->next_rs += tx_thresh;
+	}
+}
 
-		prefetch(tx_desc);
+/**
+ * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @nb_pkts: count of packets to be send
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+				u32 nb_pkts, unsigned int *total_bytes)
+{
+	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+	u32 batched, leftover, i;
+
+	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
+	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+	for (i = 0; i < batched; i += PKTS_PER_BATCH)
+		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+	for (; i < batched + leftover; i++)
+		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+
+	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+		struct ice_tx_desc *tx_desc;
+
+		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+		tx_desc->cmd_type_offset_bsz |=
+			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+		xdp_ring->next_rs += tx_thresh;
+	}
+}
 
-	} while (likely(--budget));
+/**
+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @budget: number of free descriptors on HW Tx ring that can be used
+ * @napi_budget: amount of descriptors that NAPI allows us to clean
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+{
+	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+	u32 nb_pkts, nb_processed = 0;
+	unsigned int total_bytes = 0;
+
+	if (budget < tx_thresh)
+		budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+
+	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+	if (!nb_pkts)
+		return true;
+
+	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+		struct ice_tx_desc *tx_desc;
+
+		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+		tx_desc->cmd_type_offset_bsz |=
+			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+		xdp_ring->next_rs = tx_thresh - 1;
+		xdp_ring->next_to_use = 0;
+	}
 
-	ntc += xdp_ring->count;
-	xdp_ring->next_to_clean = ntc;
+	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+			    &total_bytes);
 
-	if (xsk_frames)
-		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+	ice_xdp_ring_update_tail(xdp_ring);
+	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
 
 	if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
 		xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
 
-	ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
-	xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
-
-	return budget > 0 && xmit_done;
+	return nb_pkts < budget;
 }
 
 /**
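
Note (not part of the patch): the core idea of ice_fill_tx_hw_ring() above is to split nb_pkts into full PKTS_PER_BATCH-sized chunks that go through the unrolled batch path and a scalar tail that goes through the single-descriptor path. A minimal, self-contained sketch of that split is below; it assumes PKTS_PER_BATCH is 8 and a power of two, and the xmit_pkt_batch()/xmit_pkt() helpers are hypothetical stand-ins for ice_xmit_pkt_batch() and ice_xmit_pkt(), printing instead of writing descriptors.

    /* Standalone sketch of the batch/tail split used by ice_fill_tx_hw_ring(). */
    #include <stdio.h>

    #define PKTS_PER_BATCH 8				/* assumed value, power of two */
    #define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))		/* round down to a multiple of a */

    /* stand-in for ice_xmit_pkt_batch(): handles PKTS_PER_BATCH descriptors at once */
    static void xmit_pkt_batch(unsigned int first)
    {
    	printf("batch of %d starting at index %u\n", PKTS_PER_BATCH, first);
    }

    /* stand-in for ice_xmit_pkt(): handles one leftover descriptor */
    static void xmit_pkt(unsigned int idx)
    {
    	printf("single descriptor at index %u\n", idx);
    }

    static void fill_tx_hw_ring(unsigned int nb_pkts)
    {
    	unsigned int batched, leftover, i;

    	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);	/* full batches */
    	leftover = nb_pkts & (PKTS_PER_BATCH - 1);	/* remainder < PKTS_PER_BATCH */

    	for (i = 0; i < batched; i += PKTS_PER_BATCH)
    		xmit_pkt_batch(i);
    	for (; i < batched + leftover; i++)
    		xmit_pkt(i);
    }

    int main(void)
    {
    	fill_tx_hw_ring(27);	/* 3 full batches (0, 8, 16), then singles 24..26 */
    	return 0;
    }

With nb_pkts = 27 the sketch emits three batch calls and three single calls, mirroring how the driver amortizes per-descriptor work over the unrolled batch path and falls back to the scalar path only for the tail.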