Other Parts Discussed in Thread: TDA4VH
Tool/software:
Hello TI E2E team,
I’m working with a TDA4VH-EVM (J784S4) running Processor SDK Linux ADAS (ti-processor-sdk-linux-adas-j784s4-evm-09_02_00_05) and Linux kernel 6.1.80-ti. An NVMe SSD is attached to PCIe0.
Small file transfers work fine, but copying a large file (~8 GB) fails. Environment and reproduction steps:
- Board: TDA4VH (J784S4) (Made by New Board)
- SDK: ti-processor-sdk-linux-adas-j784s4-evm-09_02_00_05
- Kernel: 6.1.80-ti-g2e423244f8c0
- Filesystem: ext4 on NVMe (/dev/nvme0n1p1)
- Repro:
root@j784s4-evm:/run/media/nvme0n1p# cp source_8GB.vhdx target.vhdx
The kernel panics partway through the copy. Here are the logs:
root@j784s4-evm:~#
[ 246.004290] Unable to handle kernel paging request at virtual address 00562a7a2e722520
[ 246.012202] Mem abort info:
[ 246.014982] ESR = 0x0000000096000004
[ 246.018717] EC = 0x25: DABT (current EL), IL = 32 bits
[ 246.024012] SET = 0, FnV = 0
[ 246.027052] EA = 0, S1PTW = 0
[ 246.030178] FSC = 0x04: level 0 translation fault
[ 246.035038] Data abort info:
[ 246.037904] ISV = 0, ISS = 0x00000004
[ 246.041725] CM = 0, WnR = 0
[ 246.044678] [00562a7a2e722520] address between user and kernel address ranges
[ 246.051794] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
[ 246.058043] Modules linked in: overlay ti_cpsw_proxy_client nvme nvme_core pci_endpoint_test bluetooth ecdh_generic ecc cfg80211 rfkill xhci_plat_hcd rpmsg_ctrl rpmsg_char ti_am335x_adc cdns3 kfifo_buf omap_rng cdns_usb_common cdns_pltfrm crct10dif_ce ti_j721e_cpsw_virt_mac wave5 display_connector videobuf2_dma_contig phy_can_transceiver videobuf2_memops v4l2_mem2mem cdns_mhdp8546 ti_k3_r5_remoteproc videobuf2_v4l2 cdns_dsi drm_display_helper ti_k3_dsp_remoteproc videobuf2_common drm_kms_helper virtio_rpmsg_bus syscopyarea videodev rpmsg_ns ti_am335x_tscadc ti_k3_common k3_j72xx_bandgap pvrsrvkm(O) pci_j721e_host sysfillrect mc sysimgblt m_can_platform sa2ul fb_sys_fops pci_j721e ti_j721e_ufs m_can pcie_cadence_host cdns_dphy cdns_dphy_rx pcie_cadence cdns3_ti can_dev optee_rng rng_core rti_wdt cryptodev(O) fuse drm drm_panel_orientation_quirks ipv6
[ 246.085527] Unable to handle kernel paging request at virtual address ffff80036c2de040
[ 246.133212] CPU: 7 PID: 2387 Comm: cp Tainted: G O 6.1.80-ti-g2e423244f8c0 #1
[ 246.141109] Unable to handle kernel paging request at virtual address ffff7ffffb2c4d50
[ 246.149606] Hardware name: Texas Instruments J784S4 EVM (DT)
[ 246.157499] Mem abort info:
[ 246.163139] pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 246.165918] ESR = 0x0000000096000004
[ 246.172857] pc : irq_work_run_list+0x44/0x70
[ 246.176590] EC = 0x25: DABT (current EL), IL = 32 bits
[ 246.180842] lr : irq_work_tick+0x28/0x6c
[ 246.184296] Insufficient stack space to handle exception!
[ 246.184298] Insufficient stack space to handle exception!
[ 246.184300] ESR: 0x000000009a000000 -- SP Alignment
[ 246.184301] Unable to handle kernel paging request at virtual address ffff7ffffbcae590
[ 246.184302] FAR: 0x6c6172657469810f
[ 246.184304] Mem abort info:
[ 246.184304] Task stack: [0xffff800009610000..0xffff800009614000]
[ 246.184305] ESR = 0x0000000096000004
[ 246.184306] IRQ stack: [0x6c61726574696c5f..0x6c6172657469ac5f]
[ 246.184306] EC = 0x25: DABT (current EL), IL = 32 bits
[ 246.184308] Overflow stack: [0xffff000f5c3d8300..0xffff000f5c3d9300]
[ 246.184309] SET = 0, FnV = 0
[ 246.184310] EA = 0, S1PTW = 0
[ 246.184311] FSC = 0x04: level 0 translation fault
[ 246.184310] CPU: 1735289188 PID: 0 Comm: swapper/6 Tainted: G O 6.1.80-ti-g2e423244f8c0 #1
[ 246.184313] Data abort info:
[ 246.184314] ISV = 0, ISS = 0x00000004
[ 246.184315] CM = 0, WnR = 0
[ 246.184315] Hardware name: Texas Instruments J784S4 EVM (DT)
[ 246.184316] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000082f40000
[ 246.184316] pstate: 200003c5 (nzCv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 246.184319] [ffff7ffffbcae590] pgd=0000000000000000, p4d=0000000000000000
[ 246.184320] pc : el1h_64_sync+0x0/0x68
[ 246.184326] lr : call_on_irq_stack+0x24/0x4c
[ 246.184331] sp : 6c6172657469810f
[ 246.184332] x29: ffff800009613c40 x28: ffff0008bc2f3900 x27: 0000000000000000
[ 246.184337] x26: ffff0008bc2f3900 x25: 0000000000000000 x24: 0000000000000000
[ 246.184342] x23: 0000000060000005 x22: ffff800008b6c5bc x21: ffff800009613df0
[ 246.184347] x20: 6874756120726568 x19: ffff800008ff46c8 x18: 0000000000000000
[ 246.184350] x17: ffff800f533ec000 x16: 6c61726574696c5f x15: 0000000000000000
[ 246.184354] x14: 00000000000001bf x13: 0000000000000001 x12: 0000000000000000
[ 246.184358] x11: 0000000000000001 x10: 00000000000009b0 x9 : ffff800009613d70
[ 246.184363] x8 : ffff0008bc2f4310 x7 : 0000000000000000 x6 : 00000000795a097c
[ 246.184367] x5 : ffff80000853f0c0 x4 : ffff800009613c51 x3 : ffff800009613ca0
[ 246.184371] x2 : ffff800009614000 x1 : ffff80000853f0c0 x0 : ffff800009613ca0
[ 246.186135] SET = 0, FnV = 0
[ 246.186138] EA = 0, S1PTW = 0
[ 246.186139] FSC = 0x04: level 0 translation fault
[ 246.186141] Data abort info:
[ 246.186142] ISV = 0, ISS = 0x00000004
[ 246.186143] CM = 0, WnR = 0
[ 246.186144] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000082f40000
[ 246.186147] [ffff7ffffb2c4d50] pgd=0000000000000000, p4d=0000000000000000
[ 246.190042] sp : ffff800009323e30
[ 246.190044] x29: ffff800009323e30 x28: 0000000000000000 x27: ffff8000080f2c40
[ 246.190049] x26: 0000000000000001 x25: 00000000000000c0 x24: 000000393ad26a4c
[ 246.190053] x23: ffff000f5c3f0680 x22: ffff000f5c3f06e0 x21: ffff80001532b5c0
[ 246.190058] x20: 0000000000000000 x19: 61562a7a2e722520 x18: 000000000046c56d
[ 246.190062] x17: ffff800f53400000 x16: ffff800009320000 x15: 00003d0910c8e000
[ 246.190066] x14: 00000000000c3500 x13: 000000000103ff97 x12: 003d0910be8a1cbe
[ 246.190071] x11: 0000000000000000 x10: 0000000100000008 x9 : 000000000103ff97
[ 246.190075] x8 : ffff8000091d2000 x7 : ffff800008ff5000 x6 : ffff800f53400000
[ 246.190079] x5 : ffff800009323d30 x4 : 0000000000000034 x3 : ffff800009323d30
[ 246.190083] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 61562a7a2e722520
[ 246.190087] Call trace:
[ 246.190089] irq_work_run_list+0x44/0x70
[ 246.190092] irq_work_tick+0x28/0x6c
[ 246.190095] update_process_times+0x94/0xac
[ 246.190101] tick_sched_handle+0x34/0x60
[ 246.190107] tick_sched_timer+0x4c/0xa4
[ 246.190111] __hrtimer_run_queues+0x138/0x1b0
[ 246.190115] hrtimer_interrupt+0xe8/0x244
[ 246.190118] arch_timer_handler_phys+0x34/0x44
[ 246.190123] handle_percpu_devid_irq+0x84/0x130
[ 246.190128] generic_handle_domain_irq+0x2c/0x44
[ 246.190134] gic_handle_irq+0x50/0x124
[ 246.190138] call_on_irq_stack+0x24/0x4c
[ 246.190142] do_interrupt_handler+0x80/0x8c
[ 246.190145] el1_interrupt+0x34/0x70
[ 246.190152] el1h_64_irq_handler+0x18/0x2c
[ 246.190157] el1h_64_irq+0x64/0x68
[ 246.190159] _raw_spin_unlock_irqrestore+0xc/0x50
[ 246.190164] mark_buffer_dirty+0x100/0x11c
[ 246.190172] __block_commit_write.constprop.0.isra.0+0xb8/0x160
[ 246.190177] generic_write_end+0x5c/0x194
[ 246.190179] ext4_da_write_end+0xfc/0x1f4
[ 246.190186] generic_perform_write+0x11c/0x1ec
[ 246.190190] ext4_buffered_write_iter+0x80/0x130
[ 246.190193] ext4_file_write_iter+0x5c/0x674
[ 246.190196] do_iter_readv_writev+0xbc/0x150
[ 246.190201] do_iter_write+0x90/0x200
[ 246.190203] vfs_iter_write+0x1c/0x30
[ 246.190206] iter_file_splice_write+0x248/0x3c0
[ 246.190210] direct_splice_actor+0x30/0x4c
[ 246.190213] splice_direct_to_actor+0xd8/0x280
[ 246.190215] do_splice_direct+0x94/0xe4
[ 246.190218] vfs_copy_file_range+0x13c/0x450
[ 246.190222] __arm64_sys_copy_file_range+0x164/0x33c
[ 246.190225] invoke_syscall+0x48/0x114
[ 246.190230] el0_svc_common.constprop.0+0xd4/0xfc
[ 246.190234] do_el0_svc+0x20/0x30
[ 246.190237] el0_svc+0x28/0xa0
[ 246.190241] el0t_64_sync_handler+0xbc/0x140
[ 246.190245] el0t_64_sync+0x18c/0x190
[ 246.190250] Code: 35ffffc2 d5033bbf b40000b3 aa1303e0 (f9400273)
[ 246.195633] ---[ end trace 0000000000000000 ]---
Has anyone seen a similar “kernel paging request” panic during large file writes over NVMe on this platform?
Is there a known issue or patch for the NVMe/PCIe/IOMMU driver in this SDK?
I tried modifying the device tree (adding an SMMU node and an iommu-map entry for PCIe0, as shown in the excerpt below), but it did not resolve the problem.
k3-j784s4-main.dtsi
......
smmu0: iommu@36600000 {
compatible = "arm,mmu-500", "arm,smmu-v3";
reg = <0x00 0x36600000 0x00 0x100000>;
interrupt-controller;
#interrupt-cells = <1>;
interrupt-parent = <&gic500>;
interrupts = <GIC_SPI 772 IRQ_TYPE_EDGE_RISING>,
<GIC_SPI 768 IRQ_TYPE_EDGE_RISING>,
<GIC_SPI 773 IRQ_TYPE_EDGE_RISING>;
#global-interrupts = <2>;
#iommu-cells = <1>;
status = "disabled";
};
......
pcie0_rc: pcie@2900000 {
compatible = "ti,j784s4-pcie-host";
reg = <0x00 0x02900000 0x00 0x1000>,
<0x00 0x02907000 0x00 0x400>,
<0x00 0x0d000000 0x00 0x00800000>,
<0x00 0x10000000 0x00 0x00001000>;
reg-names = "intd_cfg", "user_cfg", "reg", "cfg";
interrupt-names = "link_state";
interrupts = <GIC_SPI 318 IRQ_TYPE_EDGE_RISING>;
device_type = "pci";
ti,syscon-pcie-ctrl = <&scm_conf 0x4070>;
max-link-speed = <3>;
num-lanes = <4>;
power-domains = <&k3_pds 332 TI_SCI_PD_EXCLUSIVE>;
clocks = <&k3_clks 332 0>;
clock-names = "fck";
#address-cells = <3>;
#size-cells = <2>;
bus-range = <0x0 0xff>;
vendor-id = <0x104c>;
device-id = <0xb00d>;
msi-map = <0x0 &gic_its 0x0 0x10000>;
dma-coherent;
ranges = <0x01000000 0x0 0x10001000 0x0 0x10001000 0x0 0x0010000>,
<0x02000000 0x0 0x10011000 0x0 0x10011000 0x0 0x7fef000>;
dma-ranges = <0x02000000 0x0 0x0 0x0 0x0 0x10000 0x0>;
/* Add to IOMMU */
iommu-map = <0x0000 &smmu0 0x0000 0x10000>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0 0 0 1 &pcie0_intc 0>,
<0 0 0 2 &pcie0_intc 0>,
<0 0 0 3 &pcie0_intc 0>,
<0 0 0 4 &pcie0_intc 0>;
status = "okay";
pcie0_intc: interrupt-controller {
interrupt-controller;
#interrupt-cells = <1>;
interrupt-parent = <&gic500>;
interrupts = <GIC_SPI 312 IRQ_TYPE_EDGE_RISING>;
};
};
.....
Any suggestions for a workaround or configuration change would be greatly appreciated.
Thank you!