This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

Unhandled fault: Precise External Abort on non-linefetch (0x1028) at 0xd5330400

Hi all:

I am using a DM8168 and have run into a problem with the PCIe bus.

My system often prints the following information:

dx83xx_ioctl DXT_IOCWDevice32 BAR0 offset = 0x162d2638, 434456, base = 0xd3000000
dx83xx_setMbar iowrite32 value 0xf62d2630 address = 0xd5330400, 
Unhandled fault: Precise External Abort on non-linefetch (0x1028) at 0xd5330400
Internal error: : 1028 1
last sysfs file: /sys/devices/virtual/seed_gpio/seed_gpio/dev
Modules linked in: seed_gpio dxt fpgaDrv
CPU: 0 Not tainted (2.6.37+ #3)
PC is at dx83xx_setMbar+0x14c/0x168 [dxt]
LR is at dx83xx_setMbar+0x104/0x168 [dxt]
pc : [<bf0066cc>] lr : [<bf006684>] psr: a0000013
sp : c3b6be78 ip : c3b6be78 fp : c3b6bea4
r10: 00000000 r9 : c3b6a000 r8 : f62d2630
r7 : 00000000 r6 : c5a81a00 r5 : 00000004 r4 : cde15000
r3 : d5330400 r2 : c04fdad4 r1 : 005b65ea r0 : d5330400
Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user
Control: 10c5387d Table: 8e768019 DAC: 00000015
Process app_c131_dm8168 (pid: 137, stack limit = 0xc3b6a2e8)
Stack: (0xc3b6be78 to 0xc3b6c000)
be60: c03a0318 20000000
be80: cde15000 000a21b8 c5a81a00 c5a81a0c 00000000 00000000 c3b6befc c3b6bea8
bea0: bf0076d4 bf00658c d3000000 00000001 00072a0c 00000000 c3b6a000 00000002
bec0: c3b6bf54 c3b6bed0 c009a4a4 c009893c ffffffff 00000000 ce34ab00 00000009
bee0: 000a21b8 00000009 00000000 00000000 c3b6bf74 c3b6bf00 c00d94f0 bf006ee8
bf00: 00000000 00989680 c3b6bf84 c3b6bf18 c008f814 c03a13ac 0000c350 00000000
bf20: 009959d0 00000000 00000002 00000000 fffffffe 00000000 c3b6bf6c c3b6a000
bf40: c3b6bf64 c3b6bf50 c007fc10 cdde3e80 cdde3e80 000a21b8 410c6415 00000009
bf60: 00000000 c3b6a000 c3b6bfa4 c3b6bf78 c00d95bc c00d902c 409f8560 00000001
bf80: 00004007 00000000 162d2638 409f8718 00000036 c004b568 00000000 c3b6bfa8
bfa0: c004b3c0 c00d9570 00000000 162d2638 00000009 410c6415 000a21b8 00000058
bfc0: 00000000 162d2638 409f8718 00000036 000923e8 0000162d 00000016 00162d26
bfe0: 000a21b8 409f8498 00055d68 4845e1cc 20000010 00000009 00000000 00000000
Backtrace: 
[<bf006580>] (dx83xx_setMbar+0x0/0x168 [dxt]) from [<bf0076d4>] (dx83xx_ioctl+0x7f8/0x179c [dxt])
[<bf006edc>] (dx83xx_ioctl+0x0/0x179c [dxt]) from [<c00d94f0>] (do_vfs_ioctl+0x4d0/0x544)
[<c00d9020>] (do_vfs_ioctl+0x0/0x544) from [<c00d95bc>] (sys_ioctl+0x58/0x7c)
r9:c3b6a000 r8:00000000 r7:00000009 r6:410c6415 r5:000a21b8
r4:cdde3e80
[<c00d9564>] (sys_ioctl+0x0/0x7c) from [<c004b3c0>] (ret_fast_syscall+0x0/0x30)
r8:c004b568 r7:00000036 r6:409f8718 r5:162d2638 r4:00000000
Code: ea000005 e5943094 e2833833 e2833b01 (e7933107) 
--[ end trace 8c2d5207179531d9 ]--
DXT_WAIT_DMA_INT, get a wrong int sem
FPGA DEBUG: FPGA VMA close.
Fpga Driver closed

 

 

the code is :

static void dx83xx_setMbar( struct dx83xx_driver *drv, unsigned long mbar, unsigned long *maddr)
{
unsigned int bar;
unsigned char *bar_data;
unsigned int rxb;
unsigned int ctl;
unsigned int addr = *maddr;
int i;

bar_data = (unsigned char *)&bar;
#ifdef CONFIG_PPC64
for (i=0; i<4; i++)
pci_read_config_byte(drv->dev, PCI_BASE_ADDRESS_0+(mbar*4)+3-i, &bar_data[i]);
#else
for (i=0; i<4; i++)
pci_read_config_byte(drv->dev, PCI_BASE_ADDRESS_0+(mbar*4)+i, &bar_data[i]);
#endif
//WDC_PciReadCfg ( dx->hDev, PCI_BASE_ADDRESS_0 + mbar*sizeof( unsigned int), &bar, 4);
bar &= ~BAR_MASK;
addr &= ~BAR_MASK;

// use CBUS( ) to set ctl
if (CBUS(*maddr)) ctl=1; //register
else ctl=0; // memory

rxb = (~bar+1) + addr + ctl;
//printk(KERN_ALERT "dx83xx_setMbar iowrite32 value 0x%x address = 0x%x, \n", rxb,(void __iomem *) (drv->dxt.kernel_addr[CBAR] + PCIEA_RX_BASE_0(PCIEA_OFF) +mbar*4));
iowrite32(rxb, (void __iomem *) (drv->dxt.kernel_addr[CBAR] + PCIEA_RX_BASE_0(PCIEA_OFF) +mbar*4) );
*maddr &= BAR_MASK;
rxb = ioread32((void __iomem *) (drv->dxt.kernel_addr[CBAR] + PCIEA_RX_BASE_0(PCIEA_OFF) +mbar*4) ); //flush after write
}

Who can help me?

 

  • drv->dxt.base_addr[i] = (unsigned long)pci_resource_start(dev, i);
    drv->dxt.size[i] = (unsigned long)pci_resource_len(dev, i);
    drv->dxt.kernel_addr[i] = (unsigned long)ioremap_nocache(drv->dxt.base_addr[i], drv->dxt.size[i]);
    printk("%s() drv->dxt.kernel_addr[%d]=%08lx\n", __func__, i, drv->dxt.kernel_addr[i]);

    this code print :

    PCI: enabling device 0000:01:00.0 (0140 -> 0142)
    dx83xx_init_one() drv->dxt.kernel_addr[0]=d3000000
    dx83xx_init_one() drv->dxt.kernel_addr[1]=d5000000
    seed_gpio major#: 251, minor#: 0
    mknod /dev/dxt0 c 252 0

    But when I read address 0xd5441000, an error occurs; other addresses are OK.

    {
    int rxb_;
    printk("dx83xx_ioctl\n");
    for(i = 0; i < 0x10; i+=4)
    {
    rxb_ = __raw_readl((unsigned int *) (0xd5441000 - 4 + i) );
    printk("rxb_[%d]=%08lx\n", i, rxb_);
    }
    }

    ioctl(wdc->dxt, dx83xx_ioctl
    DXT_IOCINFO, &wdrxb_[0]=00000000
    c->devs)
    ti81xx_pcie: Data abort: address = 0xd5441000 fsr = 0x1028 PC = 0xbf006db0 LR = 0xc0075d28
    ti81xx_pcie: Data abort: address = 0xd5441000 reg_virt = 0xd0820000 SZ_16K = 0x00004000
    Unhandled fault: Precise External Abort on non-linefetch (0x1028) at 0xd5441000
    Internal error: : 1028 [#7]
    last sysfs file: /sys/devices/virtual/seed_gpio/seed_gpio/dev
    Modules linked in: seed_gpio dxt fpgaDrv
    CPU: 0 Tainted: G D (2.6.37+ #24)
    PC is at dx83xx_ioctl+0xa0/0x1584 [dxt]
    LR is at release_console_sem+0x180/0x194
    pc : [<bf006db0>] lr : [<c0075d28>] psr: 80000013
    sp : c7337ea0 ip : c7337de0 fp : c7337efc
    r10: 00000000 r9 : c7336000 r8 : 00000000
    r7 : 40646401 r6 : 00000008 r5 : 0005d024 r4 : c4038800
    r3 : d5441000 r2 : c0511abc r1 : 00000004 r0 : bf00855b
    Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user
    Control: 10c5387d Table: 85d8c019 DAC: 00000015
    Process pcie (pid: 138, stack limit = 0xc73362e8)
    Stack: (0xc7337ea0 to 0xc7338000)
    7ea0: c006f758 a0000013 c7337ecc c7337eb8 c01e9fdc c006f740 cdef5c00 c7337ef0
    7ec0: c7337edc c7337ed0 c7337f3c c7337ed8 c00f741c c0090260 ce34d680 00000003
    7ee0: 0005d024 00000003 00000000 00000000 c7337f74 c7337f00 c00d9a84 bf006d1c
    7f00: cdfff280 00000000 00000000 00000000 c7336000 00000029 cdfff288 00000002
    7f20: cdda4b50 00000000 00000002 00000000 fffffffe 00000000 c7337f6c c7337f48
    7f40: c03b37bc c00469ec ffffffff ce46c01c c9a75700 0005d024 40646401 00000003
    7f60: 00000000 c7336000 c7337fa4 c7337f78 c00d9b50 c00d95c0 c7337fac 00000000
    7f80: c03b0038 0005d020 00000003 00000001 00000036 c004b568 00000000 c7337fa8
    7fa0: c004b3c0 c00d9b04 0005d020 00000003 00000003 40646401 0005d024 00000000
    7fc0: 0005d020 00000003 00000001 00000036 0000817c 00008170 00008180 00044820
    7fe0: 00000000 bef8b498 0003c640 4845e1cc 60000010 00000003 cf0ec807 c8c8c68d
    Backtrace:
    [<bf006d10>] (dx83xx_ioctl+0x0/0x1584 [dxt]) from [<c00d9a84>] (do_vfs_ioctl+0x4d0/0x544)
    [<c00d95b4>] (do_vfs_ioctl+0x0/0x544) from [<c00d9b50>] (sys_ioctl+0x58/0x7c)
    r9:c7336000 r8:00000000 r7:00000003 r6:40646401 r5:0005d024
    r4:c9a75700
    [<c00d9af8>] (sys_ioctl+0x0/0x7c) from [<c004b3c0>] (ret_fast_syscall+0x0/0x30)
    r8:c004b568 r7:00000036 r6:00000001 r5:00000003 r4:0005d020
    Code: e1a01006 e59f0f2c e0863003 e2866004 (e5932000)
    ---[ end trace 42f0eeccca5edab6 ]---

  • /**
    * ti81xx_pcie_fault() - ARM abort handler for PCIe non-posted completion aborts
    * @addr: Address target on which the fault generated
    * @fsr: CP15 fault status register value
    * @regs: Pointer to register structure on abort
    *
    * Handles precise abort caused due to PCIe operation.
    *
    * Note that we are relying on virtual address filtering to determine if the
    * target of the precise aborts was a PCIe module access (i.e., config, I/O,
    * register) and only handle such aborts. We could check PCIe error status to
    * confirm if the abort was caused due to non-posted completion status received
    * by PCIESS, but this may not always be true and aborts from some downstream
    * devices, such as PCI-PCI bridges etc may not result into error status bit
    * getting set.
    *
    * Ignores and returns abort as unhandled otherwise.
    *
    * Also note that, using error status check (as was done in earlier
    * implementation) would also handle failed memory accesses (non-posted), but
    * address filtering based handling will cause aborts for memory accesses as the
    * addresses will be outside the PCIESS module space. This seems OK, as any
    * memory access after enumeration is sole responsibility of the driver and the
    * system integrator (e.g., access failures due to hotplug, suspend etc). If
    * using error check based handling, we also need to clear PCIe error status on
    * detecting errors.
    *
    * Note: Due to specific h/w implementation, we can't be sure of what kind of
    * error occurred (UR Completion, CA etc) and all we get is raw error IRQ status
    * and probably SERR which indicate 'some kind of' error - fatal or non-fatal is
    * received/happened.
    */
    static int
    ti81xx_pcie_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
    {
    unsigned long instr = *(unsigned long *)regs->ARM_pc;

    pr_debug(DRIVER_NAME ": Data abort: address = 0x%08lx "
    "fsr = 0x%03x PC = 0x%08lx LR = 0x%08lx",
    addr, fsr, regs->ARM_pc, regs->ARM_lr);

    #if 0
    if (!get_and_clear_err())
    return -1;
    #endif

    /* Note: Only handle PCIESS module space access */
    if ((addr < reg_virt) || (addr >= (reg_virt + SZ_16K)))
    return -1;

    Why did the OCP error occur?

  • Basically you are trying to read/write a memory address on the other end of the PCIe bus that does not exist.

  • But reading this address should not have caused a fatal error (OCP error)!

    This address is valid for the CPU, and it is a legal L3 address.

  • Regarding the PCIe EP, the RC non-posted read response timeout is determined as follows:

    As we discussed and checked internally, our timeout setting is calculated by the following steps.
    1. Read the register at 0xbe488000; its lower 16 bits are 0x400, so the timeout value is 0x400 * 16 cycles.
    2. The frequency is derived from your D7 DDR3 frequency configuration: if it is 1600 MHz, the register module's clock is that divided by 4, i.e. 400 MHz, so the timeout value is 0x400 * 16 / 400 MHz, approximately 40 us.

    From your description, your host timeout is only about 10 us, so there is a big mismatch between our configurations. Can you increase the host timeout? That is the safest way.

    How can I prevent non-posted PCIe memory reads on the DM8168 from causing an error?