/******************************************************************************
 * Copyright (c) 2011-2012 Texas Instruments Incorporated - http://www.ti.com
 * Copyright (c) 2017 Appear TV AS - http://appeartv.com
 *
 * This code is a derivative work of the PCIe linux host loader
 * from Texas Instruments Processor SDK available at:
 * http://www.ti.com/tool/processor-sdk-c667x
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation version 2.
 *
 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
 * kind, whether express or implied; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *****************************************************************************/


#include "pcie.h"
#include "../common/device.h"
#include "pci_registers.h"

#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/types.h>


/**
 * Bookkeeping for one DSP address window exposed through a PCI BAR.
 *
 * name and ib_offset are set statically in dsp_regions[]; the remaining
 * fields are filled in by _read_pci_bars().
 */
struct dsp_region_s {
    /* Label handed to request_mem_region()/request_region() */
    const char *name;
    /* DSP-side address; written to IB_OFFSET for every region except APP */
    const uint32_t ib_offset;
    /* pci_resource_start() of the BAR, or 0 when the BAR is not a memory BAR */
    resource_size_t base_addr;
    /* pci_resource_len() of the BAR */
    resource_size_t length;
    /* ioremap() of base_addr/length, or NULL when base_addr is 0 */
    uint32_t *virtual_ptr;
};

/**
 * Index of each region in dsp_regions[].
 *
 * The same value is also used as the PCI BAR number when the BARs are read
 * and released, so the numbering must not change.
 */
enum dsp_region_index {
    APP_REGION  = 0,
    DEV_REGION  = 1,
    LL2_REGION  = 2,
    MSM_REGION  = 3,
    DDR_REGION  = 4,
    NUM_REGIONS = 5,    /* number of regions, not a region itself */
};

/**
 * Inbound (IB) translation slot used by each region.
 *
 * Slot <n> serves region <n + 1> of enum dsp_region_index.
 */
enum dsp_region_ib_index {
    /* APP_REGION (BAR0) has hardwired IB translation, hence no IB index. */
    DEV_REGION_IB  = 0,
    LL2_REGION_IB  = 1,
    MSM_REGION_IB  = 2,
    DDR_REGION_IB  = 3,
    NUM_REGION_IBS = 4,     /* number of IB slots in use */
};

/*
 * Region table, indexed by enum dsp_region_index.
 *
 * Only the name and the DSP-side IB offset are given here; base_addr, length
 * and virtual_ptr start out zeroed and are filled in by _read_pci_bars().
 */
struct dsp_region_s dsp_regions[NUM_REGIONS] = {
    /* APP_REGION: this ib_offset is never programmed (hardwired translation) */
    {
        .name = "DSPLINK_app",
        .ib_offset = PCIE_BASE_ADDRESS,
    },
    /* DEV_REGION: initial offset; retargeted on every register access */
    {
        .name = "DSPLINK_dev",
        .ib_offset = CHIP_LEVEL_BASE_ADDRESS,
    },
    /* LL2_REGION: initially core 0; retargeted on every LL2 access */
    {
        .name = "DSPLINK_ll2",
        .ib_offset = LL2_START_CORE(0),
    },
    /* MSM_REGION */
    {
        .name = "DSPLINK_msm",
        .ib_offset = MSM_START,
    },
    /* DDR_REGION */
    {
        .name = "DSPLINK_ddr",
        .ib_offset = DDR_START,
    },
};


/* Held while the DEV region IB offset is retargeted and the window is accessed */
static DEFINE_MUTEX(dev_mutex);
/* Held while the LL2 region IB offset is retargeted and the window is accessed */
static DEFINE_MUTEX(ll2_mutex);
/* The DSP's PCI device; NULL until _find_pci_device() has found it */
struct pci_dev *pci_dev = NULL;


/**
 * Find our PCI device
 *
 * Looks up the first device whose vendor/device IDs are
 * ATVDSP_PCI_VENDOR_ID/ATVDSP_PCI_DEVICE_ID and stores it in the global
 * pci_dev. pci_dev is left untouched when no such device is present.
 *
 * pci_get_device() hands the device back with its reference count raised;
 * that reference is kept for as long as pci_dev points at the device.
 */
static void _find_pci_device(void)
{
    struct pci_dev *dev;

    /* Let the PCI core do the ID matching instead of walking every device */
    /* TODO: Check revision (and class?) also */
    dev = pci_get_device(ATVDSP_PCI_VENDOR_ID, ATVDSP_PCI_DEVICE_ID, NULL);
    if (dev == NULL) {
        return;
    }

    printk("Found PCI device\n");
    pci_dev = dev;
    printk("PCI device: vendor=0x%04x, dev=0x%04x, irq=0x%08x\n", dev->vendor, dev->device, dev->irq);
}


/**
 * Read BARs to set up region structs
 */
static void _read_pci_bars(void)
{
    struct dsp_region_s *dsp_region;
    uint8_t i;

    for (i = 0; i < NUM_REGIONS; i++) {
        dsp_region = &dsp_regions[i];

        dsp_region->length = pci_resource_len(pci_dev, i);

        if (pci_resource_flags(pci_dev, i) & IORESOURCE_MEM) {
            dsp_region->base_addr = pci_resource_start(pci_dev, i);
            request_mem_region(dsp_region->base_addr, dsp_region->length, dsp_region->name);
        } else {
            /* TODO: figure out if this is correct. this is the effect of old code */
            dsp_region->base_addr = 0;
            request_region(dsp_region->base_addr, dsp_region->length, dsp_region->name);
        }

        /* TODO: figoure out if this check is needed, or if memory vs port check is enough */
        if (dsp_region->base_addr > 0) {
            dsp_region->virtual_ptr = ioremap(dsp_region->base_addr, dsp_region->length);
        } else {
            dsp_region->virtual_ptr = NULL;
        }
    }
}


/**
 * Read or write DSP device/peripheral register
 *
 * Points the DEV region's inbound translation at base_address and then copies
 * size bytes, one 32-bit word at a time, between the user buffer and the DEV
 * region starting at offset. dev_mutex is held for the whole transfer so
 * that nobody else can retarget the window in the meantime.
 *
 * Note that MST_PRIV bit must be set in order to access registers via PCIe.
 * Currently, atvdsp_pcie_init() sets the MST_PRIV bit.
 *
 * @param base_address  DSP address the DEV window is pointed at; 32-bit aligned
 * @param offset        byte offset into the window; non-negative, 32-bit aligned
 * @param user_buffer   user-space buffer that is read from or written to
 * @param size          number of bytes to transfer; multiple of 4
 * @param write         true: user buffer -> registers, false: registers -> user buffer
 *
 * @return size on success (0 for an empty request), -EINVAL for a misaligned
 *         or out-of-range request, -EFAULT when the user buffer cannot be
 *         accessed (words of earlier blocks have been transferred by then).
 */
static int64_t _read_write_registers(uint32_t base_address, const loff_t offset, char *user_buffer,
        const size_t size, bool write)
{
    /* Highest value of offset + size accepted for a register access, in bytes */
    const uint64_t window_size = 32 << 10;
    uint32_t block[BLOCK_TRANSFER_SIZE / 4];
    size_t block_size;
    uint32_t i;
    uint32_t *ptr;
    size_t remaining;
    int64_t ret_val;

    if (size == 0) {
        return 0;
    }

    if (size % 4) {
        printk(KERN_ERR "atvdsp pcie: size of register data is not a multiple of 32 bits!\n");
        return -EINVAL;
    }

    if (base_address % 4) {
        printk(KERN_ERR "atvdsp pcie: register base address is not 32-bit aligned!\n");
        return -EINVAL;
    }

    /*
     * A negative offset must be rejected explicitly: "offset + size" is
     * evaluated in unsigned arithmetic and would wrap around to a small
     * value that passes the range check.
     */
    if (offset < 0) {
        printk(KERN_ERR "atvdsp pcie: offset of register data is negative!\n");
        return -EINVAL;
    }

    if (offset % 4) {
        printk(KERN_ERR "atvdsp pcie: offset of register data is not 32-bit aligned!\n");
        return -EINVAL;
    }

    /* Same test as "offset + size > window_size", written so it cannot overflow */
    if (((uint64_t)offset > window_size) || (size > (window_size - (uint64_t)offset))) {
        printk(KERN_ERR "atvdsp pcie: access will be past end of register region!\n");
        return -EINVAL;
    }

    mutex_lock(&dev_mutex);
    iowrite32(base_address, dsp_regions[APP_REGION].virtual_ptr + IB_OFFSET(DEV_REGION_IB) / 4);

    ret_val = size;
    /* virtual_ptr is a uint32_t pointer, hence the division by 4 */
    ptr = dsp_regions[DEV_REGION].virtual_ptr + offset / 4;

    remaining = size;
    while (remaining) {
        /* Move at most one bounce buffer's worth per iteration */
        if (remaining > sizeof(block)) {
            block_size = sizeof(block);
        } else {
            block_size = remaining;
        }

        /* TODO: figure out if endian-conversions are necessary here */
        if (write) {
            if (copy_from_user(block, user_buffer, block_size)) {
                printk(KERN_ERR "atvdsp pcie: copy from user space failed!\n");
                ret_val = -EFAULT;
                goto return_now;
            }

            for (i = 0; i < (block_size / 4); i++) {
                iowrite32(block[i], ptr++);
            }
        } else {
            for (i = 0; i < (block_size / 4); i++) {
                block[i] = ioread32(ptr++);
            }

            if (copy_to_user(user_buffer, block, block_size)) {
                printk(KERN_ERR "atvdsp pcie: copy to user space failed!\n");
                ret_val = -EFAULT;
                goto return_now;
            }
        }

        user_buffer += block_size;
        remaining -= block_size;
    }

return_now:
    mutex_unlock(&dev_mutex);

    return ret_val;
}


/**
 * Read DSP device/peripheral registers into a user-space buffer.
 *
 * Logs the request at debug level and forwards it to _read_write_registers()
 * with write == false; the return value is passed through unchanged.
 */
int64_t atvdsp_read_registers(uint32_t base_address, const loff_t offset,
        char *user_buffer, const size_t size)
{
    /* %zu is the size_t specifier; %llx needs an unsigned long long argument */
    printk(KERN_DEBUG "atvdsp pcie: reading %zu bytes from 0x%llx to 0x%lx\n", size,
            (unsigned long long)(base_address + offset), (unsigned long)(uintptr_t)user_buffer);
    return _read_write_registers(base_address, offset, user_buffer, size, false);
}


/**
 * Write DSP device/peripheral registers from a user-space buffer.
 *
 * Logs the request at debug level and forwards it to _read_write_registers()
 * with write == true; the return value is passed through unchanged.
 */
int64_t atvdsp_write_registers(uint32_t base_address, const loff_t offset,
        const char *user_buffer, const size_t size)
{
    /* %zu is the size_t specifier; %llx needs an unsigned long long argument */
    printk(KERN_DEBUG "atvdsp pcie: writing %zu bytes to 0x%llx from 0x%lx\n", size,
            (unsigned long long)(base_address + offset), (unsigned long)(uintptr_t)user_buffer);
    /* The shared helper takes a non-const buffer; it only reads it when writing */
    return _read_write_registers(base_address, offset, (char *)user_buffer, size, true);
}


/**
 * Read or write DSP memory
*/
static int64_t _read_write_memory(dsp_mem_type_t mem_type, const loff_t offset, char *user_buffer,
        const size_t size, bool write)
{
    uint32_t block[BLOCK_TRANSFER_SIZE / 4];
    size_t block_size;
    uint32_t i;
    bool ll2_locked;
    uint32_t *ptr;
    size_t remaining;
    int64_t ret_val;

    if (size == 0) {
        return 0;
    }

    if (size % 4) {
        printk(KERN_ERR "atvdsp pcie: size of memory data is not a multiple of 32 bits!\n");
        return -EINVAL;
    }

    if (offset % 4) {
        printk(KERN_ERR "atvdsp pcie: offset of memory data is not 32-bit aligned!\n");
        return -EINVAL;
    }

    ll2_locked = false;
    ret_val = size;

    switch (mem_type) {
        case LL2_CORE0:
        case LL2_CORE1:
        case LL2_CORE2:
        case LL2_CORE3:
        case LL2_CORE4:
        case LL2_CORE5:
        case LL2_CORE6:
        case LL2_CORE7:
            if  ((offset + size) > LL2_SIZE) {
                printk(KERN_ERR "atvdsp pcie: access will be past end of LL2!\n");
                return -EINVAL;
            }
            /* prevent others from changing IB translation until we're done */
            mutex_lock(&ll2_mutex);
            ll2_locked = true;

            iowrite32(LL2_START_CORE(mem_type - LL2_CORE0), dsp_regions[APP_REGION].virtual_ptr + IB_OFFSET(LL2_REGION_IB) / 4);
            ptr = dsp_regions[LL2_REGION].virtual_ptr  + offset / 4;
            break;

        case MSM:
            if  ((offset + size) > MSM_SIZE) {
                printk(KERN_ERR "atvdsp pcie: access will be past end of MSM!\n");
                return -EINVAL;
            }
            ptr = dsp_regions[MSM_REGION].virtual_ptr + offset / 4;
            break;

        case DDR:
            if  ((offset + size) > DDR_SIZE) {
                printk(KERN_ERR "atvdsp pcie: access will be past end of DDR!\n");
                return -EINVAL;
            }
            ptr = dsp_regions[DDR_REGION].virtual_ptr + offset / 4;
            break;

        default:
            printk(KERN_ERR "atvdsp pcie: got invalid memory type %d!\n", mem_type);
            return -EINVAL;
    }

    remaining = size;
    while (remaining) {
        if (remaining > sizeof(block)) {
            block_size = sizeof(block);
        } else {
            block_size = remaining;
        }

        /* TODO: figure out if endian-conversions are necessary here */
        if (write) {
            if (copy_from_user(block, user_buffer, block_size)) {
                printk(KERN_ERR "atvdsp pcie: copy from user space failed!\n");
                ret_val = -EFAULT;
                goto return_now;
            }

            for (i = 0; i < (block_size / 4); i++) {
                iowrite32(block[i], ptr++);
            }
        } else {
            for (i = 0; i < (block_size / 4); i++) {
                block[i] = ioread32(ptr++);
            }

            if (copy_to_user(user_buffer, block, block_size)) {
                printk(KERN_ERR "atvdsp pcie: copy to user space failed!\n");
                ret_val = -EFAULT;
                goto return_now;
            }
        }

        user_buffer += block_size;
        remaining -= block_size;
    }

return_now:
    if (ll2_locked) {
        mutex_unlock(&ll2_mutex);
    }

    return ret_val;
}

/**
 * Read DSP memory into a user-space buffer.
 *
 * Logs the request at debug level and forwards it to _read_write_memory()
 * with write == false; the return value is passed through unchanged.
 */
int64_t atvdsp_read_memory(dsp_mem_type_t mem_type, const loff_t offset,
        char *user_buffer, const size_t size)
{
    /* %zu is the size_t specifier; %llx needs an unsigned long long argument */
    printk(KERN_DEBUG "atvdsp pcie: reading %zu bytes from 0x%llx of mem %d to 0x%lx\n", size,
            (unsigned long long)offset, mem_type, (unsigned long)(uintptr_t)user_buffer);
    return _read_write_memory(mem_type, offset, user_buffer, size, false);
}


/**
 * Write DSP memory from a user-space buffer.
 *
 * Logs the request at debug level and forwards it to _read_write_memory()
 * with write == true; the return value is passed through unchanged.
 */
int64_t atvdsp_write_memory(dsp_mem_type_t mem_type, const loff_t offset,
        const char *user_buffer, const size_t size)
{
    /* %zu is the size_t specifier; %llx needs an unsigned long long argument */
    printk(KERN_DEBUG "atvdsp pcie: writing %zu bytes to 0x%llx of mem %d from 0x%lx\n", size,
            (unsigned long long)offset, mem_type, (unsigned long)(uintptr_t)user_buffer);
    /* The shared helper takes a non-const buffer; it only reads it when writing */
    return _read_write_memory(mem_type, offset, (char *)user_buffer, size, true);
}


/**
 * Initialize the PCI device, grab resources
 */
int atvdsp_pcie_init(void)
{
    uint32_t i;
    uint32_t *reg_ptr;
    int ret_val;

    printk(KERN_DEBUG "Finding the device...\n");
    _find_pci_device();

    if (pci_dev == NULL) {
        printk(KERN_ERR "PCIe device not found!\n");
        return -ENODEV;
    }

    printk(KERN_DEBUG "Reading the BAR areas...\n");
    _read_pci_bars();

    printk(KERN_DEBUG "Enabling the device...\n");
    /* Linux Function: Initialize device before it's used by a driver */
    ret_val = pci_enable_device(pci_dev);
    if (ret_val != 0) {
        printk(KERN_ERR "Got error %d when enabling the device!\n", ret_val);
        return ret_val;
    }

    /* TODO: figure out if this is necessary -- do something more useful with it? */
    /* Linux Function: Associates the given data with the given pci_driver structure */
    pci_set_drvdata(pci_dev, dsp_regions[LL2_REGION].virtual_ptr);

    /* TODO: uncomment if DMA support needed
    _enable_bus_master(); */

    printk(KERN_DEBUG "Setting up IB translation...\n");
    /* Pointing to the beginning of the application registers */
    reg_ptr = dsp_regions[APP_REGION].virtual_ptr; // (uint32_t *)appRegVirt;

	/* Set MST_PRIV bit to access device registers via PCIe */
	iowrite32(((ioread32(reg_ptr + PRIORITY / 4)) | 0x00010000), reg_ptr + PRIORITY / 4);

    /**
    * Set up IB translations:
    * Skip APP region (BAR0) because it has hardwired IB translation.
    * Use IB0 for DEV region (BAR1),
    * IB1 for LL2 region (BAR2),
    * IB2 for MSM region (BAR3) and
    * IB3 for DDR region (BAR4/5).
    */
    for (i = 0; i < NUM_REGION_IBS; i++) {
        /* IB<i>_BAR shall be set to BAR<i+1> */
        iowrite32(i + 1, reg_ptr + IB_BAR(i)/4);

        /* Set IB start address equal to BAR */
        iowrite32(pci_dev->resource[i + 1].start & 0xFFFFFFFF, reg_ptr + IB_START_LO(i)/4);
        iowrite32(pci_dev->resource[i + 1].start >> 32, reg_ptr + IB_START_HI(i)/4);

        /* Set IB offset address to what's configured -- note that we skip APP_REGION */
        iowrite32(dsp_regions[i + 1].ib_offset, reg_ptr + IB_OFFSET(i)/4);
    }

    /* TODO: uncomment if DMA support needed
    printk(KERN_DEBUG "Registering the irq %d...\n", pci_dev_irq);
    request_irq(pci_dev_irq, _pci_irq_handler, IRQF_SHARED, "ATVDSP PCI", &dummy);
    _enable_pci_interrupt(); */

    return 0;
}


/**
 * Deinitialize the PCI device, free resources
 *
 * Unmaps every mapped region, releases the address ranges that were reserved
 * for the BARs and drops the device reference obtained from
 * pci_get_device(). Does nothing when no device was found; calling it a
 * second time is harmless because pci_dev is reset to NULL.
 *
 * NOTE(review): the device is not disabled again with pci_disable_device() --
 * confirm whether that is intentional.
 */
void atvdsp_pcie_deinit(void)
{
    unsigned int i;
    struct dsp_region_s *dsp_region;

    if (pci_dev == NULL) {
        return;
    }

    /* TODO: uncomment if DMA support needed
    _disable_pci_interrupt(); */

    for (i = 0; i < NUM_REGIONS; i++) {
        dsp_region = &dsp_regions[i];

        /* Regions without a memory BAR (or whose mapping failed) were never mapped */
        if (dsp_region->virtual_ptr != NULL) {
            iounmap(dsp_region->virtual_ptr);
            dsp_region->virtual_ptr = NULL;
        }

        /* Mirror the request made for this BAR when it was read */
        if (pci_resource_flags(pci_dev, i) & IORESOURCE_MEM) {
            release_mem_region(dsp_region->base_addr, dsp_region->length);
        } else {
            release_region(dsp_region->base_addr, dsp_region->length);
        }
    }

    /* TODO: uncomment if DMA support needed
    free_irq(pci_dev_irq, &dummy); */

    /* pci_get_device() raised the device's reference count; give it back */
    pci_dev_put(pci_dev);
    pci_dev = NULL;
}
