If we have only 2k RBs like on the latest (AX210) hardware, then
even on x86 where PAGE_SIZE is 4k we currently waste half of the
memory.
If this is the case, return partial pages from the allocator and
track the offset in each RBD (to be able to find the data in them
and remap them later.)
This might also address other platforms with larger PAGE_SIZE by
putting more RBs into a single large page.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
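For illustration only, a minimal standalone sketch of the partial-page
scheme follows (hypothetical names, no locking; in the driver the cached
page lives in struct iwl_trans_pcie under alloc_page_lock, and the sizes
come from rx_buf_size/rx_page_order):

	/*
	 * Sketch only: hand out one page in rbsize-sized chunks, taking an
	 * extra page reference per chunk so the page is freed only after
	 * every user has released its reference.  Needs <linux/gfp.h> and
	 * <linux/mm.h> for alloc_pages(), get_page() and get_order().
	 */
	static struct page *cached_page;	/* page we still hand out parts of */
	static unsigned int cached_used;	/* bytes of cached_page already used */

	static struct page *rx_alloc_partial(unsigned int rbsize,
					     unsigned int allocsize,
					     unsigned int *offset)
	{
		struct page *page;

		if (cached_page) {
			/* reuse the remainder of the previous allocation */
			*offset = cached_used;
			page = cached_page;
			cached_used += rbsize;
			if (cached_used >= allocsize)
				cached_page = NULL;	/* fully consumed */
			else
				get_page(page);	/* reference for this RB */
			return page;
		}

		page = alloc_pages(GFP_KERNEL, get_order(allocsize));
		if (!page)
			return NULL;

		/* keep the remainder if at least one more RB fits */
		if (2 * rbsize <= allocsize) {
			get_page(page);
			cached_page = page;
			cached_used = rbsize;
		}

		*offset = 0;
		return page;
	}

With 2k RBs and a 4k page, the first caller gets offset 0 and the second
gets offset 2048, so both halves of the page are used; each caller holds
its own page reference and drops it independently.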
* @page: driver's pointer to the rxb page
* @invalid: rxb is in driver ownership - not owned by HW
* @vid: index of this rxb in the global table
+ * @offset: indicates which offset of the page (in bytes)
+ * this buffer uses (if multiple RBs fit into one page)
*/
struct iwl_rx_mem_buffer {
dma_addr_t page_dma;
u16 vid;
bool invalid;
struct list_head list;
+ u32 offset;
* @in_rescan: true if we have triggered a device rescan
* @base_rb_stts: base virtual address of receive buffer status for all queues
* @base_rb_stts_dma: base physical address of receive buffer status
+ * @supported_dma_mask: DMA mask to validate the actual address against,
+ * will be DMA_BIT_MASK(11) or DMA_BIT_MASK(12) depending on the device
+ * @alloc_page_lock: spinlock for the page allocator
+ * @alloc_page: allocated page to still use parts of
+ * @alloc_page_used: how much of the allocated page was already used (bytes)
*/
struct iwl_trans_pcie {
struct iwl_rxq *rxq;
bool pcie_dbg_dumped_once;
u32 rx_page_order;
u32 rx_buf_bytes;
+ u32 supported_dma_mask;
+
+ /* allocator lock for the two values below */
+ spinlock_t alloc_page_lock;
+ struct page *alloc_page;
+ u32 alloc_page_used;
/*protect hw register */
spinlock_t reg_lock;
static void iwl_pcie_rxmq_restock(struct iwl_trans *trans,
struct iwl_rxq *rxq)
{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_rx_mem_buffer *rxb;
/*
list);
list_del(&rxb->list);
rxb->invalid = false;
- /* 12 first bits are expected to be empty */
- WARN_ON(rxb->page_dma & DMA_BIT_MASK(12));
+ /* some low bits are expected to be unset (depending on hw) */
+ WARN_ON(rxb->page_dma & trans_pcie->supported_dma_mask);
/* Point to Rx buffer via next RBD in circular buffer */
iwl_pcie_restock_bd(trans, rxq, rxb);
rxq->write = (rxq->write + 1) & (rxq->queue_size - 1);
*
*/
static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
- gfp_t priority)
+ u32 *offset, gfp_t priority)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ unsigned int rbsize = iwl_trans_get_rb_size(trans_pcie->rx_buf_size);
+ unsigned int allocsize = PAGE_SIZE << trans_pcie->rx_page_order;
struct page *page;
gfp_t gfp_mask = priority;
if (trans_pcie->rx_page_order > 0)
gfp_mask |= __GFP_COMP;
+ if (trans_pcie->alloc_page) {
+ spin_lock_bh(&trans_pcie->alloc_page_lock);
+ /* recheck */
+ if (trans_pcie->alloc_page) {
+ *offset = trans_pcie->alloc_page_used;
+ page = trans_pcie->alloc_page;
+ trans_pcie->alloc_page_used += rbsize;
+ if (trans_pcie->alloc_page_used >= allocsize)
+ trans_pcie->alloc_page = NULL;
+ else
+ get_page(page);
+ spin_unlock_bh(&trans_pcie->alloc_page_lock);
+ return page;
+ }
+ spin_unlock_bh(&trans_pcie->alloc_page_lock);
+ }
+
/* Alloc a new receive buffer */
page = alloc_pages(gfp_mask, trans_pcie->rx_page_order);
if (!page) {
"Failed to alloc_pages\n");
return NULL;
}
"Failed to alloc_pages\n");
return NULL;
}
+
+ if (2 * rbsize <= allocsize) {
+ spin_lock_bh(&trans_pcie->alloc_page_lock);
+ if (!trans_pcie->alloc_page) {
+ get_page(page);
+ trans_pcie->alloc_page = page;
+ trans_pcie->alloc_page_used = rbsize;
+ }
+ spin_unlock_bh(&trans_pcie->alloc_page_lock);
+ }
+
+ *offset = 0;
return page;
}
struct page *page;
while (1) {
+ unsigned int offset;
+
spin_lock(&rxq->lock);
if (list_empty(&rxq->rx_used)) {
spin_unlock(&rxq->lock);
}
spin_unlock(&rxq->lock);
- /* Alloc a new receive buffer */
- page = iwl_pcie_rx_alloc_page(trans, priority);
+ page = iwl_pcie_rx_alloc_page(trans, &offset, priority);
BUG_ON(rxb->page);
rxb->page = page;
+ rxb->offset = offset;
/* Get physical address of the RB */
rxb->page_dma =
- dma_map_page(trans->dev, page, 0,
+ dma_map_page(trans->dev, page, rxb->offset,
trans_pcie->rx_buf_bytes,
DMA_FROM_DEVICE);
if (dma_mapping_error(trans->dev, rxb->page_dma)) {
BUG_ON(rxb->page);
/* Alloc a new receive buffer */
- page = iwl_pcie_rx_alloc_page(trans, gfp_mask);
+ page = iwl_pcie_rx_alloc_page(trans, &rxb->offset,
+ gfp_mask);
if (!page)
continue;
rxb->page = page;
/* Get physical address of the RB */
- rxb->page_dma = dma_map_page(trans->dev, page, 0,
+ rxb->page_dma = dma_map_page(trans->dev, page,
+ rxb->offset,
trans_pcie->rx_buf_bytes,
DMA_FROM_DEVICE);
if (dma_mapping_error(trans->dev, rxb->page_dma)) {
kfree(trans_pcie->rx_pool);
kfree(trans_pcie->global_table);
kfree(trans_pcie->rxq);
+
+ if (trans_pcie->alloc_page)
+ __free_pages(trans_pcie->alloc_page, trans_pcie->rx_page_order);
}
static void iwl_pcie_rx_move_to_allocator(struct iwl_rxq *rxq,
bool reclaim;
int index, cmd_index, len;
struct iwl_rx_cmd_buffer rxcb = {
- ._offset = offset,
+ ._offset = rxb->offset + offset,
._rx_page_order = trans_pcie->rx_page_order,
._page = rxb->page,
._page_stolen = false,
* rx_free list for reuse later. */
if (rxb->page != NULL) {
rxb->page_dma =
- dma_map_page(trans->dev, rxb->page, 0,
+ dma_map_page(trans->dev, rxb->page, rxb->offset,
trans_pcie->rx_buf_bytes,
DMA_FROM_DEVICE);
if (dma_mapping_error(trans->dev, rxb->page_dma)) {
iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size);
trans_pcie->rx_buf_bytes =
iwl_trans_get_rb_size(trans_pcie->rx_buf_size);
+ trans_pcie->supported_dma_mask = DMA_BIT_MASK(12);
+ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+ trans_pcie->supported_dma_mask = DMA_BIT_MASK(11);
trans_pcie->bc_table_dword = trans_cfg->bc_table_dword;
trans_pcie->scd_set_active = trans_cfg->scd_set_active;
rb->index = cpu_to_le32(i);
memcpy(rb->data, page_address(rxb->page), max_len);
/* remap the page for the free benefit */
- rxb->page_dma = dma_map_page(trans->dev, rxb->page, 0,
- max_len,
- DMA_FROM_DEVICE);
+ rxb->page_dma = dma_map_page(trans->dev, rxb->page,
+ rxb->offset, max_len,
+ DMA_FROM_DEVICE);
*data = iwl_fw_error_next_data(*data);
}
trans_pcie->opmode_down = true;
spin_lock_init(&trans_pcie->irq_lock);
spin_lock_init(&trans_pcie->reg_lock);
+ spin_lock_init(&trans_pcie->alloc_page_lock);
mutex_init(&trans_pcie->mutex);
init_waitqueue_head(&trans_pcie->ucode_write_waitq);