Discussion:
[PATCH v2 1/2] crypto: qat - Prevent dma mapping zero length assoc data
Tadeusz Struk
2014-10-14 01:24:26 UTC
Permalink
Do not attempt to dma map associated data if it is zero length.

Signed-off-by: Tadeusz Struk <***@intel.com>
---
drivers/crypto/qat/qat_common/qat_algs.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 3e26fa2..bd4bd27 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -608,6 +608,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
goto err;

for_each_sg(assoc, sg, assoc_n, i) {
+ if (!sg->length)
+ continue;
bufl->bufers[bufs].addr = dma_map_single(dev,
sg_virt(sg),
sg->length,
Tadeusz Struk
2014-10-14 01:24:32 UTC
Permalink
In a system with NUMA configuration we want to enforce that the accelerator is
connected to a node with memory to avoid cross QPI memory transaction.
Otherwise there is no point in using the accelerator as the encryption in
software will be faster.

Signed-off-by: Tadeusz Struk <***@intel.com>
---
drivers/crypto/qat/qat_common/adf_accel_devices.h | 3 +-
drivers/crypto/qat/qat_common/adf_transport.c | 12 +++++---
drivers/crypto/qat/qat_common/qat_algs.c | 5 ++-
drivers/crypto/qat/qat_common/qat_crypto.c | 8 +++--
drivers/crypto/qat/qat_dh895xcc/adf_admin.c | 2 +
drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 32 ++++++++-------------
drivers/crypto/qat/qat_dh895xcc/adf_isr.c | 2 +
7 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 9282381..fe7b3f0 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -198,8 +198,7 @@ struct adf_accel_dev {
struct dentry *debugfs_dir;
struct list_head list;
struct module *owner;
- uint8_t accel_id;
- uint8_t numa_node;
struct adf_accel_pci accel_pci_dev;
+ uint8_t accel_id;
} __packed;
#endif
diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c
index 5f3fa45..9dd2cb7 100644
--- a/drivers/crypto/qat/qat_common/adf_transport.c
+++ b/drivers/crypto/qat/qat_common/adf_transport.c
@@ -419,9 +419,10 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
WRITE_CSR_RING_BASE(csr_addr, bank_num, i, 0);
ring = &bank->rings[i];
if (hw_data->tx_rings_mask & (1 << i)) {
- ring->inflights = kzalloc_node(sizeof(atomic_t),
- GFP_KERNEL,
- accel_dev->numa_node);
+ ring->inflights =
+ kzalloc_node(sizeof(atomic_t),
+ GFP_KERNEL,
+ dev_to_node(&GET_DEV(accel_dev)));
if (!ring->inflights)
goto err;
} else {
@@ -469,13 +470,14 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev)
int i, ret;

etr_data = kzalloc_node(sizeof(*etr_data), GFP_KERNEL,
- accel_dev->numa_node);
+ dev_to_node(&GET_DEV(accel_dev)));
if (!etr_data)
return -ENOMEM;

num_banks = GET_MAX_BANKS(accel_dev);
size = num_banks * sizeof(struct adf_etr_bank_data);
- etr_data->banks = kzalloc_node(size, GFP_KERNEL, accel_dev->numa_node);
+ etr_data->banks = kzalloc_node(size, GFP_KERNEL,
+ dev_to_node(&GET_DEV(accel_dev)));
if (!etr_data->banks) {
ret = -ENOMEM;
goto err_bank;
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index bd4bd27..7893a8b 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -599,7 +599,8 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
if (unlikely(!n))
return -EINVAL;

- bufl = kmalloc_node(sz, GFP_ATOMIC, inst->accel_dev->numa_node);
+ bufl = kmalloc_node(sz, GFP_ATOMIC,
+ dev_to_node(&GET_DEV(inst->accel_dev)));
if (unlikely(!bufl))
return -ENOMEM;

@@ -645,7 +646,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
struct qat_alg_buf *bufers;

buflout = kmalloc_node(sz, GFP_ATOMIC,
- inst->accel_dev->numa_node);
+ dev_to_node(&GET_DEV(inst->accel_dev)));
if (unlikely(!buflout))
goto err;
bloutp = dma_map_single(dev, buflout, sz, DMA_TO_DEVICE);
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 0d59bcb..828f2a6 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -109,12 +109,14 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)

list_for_each(itr, adf_devmgr_get_head()) {
accel_dev = list_entry(itr, struct adf_accel_dev, list);
- if (accel_dev->numa_node == node && adf_dev_started(accel_dev))
+ if ((node == dev_to_node(&GET_DEV(accel_dev)) ||
+ dev_to_node(&GET_DEV(accel_dev)) < 0)
+ && adf_dev_started(accel_dev))
break;
accel_dev = NULL;
}
if (!accel_dev) {
- pr_err("QAT: Could not find device on give node\n");
+ pr_err("QAT: Could not find device on node %d\n", node);
accel_dev = adf_devmgr_get_first();
}
if (!accel_dev || !adf_dev_started(accel_dev))
@@ -164,7 +166,7 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)

for (i = 0; i < num_inst; i++) {
inst = kzalloc_node(sizeof(*inst), GFP_KERNEL,
- accel_dev->numa_node);
+ dev_to_node(&GET_DEV(accel_dev)));
if (!inst)
goto err;

diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
index 978d6c5..53c491b 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
@@ -108,7 +108,7 @@ int adf_init_admin_comms(struct adf_accel_dev *accel_dev)
uint64_t reg_val;

admin = kzalloc_node(sizeof(*accel_dev->admin), GFP_KERNEL,
- accel_dev->numa_node);
+ dev_to_node(&GET_DEV(accel_dev)));
if (!admin)
return -ENOMEM;
admin->virt_addr = dma_zalloc_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index 0d0435a..948f66b 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -119,21 +119,6 @@ static void adf_cleanup_accel(struct adf_accel_dev *accel_dev)
kfree(accel_dev);
}

-static uint8_t adf_get_dev_node_id(struct pci_dev *pdev)
-{
- unsigned int bus_per_cpu = 0;
- struct cpuinfo_x86 *c = &cpu_data(num_online_cpus() - 1);
-
- if (!c->phys_proc_id)
- return 0;
-
- bus_per_cpu = 256 / (c->phys_proc_id + 1);
-
- if (bus_per_cpu != 0)
- return pdev->bus->number / bus_per_cpu;
- return 0;
-}
-
static int qat_dev_start(struct adf_accel_dev *accel_dev)
{
int cpus = num_online_cpus();
@@ -235,7 +220,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
void __iomem *pmisc_bar_addr = NULL;
char name[ADF_DEVICE_NAME_LENGTH];
unsigned int i, bar_nr;
- uint8_t node;
int ret;

switch (ent->device) {
@@ -246,12 +230,19 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return -ENODEV;
}

- node = adf_get_dev_node_id(pdev);
- accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, node);
+ if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) {
+ /* If the accelerator is connected to a node with no memory
+ * there is no point in using the accelerator since the remote
+ * memory transaction will be very slow. */
+ dev_err(&pdev->dev, "Invalid NUMA configuration.\n");
+ return -EINVAL;
+ }
+
+ accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL,
+ dev_to_node(&pdev->dev));
if (!accel_dev)
return -ENOMEM;

- accel_dev->numa_node = node;
INIT_LIST_HEAD(&accel_dev->crypto_list);

/* Add accel device to accel table.
@@ -264,7 +255,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)

accel_dev->owner = THIS_MODULE;
/* Allocate and configure device configuration structure */
- hw_data = kzalloc_node(sizeof(*hw_data), GFP_KERNEL, node);
+ hw_data = kzalloc_node(sizeof(*hw_data), GFP_KERNEL,
+ dev_to_node(&pdev->dev));
if (!hw_data) {
ret = -ENOMEM;
goto out_err;
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
index 67ec61e..d96ee21 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
@@ -168,7 +168,7 @@ static int adf_isr_alloc_msix_entry_table(struct adf_accel_dev *accel_dev)
uint32_t msix_num_entries = hw_data->num_banks + 1;

entries = kzalloc_node(msix_num_entries * sizeof(*entries),
- GFP_KERNEL, accel_dev->numa_node);
+ GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev)));
if (!entries)
return -ENOMEM;
Prarit Bhargava
2014-10-14 10:53:55 UTC
Permalink
On 10/13/2014 09:24 PM, Tadeusz Struk wrote:

<snip>
Post by Tadeusz Struk
- node = adf_get_dev_node_id(pdev);
- accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, node);
+ if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) {
+ /* If the accelerator is connected to a node with no memory
+ * there is no point in using the accelerator since the remote
+ * memory transaction will be very slow. */
+ dev_err(&pdev->dev, "Invalid NUMA configuration.\n");
This is a lot better. Thank you for taking my comments into account here.

Let's say I have a non-functional qat device and I see the above message in
the boot log. The log doesn't say what to do ... so perhaps change it to

dev_err(&pdev->dev, FW_BUG "numa node is set to %d. This can be overridden by
using the numa_node module parameter.",
dev_to_node(&pdev->dev));

and add a numa_node module parameter to let the user set that at module load
time in case their FW is broken? I've found that sysadmins are knowledgeable
about these types of things these days and are more than capable of looking
at sysfs and numactl to determine where a device is.

P.
Tadeusz Struk
2014-10-14 14:50:03 UTC
Permalink
Post by Prarit Bhargava
Post by Tadeusz Struk
- node = adf_get_dev_node_id(pdev);
- accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, node);
+ if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) {
+ /* If the accelerator is connected to a node with no memory
+ * there is no point in using the accelerator since the remote
+ * memory transaction will be very slow. */
+ dev_err(&pdev->dev, "Invalid NUMA configuration.\n");
This is a lot better. Thank you for taking my comments into account here.
Thanks for taking the time to review my patch and providing your comments.
Post by Prarit Bhargava
Let's say I have a non-functional qat device and I see the above message in
the boot log. The log doesn't say what to do ... so perhaps change it to
dev_err(&pdev->dev, FW_BUG "numa node is set to %d. This can be overridden by
using the numa_node module parameter.",
dev_to_node(&pdev->dev));
and add a numa_node module parameter to let the user set that at module load
time in case their FW is broken? I've found that sysadmins are knowledgeable
about these types of things these days and are more than capable of looking
at sysfs and numactl to determine where a device is.
But then what if there are two devices and each belongs to different
node. In this case we would fix one and break the other. I think if the
FW is broken then using on core encryption will be safer. If a sysadmins
is really knowledgeable, then she or he can change the code to customize
it for a given platform and rebuild the module.
Other than that as far as I know module parameters are not encouraged.
T
Prarit Bhargava
2014-10-14 15:41:17 UTC
Permalink
Post by Tadeusz Struk
Post by Prarit Bhargava
Post by Tadeusz Struk
- node = adf_get_dev_node_id(pdev);
- accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, node);
+ if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) {
+ /* If the accelerator is connected to a node with no memory
+ * there is no point in using the accelerator since the remote
+ * memory transaction will be very slow. */
+ dev_err(&pdev->dev, "Invalid NUMA configuration.\n");
This is a lot better. Thank you for taking my comments into account here.
Thanks for taking the time to review my patch and providing your comments.
Post by Prarit Bhargava
Let's say I have a non-functional qat device and I see the above message in
the boot log. The log doesn't say what to do ... so perhaps change it to
dev_err(&pdev->dev, FW_BUG "numa node is set to %d. This can be overridden by
using the numa_node module parameter.",
dev_to_node(&pdev->dev));
and add a numa_node module parameter to let the user set that at module load
time in case their FW is broken? I've found that sysadmins are knowledgeable
about these types of things these days and are more than capable of looking
at sysfs and numactl to determine where a device is.
But then what if there are two devices and each belongs to different
node. In this case we would fix one and break the other. I think if the
Oh, that's a really good point. But can you at least change the message to do a
FW_BUG and dump the node information? That would be useful for debugging.

P.
Post by Tadeusz Struk
FW is broken then using on core encryption will be safer. If a sysadmins
is really knowledgeable, then she or he can change the code to customize
it for a given platform and rebuild the module.
Other than that as far as I know module parameters are not encouraged.
T
Tadeusz Struk
2014-10-14 17:18:12 UTC
Permalink
Post by Prarit Bhargava
Oh, that's a really good point. But can you at least change the message to do a
FW_BUG and dump the node information? That would be useful for debugging.
But this not always will be a FW_BUG. If a user will not populate one of
the nodes with memory this will happen as well. I could see this to be
the main reason of this message to be printed. In this case
num_possible_nodes() will be e.g. 2 and dev_to_node(&pdev->dev) will be
-1 so I don't really know what will be a useful info to print so we
don't confuse the user.
T
Prarit Bhargava
2014-10-14 17:27:27 UTC
Permalink
Post by Tadeusz Struk
Post by Prarit Bhargava
Oh, that's a really good point. But can you at least change the message to do a
FW_BUG and dump the node information? That would be useful for debugging.
But this not always will be a FW_BUG. If a user will not populate one of
the nodes with memory this will happen as well.
Hmmm ... let's maybe think about this. I wonder if there is some mechanism with
which we can determine that? Larry Woodman -- is there any mm related call that
we can make to determine if a node is memory-less?

I could see this to be
Post by Tadeusz Struk
the main reason of this message to be printed. In this case
num_possible_nodes() will be e.g. 2 and dev_to_node(&pdev->dev) will be
-1 so I don't really know what will be a useful info to print so we
don't confuse the user.
If you see -1, it means "No node was assigned" ... so -1 in a debug message is
okay IMO.

P.
Post by Tadeusz Struk
T
Prarit Bhargava
2014-10-14 17:32:56 UTC
Permalink
Post by Prarit Bhargava
Post by Tadeusz Struk
Post by Prarit Bhargava
Oh, that's a really good point. But can you at least change the message to do a
FW_BUG and dump the node information? That would be useful for debugging.
But this not always will be a FW_BUG. If a user will not populate one of
the nodes with memory this will happen as well.
Hmmm ... let's maybe think about this. I wonder if there is some mechanism with
which we can determine that? Larry Woodman -- is there any mm related call that
we can make to determine if a node is memory-less?
I could see this to be
Post by Tadeusz Struk
the main reason of this message to be printed. In this case
num_possible_nodes() will be e.g. 2 and dev_to_node(&pdev->dev) will be
-1 so I don't really know what will be a useful info to print so we
don't confuse the user.
If you see -1, it means "No node was assigned" ... so -1 in a debug message is
okay IMO.
Never mind -- I'm not thinking straight after a long weekend :) This is all
okay. The message above will only print iff node < 0, ie) -1.

So I'll ack shortly.

P.
Post by Prarit Bhargava
P.
Post by Tadeusz Struk
T
Nikolay Aleksandrov
2014-10-15 10:35:43 UTC
Permalink
Hi,
These two patches fix invalid (zero length) dma mapping and
enforce numa configuration for maximum performance.
v2 - Removed numa node calculation based bus number and use predefined
functions instead.
---
crypto: qat - Prevent dma mapping zero length assoc data
crypto: qat - Enforce valid numa configuration
drivers/crypto/qat/qat_common/adf_accel_devices.h | 3 +-
drivers/crypto/qat/qat_common/adf_transport.c | 12 +++++---
drivers/crypto/qat/qat_common/qat_algs.c | 7 +++--
drivers/crypto/qat/qat_common/qat_crypto.c | 8 +++--
drivers/crypto/qat/qat_dh895xcc/adf_admin.c | 2 +
drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 32 ++++++++-------------
drivers/crypto/qat/qat_dh895xcc/adf_isr.c | 2 +
7 files changed, 32 insertions(+), 34 deletions(-)
I just gave a quick run of these patches and they seem to fix the NUMA issue and
the 0 length warnings.

Tested-by: Nikolay Aleksandrov <***@redhat.com>
Prarit Bhargava
2014-10-15 11:25:45 UTC
Permalink
Post by Nikolay Aleksandrov
Hi,
These two patches fix invalid (zero length) dma mapping and
enforce numa configuration for maximum performance.
v2 - Removed numa node calculation based bus number and use predefined
functions instead.
---
crypto: qat - Prevent dma mapping zero length assoc data
crypto: qat - Enforce valid numa configuration
drivers/crypto/qat/qat_common/adf_accel_devices.h | 3 +-
drivers/crypto/qat/qat_common/adf_transport.c | 12 +++++---
drivers/crypto/qat/qat_common/qat_algs.c | 7 +++--
drivers/crypto/qat/qat_common/qat_crypto.c | 8 +++--
drivers/crypto/qat/qat_dh895xcc/adf_admin.c | 2 +
drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 32 ++++++++-------------
drivers/crypto/qat/qat_dh895xcc/adf_isr.c | 2 +
7 files changed, 32 insertions(+), 34 deletions(-)
I just gave a quick run of these patches and they seem to fix the NUMA issue and
the 0 length warnings.
Thanks Nik :)

Reviewed-by: Prarit Bhargava <***@redhat.com>

P.
Tadeusz Struk
2014-10-15 16:24:30 UTC
Permalink
Post by Prarit Bhargava
Post by Nikolay Aleksandrov
I just gave a quick run of these patches and they seem to fix the NUMA issue and
Post by Nikolay Aleksandrov
the 0 length warnings.
Thanks Nik :)
Thank you Nik and Prarit

Loading...