12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362 |
- // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
- /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
- */
- #include "cmd.h"
/* Return codes used when polling the page-tracker CQ */
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
/* Forward declarations; both are defined later in this file */
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);
- int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
- {
- u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
- u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
- lockdep_assert_held(&mvdev->state_mutex);
- if (mvdev->mdev_detach)
- return -ENOTCONN;
- MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
- MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
- MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
- return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
- }
- int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
- {
- u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
- u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
- lockdep_assert_held(&mvdev->state_mutex);
- if (mvdev->mdev_detach)
- return -ENOTCONN;
- MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
- MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
- MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
- return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
- }
- int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
- size_t *state_size)
- {
- u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
- int ret;
- lockdep_assert_held(&mvdev->state_mutex);
- if (mvdev->mdev_detach)
- return -ENOTCONN;
- MLX5_SET(query_vhca_migration_state_in, in, opcode,
- MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
- MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
- MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
- ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
- out);
- if (ret)
- return ret;
- *state_size = MLX5_GET(query_vhca_migration_state_out, out,
- required_umem_size);
- return 0;
- }
/*
 * Put the page tracker into the error state. Safe to call without
 * state_mutex; also wakes anyone blocked on tracker_comp.
 */
static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
	/* Mark the tracker under an error and wake it up if it's running */
	mvdev->tracker.is_err = true;
	complete(&mvdev->tracker_comp);
}
/*
 * SR-IOV blocking-notifier callback: tracks whether the VF's mlx5 core
 * device is usable. All command helpers in this file check mdev_detach
 * under state_mutex before touching the device.
 */
static int mlx5fv_vf_event(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(nb, struct mlx5vf_pci_core_device, nb);

	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = false;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		/*
		 * Must run before marking the device detached; it takes
		 * state_mutex itself, so it cannot be called under the lock.
		 */
		mlx5vf_cmd_close_migratable(mvdev);
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = true;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	default:
		break;
	}
	return 0;
}
/*
 * Quiesce migration activity on the device: fail the page tracker, close
 * any open migration file descriptors and free tracker resources.
 */
void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	/* Must be done outside the lock to let it progress */
	set_tracker_error(mvdev);
	mutex_lock(&mvdev->state_mutex);
	mlx5vf_disable_fds(mvdev);
	_mlx5vf_free_page_tracker_resources(mvdev);
	mlx5vf_state_mutex_unlock(mvdev);
}
/*
 * Undo mlx5vf_cmd_set_migratable(): drop the SR-IOV notifier and the
 * ordered callback workqueue. No-op if migration was never enabled.
 */
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
						&mvdev->nb);
	destroy_workqueue(mvdev->cb_wq);
}
/*
 * Probe-time setup of migration support for a VF vfio device. On any
 * failure the function silently leaves migrate_cap unset, which makes all
 * other helpers in this file no-ops.
 */
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
			       const struct vfio_migration_ops *mig_ops,
			       const struct vfio_log_ops *log_ops)
{
	struct pci_dev *pdev = mvdev->core_device.pdev;
	int ret;

	/* Migration is only offered on virtual functions */
	if (!pdev->is_virtfn)
		return;

	mvdev->mdev = mlx5_vf_get_core_dev(pdev);
	if (!mvdev->mdev)
		return;

	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
		goto end;

	mvdev->vf_id = pci_iov_vf_id(pdev);
	if (mvdev->vf_id < 0)
		goto end;

	/* Firmware function_id of a VF is its VF index + 1 */
	if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
				   &mvdev->vhca_id))
		goto end;

	mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
	if (!mvdev->cb_wq)
		goto end;

	mutex_init(&mvdev->state_mutex);
	spin_lock_init(&mvdev->reset_lock);
	mvdev->nb.notifier_call = mlx5fv_vf_event;
	ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
						    &mvdev->nb);
	if (ret) {
		destroy_workqueue(mvdev->cb_wq);
		goto end;
	}

	mvdev->migrate_cap = 1;
	mvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P;
	mvdev->core_device.vdev.mig_ops = mig_ops;
	init_completion(&mvdev->tracker_comp);
	/* Dirty-page tracking additionally needs the adv_virtualization cap */
	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
		mvdev->core_device.vdev.log_ops = log_ops;

end:
	/*
	 * The reference is dropped even on success; later command helpers
	 * re-check mdev_detach under state_mutex before using mvdev->mdev.
	 */
	mlx5_vf_put_core_dev(mvdev->mdev);
}
- static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
- u16 *vhca_id)
- {
- u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
- int out_size;
- void *out;
- int ret;
- out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
- out = kzalloc(out_size, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
- MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
- MLX5_SET(query_hca_cap_in, in, other_function, 1);
- MLX5_SET(query_hca_cap_in, in, function_id, function_id);
- MLX5_SET(query_hca_cap_in, in, op_mod,
- MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
- HCA_CAP_OPMOD_GET_CUR);
- ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
- if (ret)
- goto err_exec;
- *vhca_id = MLX5_GET(query_hca_cap_out, out,
- capability.cmd_hca_cap.vhca_id);
- err_exec:
- kfree(out);
- return ret;
- }
/*
 * Create an MTT memory key covering either a migration file's scatter
 * table (@migf != NULL) or a page-tracker receive buffer (@recv_buf).
 * Exactly one of the two supplies the pages. Returns 0 and the new key
 * in @mkey on success, negative errno on failure.
 */
static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
			struct mlx5_vf_migration_file *migf,
			struct mlx5_vhca_recv_buf *recv_buf,
			u32 *mkey)
{
	size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) :
				recv_buf->npages;
	int err = 0, inlen;
	__be64 *mtt;
	void *mkc;
	u32 *in;

	/* MTT entries follow the command layout, padded to an even count */
	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*mtt) * round_up(npages, 2);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 DIV_ROUND_UP(npages, 2));
	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	/* Fill the MTT with the DMA address of every page, in order */
	if (migf) {
		struct sg_dma_page_iter dma_iter;

		for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
	} else {
		int i;

		for (i = 0; i < npages; i++)
			*mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, lr, 1);	/* local read */
	MLX5_SET(mkc, mkc, lw, 1);	/* local write */
	MLX5_SET(mkc, mkc, rr, 1);	/* remote read */
	MLX5_SET(mkc, mkc, rw, 1);	/* remote write */
	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);	/* not bound to a single QP */
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
	MLX5_SET64(mkc, mkc, len,
		   migf ? migf->total_length : (npages * PAGE_SIZE));
	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	kvfree(in);
	return err;
}
/*
 * Workqueue callback run after an async SAVE_VHCA_STATE completes;
 * releases the per-save resources that could not be freed from the
 * interrupt-context completion handler (mlx5vf_save_callback()).
 */
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
	struct mlx5vf_async_data *async_data = container_of(_work,
		struct mlx5vf_async_data, work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);
	struct mlx5_core_dev *mdev = migf->mvdev->mdev;

	mutex_lock(&migf->lock);
	if (async_data->status) {
		/* Command failed: flag the file and wake poll() waiters */
		migf->is_err = true;
		wake_up_interruptible(&migf->poll_wait);
	}
	mutex_unlock(&migf->lock);

	/* Tear down in reverse order of mlx5vf_cmd_save_vhca_state() */
	mlx5_core_destroy_mkey(mdev, async_data->mkey);
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
	mlx5_core_dealloc_pd(mdev, async_data->pdn);
	kvfree(async_data->out);
	/* Drop the reference taken before issuing the async command */
	fput(migf->filp);
}
/*
 * Completion handler for the async SAVE_VHCA_STATE command. May run in
 * interrupt context, so all teardown is deferred to the workqueue
 * (mlx5vf_mig_file_cleanup_cb()).
 */
static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5vf_async_data *async_data = container_of(context,
		struct mlx5vf_async_data, cb_work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	if (!status) {
		/* Publish the real image size and wake readers of the file */
		WRITE_ONCE(migf->total_length,
			   MLX5_GET(save_vhca_state_out, async_data->out,
				    actual_image_size));
		wake_up_interruptible(&migf->poll_wait);
	}

	/*
	 * The error and the cleanup flows can't run from an
	 * interrupt context
	 */
	async_data->status = status;
	queue_work(migf->mvdev->cb_wq, &async_data->work);
}
/*
 * Launch an asynchronous SAVE_VHCA_STATE: the device DMA-writes the
 * migration image into @migf's scatter table through a temporary mkey.
 * Completion is reported via mlx5vf_save_callback(). Returns 0 once the
 * command is in flight, negative errno otherwise.
 */
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf)
{
	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5vf_async_data *async_data;
	struct mlx5_core_dev *mdev;
	u32 pdn, mkey;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	mdev = mvdev->mdev;
	err = mlx5_core_alloc_pd(mdev, &pdn);
	if (err)
		return err;

	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
			      0);
	if (err)
		goto err_dma_map;

	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
	if (err)
		goto err_create_mkey;

	MLX5_SET(save_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_SAVE_VHCA_STATE);
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, mkey);
	/* size here is the buffer capacity; the reply carries the real size */
	MLX5_SET(save_vhca_state_in, in, size, migf->total_length);

	async_data = &migf->async_data;
	async_data->out = kvzalloc(out_size, GFP_KERNEL);
	if (!async_data->out) {
		err = -ENOMEM;
		goto err_out;
	}

	/* no data exists till the callback comes back */
	migf->total_length = 0;
	/* Hold the file so it cannot vanish while the command is in flight */
	get_file(migf->filp);
	async_data->mkey = mkey;
	async_data->pdn = pdn;
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
			       async_data->out,
			       out_size, mlx5vf_save_callback,
			       &async_data->cb_work);
	if (err)
		goto err_exec;

	return 0;

err_exec:
	fput(migf->filp);
	kvfree(async_data->out);
err_out:
	mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
	mlx5_core_dealloc_pd(mdev, pdn);
	return err;
}
- int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
- struct mlx5_vf_migration_file *migf)
- {
- struct mlx5_core_dev *mdev;
- u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
- u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
- u32 pdn, mkey;
- int err;
- lockdep_assert_held(&mvdev->state_mutex);
- if (mvdev->mdev_detach)
- return -ENOTCONN;
- mutex_lock(&migf->lock);
- if (!migf->total_length) {
- err = -EINVAL;
- goto end;
- }
- mdev = mvdev->mdev;
- err = mlx5_core_alloc_pd(mdev, &pdn);
- if (err)
- goto end;
- err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
- if (err)
- goto err_reg;
- err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
- if (err)
- goto err_mkey;
- MLX5_SET(load_vhca_state_in, in, opcode,
- MLX5_CMD_OP_LOAD_VHCA_STATE);
- MLX5_SET(load_vhca_state_in, in, op_mod, 0);
- MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
- MLX5_SET(load_vhca_state_in, in, mkey, mkey);
- MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
- err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
- mlx5_core_destroy_mkey(mdev, mkey);
- err_mkey:
- dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
- err_reg:
- mlx5_core_dealloc_pd(mdev, pdn);
- end:
- mutex_unlock(&migf->lock);
- return err;
- }
- static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
- u32 req_nodes)
- {
- struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
- unsigned long min_gap;
- unsigned long curr_gap;
- /* Special shortcut when a single range is required */
- if (req_nodes == 1) {
- unsigned long last;
- curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
- while (curr) {
- last = curr->last;
- prev = curr;
- curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
- if (prev != comb_start)
- interval_tree_remove(prev, root);
- }
- comb_start->last = last;
- return;
- }
- /* Combine ranges which have the smallest gap */
- while (cur_nodes > req_nodes) {
- prev = NULL;
- min_gap = ULONG_MAX;
- curr = interval_tree_iter_first(root, 0, ULONG_MAX);
- while (curr) {
- if (prev) {
- curr_gap = curr->start - prev->last;
- if (curr_gap < min_gap) {
- min_gap = curr_gap;
- comb_start = prev;
- comb_end = curr;
- }
- }
- prev = curr;
- curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
- }
- comb_start->last = comb_end->last;
- interval_tree_remove(comb_end, root);
- cur_nodes--;
- }
- }
/*
 * Create the firmware page-tracker general object for the given address
 * ranges. If there are more ranges than the device supports
 * (pg_track_max_num_range) adjacent ranges are merged first. On success
 * the new object id is stored in tracker->id.
 */
static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
				 struct mlx5vf_pci_core_device *mvdev,
				 struct rb_root_cached *ranges, u32 nnodes)
{
	int max_num_range =
		MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	int record_size = MLX5_ST_SZ_BYTES(page_track_range);
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	struct interval_tree_node *node = NULL;
	u64 total_ranges_len = 0;
	u32 num_ranges = nnodes;
	u8 log_addr_space_size;
	void *range_list_ptr;
	void *obj_context;
	void *cmd_hdr;
	int inlen;
	void *in;
	int err;
	int i;

	/* Merge down to what the device can track in a single object */
	if (num_ranges > max_num_range) {
		combine_ranges(ranges, nnodes, max_num_range);
		num_ranges = max_num_range;
	}

	/* One page_track_range record per range follows the base layout */
	inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
		record_size * num_ranges;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
			       general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_OBJ_TYPE_PAGE_TRACK);
	obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
	MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
	MLX5_SET(page_track, obj_context, track_type, 1);
	MLX5_SET(page_track, obj_context, log_page_size,
		 ilog2(tracker->host_qp->tracked_page_size));
	MLX5_SET(page_track, obj_context, log_msg_size,
		 ilog2(tracker->host_qp->max_msg_size));
	/* Dirty reports are delivered through the firmware-side QP */
	MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
	MLX5_SET(page_track, obj_context, num_ranges, num_ranges);

	range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	for (i = 0; i < num_ranges; i++) {
		void *addr_range_i_base = range_list_ptr + record_size * i;
		/*
		 * NOTE(review): interval_tree 'last' is inclusive, so the
		 * byte length of a range would be last - start + 1; confirm
		 * the device expects the exclusive form computed here.
		 */
		unsigned long length = node->last - node->start;

		MLX5_SET64(page_track_range, addr_range_i_base, start_address,
			   node->start);
		MLX5_SET64(page_track_range, addr_range_i_base, length, length);
		total_ranges_len += length;
		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	/* The loop above must have consumed every node in the tree */
	WARN_ON(node);
	log_addr_space_size = ilog2(total_ranges_len);
	if (log_addr_space_size <
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
	    log_addr_space_size >
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET(page_track, obj_context, log_addr_space_size,
		 log_addr_space_size);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (err)
		goto out;

	tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
out:
	kfree(in);
	return err;
}
- static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
- u32 tracker_id)
- {
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);
- return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
- }
/*
 * Modify the page-tracker object @tracker_id: set its state to
 * @tracker_state and the reported range to [@iova, @iova + @length).
 */
static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
				     u32 tracker_id, unsigned long iova,
				     unsigned long length, u32 tracker_state)
{
	u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in,
			       general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);

	obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
	/* 0x3 - presumably selects the state + range fields; confirm vs PRM */
	MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
	MLX5_SET64(page_track, obj_context, range_start_address, iova);
	MLX5_SET64(page_track, obj_context, length, length);
	MLX5_SET(page_track, obj_context, state, tracker_state);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}
/*
 * Allocate the fragmented buffer backing a CQ of @nent entries of
 * @cqe_size bytes each, and initialize its frag-buffer control (fbc).
 */
static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
			     struct mlx5_vhca_cq_buf *buf, int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	/* Entry stride: 64B (2^6), or 128B (2^7) for 128-byte CQEs */
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	/* NOTE(review): log size derived from cqe_size, not from nent -
	 * confirm this matches mlx5_init_fbc()'s expectation. */
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
				       mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
	buf->cqe_size = cqe_size;
	buf->nent = nent;
	return 0;
}
- static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
- {
- struct mlx5_cqe64 *cqe64;
- void *cqe;
- int i;
- for (i = 0; i < buf->nent; i++) {
- cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
- cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
- cqe64->op_own = MLX5_CQE_INVALID << 4;
- }
- }
/* Tear down a tracker CQ: HW object first, then buffer, then doorbell */
static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
	mlx5_db_free(mdev, &cq->db);
}
/* CQ async-event handler: a CQ error puts the page tracker in error state */
static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	if (type != MLX5_EVENT_TYPE_CQ_ERROR)
		return;

	set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device,
				       tracker.cq.mcq));
}
/*
 * Device async-event notifier for the page tracker: on a fatal WQ error
 * that targets either tracker QP, mark the tracker as failed.
 */
static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
				 void *data)
{
	struct mlx5_vhca_page_tracker *tracker =
		mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
	struct mlx5vf_pci_core_device *mvdev = container_of(
		tracker, struct mlx5vf_pci_core_device, tracker);
	struct mlx5_eqe *eqe = data;
	u8 event_type = (u8)type;
	u8 queue_type;
	int qp_num;

	switch (event_type) {
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		queue_type = eqe->data.qp_srq.type;
		if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP)
			break;
		/* The QP number occupies the low 24 bits of qp_srq_n */
		qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
		if (qp_num != tracker->host_qp->qpn &&
		    qp_num != tracker->fw_qp->qpn)
			break;
		set_tracker_error(mvdev);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}
/* CQ completion callback: wake whoever waits for tracker report CQEs */
static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
			       struct mlx5_eqe *eqe)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(mcq, struct mlx5vf_pci_core_device,
			     tracker.cq.mcq);

	complete(&mvdev->tracker_comp);
}
/*
 * Create the completion queue used by the page tracker to receive dirty
 * page reports. @ncqe is rounded up to a power of two. The CQ is armed
 * before returning so the first completion raises an event.
 */
static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
			    struct mlx5_vhca_page_tracker *tracker,
			    size_t ncqe)
{
	/* Use 128-byte CQEs on 128-byte cache line machines */
	int cqe_size = cache_line_size() == 128 ? 128 : 64;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vhca_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;

	cq = &tracker->cq;
	ncqe = roundup_pow_of_two(ncqe);
	err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
	if (err)
		return err;

	cq->ncqe = ncqe;
	/* The doorbell record holds consumer index and arm state back-to-back */
	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;
	err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
	if (err)
		goto err_db_free;

	init_cq_frag_buf(&cq->buf);
	/* One PAS entry per buffer page follows the base command layout */
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		cq->buf.frag_buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buff;
	}

	/* Spread CQs across completion vectors based on the current CPU */
	vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
	err = mlx5_vector2eqn(mdev, vector, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
	cq->mcq.comp = mlx5vf_cq_complete;
	cq->mcq.event = mlx5vf_cq_event;
	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	/* Arm the CQ so the first report triggers mlx5vf_cq_complete() */
	mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
		    cq->mcq.cons_index);
	kvfree(in);
	return 0;

err_vec:
	kvfree(in);
err_buff:
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
err_db_free:
	mlx5_db_free(mdev, &cq->db);
	return err;
}
/*
 * Create an RC QP for the page tracker. With @max_recv_wr > 0 this is the
 * host-side QP with a real receive queue; with 0 it is the firmware-side
 * QP with a zero-length RQ. Neither QP has a send queue. Returns the new
 * QP or an ERR_PTR().
 */
static struct mlx5_vhca_qp *
mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
		    struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	struct mlx5_vhca_qp *qp;
	u8 log_rq_stride;
	u8 log_rq_sz;
	void *qpc;
	int inlen;
	void *in;
	int err;

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
	log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
	log_rq_sz = ilog2(qp->rq.wqe_cnt);
	err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
	if (err)
		goto err_free;

	/* Only the host-side QP gets a backed receive queue */
	if (max_recv_wr) {
		err = mlx5_frag_buf_alloc_node(mdev,
			wq_get_byte_sz(log_rq_sz, log_rq_stride),
			&qp->buf, mdev->priv.numa_node);
		if (err)
			goto err_db_free;
		mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
	}

	qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		qp->buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, tracker->pdn);
	MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	/* No send queue on either tracker QP */
	MLX5_SET(qpc, qpc, no_sq, 1);
	if (max_recv_wr) {
		MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
		MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
		MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
		MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
		mlx5_fill_page_frag_array(&qp->buf,
					  (__be64 *)MLX5_ADDR_OF(create_qp_in,
								 in, pas));
	} else {
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
	}

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kvfree(in);
	if (err)
		goto err_in;

	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	return qp;

err_in:
	/* qp->buf exists only when a receive queue was allocated */
	if (max_recv_wr)
		mlx5_frag_buf_free(mdev, &qp->buf);
err_db_free:
	mlx5_db_free(mdev, &qp->db);
err_free:
	kfree(qp);
	return ERR_PTR(err);
}
/*
 * Post one receive WQE pointing at the next chunk of the QP's receive
 * buffer and ring the RQ doorbell.
 */
static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
{
	struct mlx5_wqe_data_seg *data;
	unsigned int ix;

	/* The ring must not be over-posted: outstanding WQEs < wqe_cnt */
	WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
	ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
	data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
	data->byte_count = cpu_to_be32(qp->max_msg_size);
	data->lkey = cpu_to_be32(qp->recv_buf.mkey);
	data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
	qp->rq.pc++;
	/* Make sure that descriptors are written before doorbell record. */
	dma_wmb();
	*qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
}
/*
 * Bring a tracker QP up through RST->INIT->RTR (and ->RTS for the
 * firmware-side QP). The two QPs are loopback-connected via @remote_qpn.
 * The host QP (@host_qp) stops at RTR — it only receives — while the
 * firmware QP continues to RTS so it can send reports.
 */
static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp, u32 remote_qpn,
			      bool host_qp)
{
	u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;
	int ret;

	/* Init */
	qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);
	MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
	if (ret)
		return ret;

	if (host_qp) {
		/* Pre-post the entire receive ring before moving to RTR */
		struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
		int i;

		for (i = 0; i < qp->rq.wqe_cnt; i++) {
			mlx5vf_post_recv(qp);
			recv_buf->next_rq_offset += qp->max_msg_size;
		}
	}

	/* RTR */
	qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
	MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
	MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	/* fl = force loopback: both QPs live on the same device */
	MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
	MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	/* qpn was already set above; the repeated set is redundant but harmless */
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
	if (ret || host_qp)
		return ret;

	/* RTS */
	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, retry_count, 7);
	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
	MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
}
/* Destroy a tracker QP: HW object first, then its buffers and doorbell */
static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);
	mlx5_frag_buf_free(mdev, &qp->buf);
	mlx5_db_free(mdev, &qp->db);
	kfree(qp);
}
- static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
- {
- int i;
- /* Undo alloc_pages_bulk_array() */
- for (i = 0; i < recv_buf->npages; i++)
- __free_page(recv_buf->page_list[i]);
- kvfree(recv_buf->page_list);
- }
- static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
- unsigned int npages)
- {
- unsigned int filled = 0, done = 0;
- int i;
- recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
- GFP_KERNEL);
- if (!recv_buf->page_list)
- return -ENOMEM;
- for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
- recv_buf->page_list + done);
- if (!filled)
- goto err;
- done += filled;
- if (done == npages)
- break;
- }
- recv_buf->npages = npages;
- return 0;
- err:
- for (i = 0; i < npages; i++) {
- if (recv_buf->page_list[i])
- __free_page(recv_buf->page_list[i]);
- }
- kvfree(recv_buf->page_list);
- return -ENOMEM;
- }
- static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_recv_buf *recv_buf)
- {
- int i, j;
- recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
- sizeof(*recv_buf->dma_addrs),
- GFP_KERNEL);
- if (!recv_buf->dma_addrs)
- return -ENOMEM;
- for (i = 0; i < recv_buf->npages; i++) {
- recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
- recv_buf->page_list[i],
- 0, PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
- goto error;
- }
- return 0;
- error:
- for (j = 0; j < i; j++)
- dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
- PAGE_SIZE, DMA_FROM_DEVICE);
- kvfree(recv_buf->dma_addrs);
- return -ENOMEM;
- }
- static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_recv_buf *recv_buf)
- {
- int i;
- for (i = 0; i < recv_buf->npages; i++)
- dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
- PAGE_SIZE, DMA_FROM_DEVICE);
- kvfree(recv_buf->dma_addrs);
- }
- static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_qp *qp)
- {
- struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
- mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
- unregister_dma_recv_pages(mdev, recv_buf);
- free_recv_pages(&qp->recv_buf);
- }
- static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_qp *qp, u32 pdn,
- u64 rq_size)
- {
- unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
- struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
- int err;
- err = alloc_recv_pages(recv_buf, npages);
- if (err < 0)
- return err;
- err = register_dma_recv_pages(mdev, recv_buf);
- if (err)
- goto end;
- err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
- if (err)
- goto err_create_mkey;
- return 0;
- err_create_mkey:
- unregister_dma_recv_pages(mdev, recv_buf);
- end:
- free_recv_pages(recv_buf);
- return err;
- }
/*
 * Tear down every page-tracker resource, in reverse order of creation
 * in mlx5vf_start_page_tracker(). No-op if tracking is not active.
 * Caller must hold mvdev->state_mutex.
 */
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&mvdev->state_mutex);
	if (!mvdev->log_active)
		return;

	/* Must not be called once the mdev has been detached */
	WARN_ON(mvdev->mdev_detach);

	mlx5_eq_notifier_unregister(mdev, &tracker->nb);
	mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
	mlx5vf_destroy_qp(mdev, tracker->fw_qp);
	/* host_qp owns the receive buffer; free it before the QP itself */
	mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
	mlx5vf_destroy_qp(mdev, tracker->host_qp);
	mlx5vf_destroy_cq(mdev, &tracker->cq);
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
	mlx5_put_uars_page(mdev, tracker->uar);
	mvdev->log_active = false;
}
- int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
- {
- struct mlx5vf_pci_core_device *mvdev = container_of(
- vdev, struct mlx5vf_pci_core_device, core_device.vdev);
- mutex_lock(&mvdev->state_mutex);
- if (!mvdev->log_active)
- goto end;
- _mlx5vf_free_page_tracker_resources(mvdev);
- mvdev->log_active = false;
- end:
- mlx5vf_state_mutex_unlock(mvdev);
- return 0;
- }
/*
 * vfio dirty-tracking start callback.
 *
 * Builds the full tracking pipeline: UAR, PD, CQ, a host RC QP with a
 * 2MB receive buffer for dirty reports, a firmware RC QP, activates the
 * two QPs against each other, creates the device tracker object and
 * registers for its events.
 *
 * @page_size is in/out: the requested tracked page size is clamped to
 * the device's [pg_track_log_min_page_size, pg_track_log_max_page_size]
 * capability range and the actually-used size is written back.
 *
 * Returns 0 on success or a negative errno; on failure all resources
 * acquired so far are unwound in reverse order.
 */
int mlx5vf_start_page_tracker(struct vfio_device *vdev,
			      struct rb_root_cached *ranges, u32 nnodes,
			      u64 *page_size)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	u8 log_tracked_page = ilog2(*page_size);
	struct mlx5_vhca_qp *host_qp;
	struct mlx5_vhca_qp *fw_qp;
	struct mlx5_core_dev *mdev;
	u32 max_msg_size = PAGE_SIZE;
	u64 rq_size = SZ_2M;
	u32 max_recv_wr;
	int err;

	mutex_lock(&mvdev->state_mutex);
	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	/* Only one active tracking session is allowed at a time */
	if (mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	memset(tracker, 0, sizeof(*tracker));
	tracker->uar = mlx5_get_uars_page(mdev);
	if (IS_ERR(tracker->uar)) {
		err = PTR_ERR(tracker->uar);
		goto end;
	}

	err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
	if (err)
		goto err_uar;

	/* One receive WR per max_msg_size chunk of the receive buffer */
	max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
	err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
	if (err)
		goto err_dealloc_pd;

	host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
	if (IS_ERR(host_qp)) {
		err = PTR_ERR(host_qp);
		goto err_cq;
	}

	host_qp->max_msg_size = max_msg_size;
	/* Clamp the tracked page size to the device capability range */
	if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size);
	} else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size);
	}

	host_qp->tracked_page_size = (1ULL << log_tracked_page);
	err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
					     rq_size);
	if (err)
		goto err_host_qp;

	/* Firmware-side QP posts no receives (0 WRs) */
	fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
	if (IS_ERR(fw_qp)) {
		err = PTR_ERR(fw_qp);
		goto err_recv_resources;
	}

	/* Connect the two RC QPs to each other and bring them to RTS */
	err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
	if (err)
		goto err_activate;

	err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
	if (err)
		goto err_activate;

	tracker->host_qp = host_qp;
	tracker->fw_qp = fw_qp;
	err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
	if (err)
		goto err_activate;

	MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY);
	mlx5_eq_notifier_register(mdev, &tracker->nb);
	/* Report the page size actually used back to the caller */
	*page_size = host_qp->tracked_page_size;
	mvdev->log_active = true;
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;

err_activate:
	mlx5vf_destroy_qp(mdev, fw_qp);
err_recv_resources:
	mlx5vf_free_qp_recv_resources(mdev, host_qp);
err_host_qp:
	mlx5vf_destroy_qp(mdev, host_qp);
err_cq:
	mlx5vf_destroy_cq(mdev, &tracker->cq);
err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
err_uar:
	mlx5_put_uars_page(mdev, tracker->uar);
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}
/*
 * Parse one dirty report message of @size bytes located in receive page
 * @index and mark each reported address in the @dirty iova bitmap.
 * Each page_track_report_entry carries a 64-bit address split into
 * low/high 32-bit fields.
 */
static void
set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
		  struct iova_bitmap *dirty)
{
	u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
	u32 nent = size / entry_size;
	struct page *page;
	u64 addr;
	u64 *buf;
	int i;

	/* Sanity-check the device-supplied index and message size */
	if (WARN_ON(index >= qp->recv_buf.npages ||
		    (nent > qp->max_msg_size / entry_size)))
		return;

	page = qp->recv_buf.page_list[index];
	buf = kmap_local_page(page);
	for (i = 0; i < nent; i++) {
		addr = MLX5_GET(page_track_report_entry, buf + i,
				dirty_address_low);
		addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
				      dirty_address_high) << 32;
		iova_bitmap_set(dirty, addr, qp->tracked_page_size);
	}
	kunmap_local(buf);
}
/*
 * Handle a receive completion on the host QP: extract the tracker
 * status from the CQE, feed the dirty report (if any) into the bitmap,
 * and repost the receive buffer.
 */
static void
mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
	      struct iova_bitmap *dirty, int *tracker_status)
{
	u32 size;
	int ix;

	qp->rq.cc++;
	/* Tracker state is carried in the top 4 bits of the immediate */
	*tracker_status = be32_to_cpu(cqe->immediate) >> 28;
	size = be32_to_cpu(cqe->byte_cnt);
	/* Map the WQE counter to a receive-buffer slot index */
	ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);

	/* zero length CQE, no data */
	WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
	if (size)
		set_report_output(size, ix, qp, dirty);

	qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
	mlx5vf_post_recv(qp);
}
- static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
- {
- return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
- }
/*
 * Return CQE @n if it is valid and owned by software, else NULL.
 * The ownership bit is compared against the parity of the current pass
 * over the ring (n & ncqe) to detect whether hardware has written it.
 */
static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
	struct mlx5_cqe64 *cqe64;

	/* For 128-byte CQEs the 64-byte payload lives in the second half */
	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
		return cqe64;
	} else {
		return NULL;
	}
}
/*
 * Poll one CQE off the tracker CQ.
 *
 * Returns CQ_EMPTY when no software-owned CQE is available, CQ_OK after
 * consuming a receive completion (dirty data folded into @dirty and
 * @tracker_status updated), or CQ_POLL_ERR for any other opcode.
 */
static int
mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
		   struct iova_bitmap *dirty, int *tracker_status)
{
	struct mlx5_cqe64 *cqe;
	u8 opcode;

	cqe = get_sw_cqe(cq, cq->mcq.cons_index);
	if (!cqe)
		return CQ_EMPTY;

	++cq->mcq.cons_index;
	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();
	opcode = get_cqe_opcode(cqe);
	switch (opcode) {
	case MLX5_CQE_RESP_SEND_IMM:
		mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
		return CQ_OK;
	default:
		return CQ_POLL_ERR;
	}
}
/*
 * vfio dirty-tracking report callback: ask the device to report dirty
 * pages in [iova, iova + length) and consume the reports into @dirty.
 *
 * Blocks until the tracker leaves the REPORTING state or an error is
 * seen. Returns 0 on success or a negative errno.
 */
int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
				  unsigned long length,
				  struct iova_bitmap *dirty)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_vhca_cq *cq = &tracker->cq;
	struct mlx5_core_dev *mdev;
	int poll_err, err;

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	mdev = mvdev->mdev;
	/* Kick the device into reporting mode for the requested range */
	err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
					MLX5_PAGE_TRACK_STATE_REPORTING);
	if (err)
		goto end;

	tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
	while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING &&
	       !tracker->is_err) {
		poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
					      &tracker->status);
		if (poll_err == CQ_EMPTY) {
			/*
			 * Arm the CQ, then poll once more to close the
			 * race with a CQE that landed before arming;
			 * only sleep if the CQ is still empty.
			 */
			mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
				    cq->mcq.cons_index);
			poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
						      dirty, &tracker->status);
			if (poll_err == CQ_EMPTY) {
				wait_for_completion(&mvdev->tracker_comp);
				continue;
			}
		}
		if (poll_err == CQ_POLL_ERR) {
			err = -EIO;
			goto end;
		}
		mlx5_cq_set_ci(&cq->mcq);
	}

	if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
		tracker->is_err = true;

	/* A sticky error fails this and all subsequent reads */
	if (tracker->is_err)
		err = -EIO;

end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}
|