Skip to content

Commit aeb713f

Browse files
authored
Merge pull request open-mpi#12976 from hppritcha/fix_for_issue10895_v50x
comm_create_from_groups: cherry-pick two commits from main to fix the mpi4py tests for creating communicators from groups (and related tests)
2 parents 428650e + cb77d84 commit aeb713f

File tree

7 files changed

+286
-61
lines changed

7 files changed

+286
-61
lines changed

ompi/communicator/comm.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
2525
* Copyright (c) 2017-2022 IBM Corporation. All rights reserved.
2626
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
27-
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
27+
* Copyright (c) 2018-2024 Triad National Security, LLC. All rights
2828
* reserved.
2929
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
3030
* $COPYRIGHT$
@@ -1741,7 +1741,7 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
17411741
ompi_communicator_t **newintercomm)
17421742
{
17431743
ompi_communicator_t *newcomp = NULL, *local_comm, *leader_comm = MPI_COMM_NULL;
1744-
ompi_comm_extended_cid_block_t new_block;
1744+
ompi_comm_extended_cid_block_t new_block = {0};
17451745
bool i_am_leader = local_leader == local_group->grp_my_rank;
17461746
ompi_proc_t **rprocs;
17471747
uint64_t data[4];
@@ -1787,22 +1787,22 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
17871787
leader_procs[1] = tmp;
17881788
}
17891789

1790-
/* create a unique tag for allocating the leader communicator. we can eliminate this step
1791-
* if we take a CID from the newly allocated block belonging to local_comm. this is
1792-
* a note to make this change at a later time. */
1793-
opal_asprintf (&sub_tag, "%s-OMPIi-LC", tag);
1794-
if (OPAL_UNLIKELY(NULL == sub_tag)) {
1795-
ompi_comm_free (&local_comm);
1790+
leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2);
1791+
ompi_set_group_rank (leader_group, my_proc);
1792+
if (OPAL_UNLIKELY(NULL == leader_group)) {
17961793
free(leader_procs);
1794+
ompi_comm_free (&local_comm);
17971795
return OMPI_ERR_OUT_OF_RESOURCE;
17981796
}
17991797

1800-
leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2);
1801-
ompi_set_group_rank (leader_group, my_proc);
1802-
if (OPAL_UNLIKELY(NULL == leader_group)) {
1803-
free (sub_tag);
1798+
/* create a unique tag for allocating the leader communicator. we can eliminate this step
1799+
* if we take a CID from the newly allocated block belonging to local_comm. this is
1800+
* a note to make this change at a later time. */
1801+
opal_asprintf (&sub_tag, "%s-OMPIi-LC-%s", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (leader_group, 0)));
1802+
if (OPAL_UNLIKELY(NULL == sub_tag)) {
18041803
free(leader_procs);
18051804
ompi_comm_free (&local_comm);
1805+
OBJ_RELEASE(leader_group);
18061806
return OMPI_ERR_OUT_OF_RESOURCE;
18071807
}
18081808

@@ -1812,6 +1812,7 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
18121812
rc = ompi_comm_create_from_group (leader_group, sub_tag, info, errhandler, &leader_comm);
18131813
OBJ_RELEASE(leader_group);
18141814
free (sub_tag);
1815+
sub_tag = NULL;
18151816
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
18161817
free(leader_procs);
18171818
ompi_comm_free (&local_comm);
@@ -1867,14 +1868,16 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
18671868
return rc;
18681869
}
18691870

1870-
/* will be using a communicator ID derived from the bridge communicator to save some time */
1871-
new_block.block_cid.cid_base = data[1];
1872-
new_block.block_cid.cid_sub.u64 = data[2];
1873-
new_block.block_nextsub = 0;
1874-
new_block.block_nexttag = 0;
1875-
new_block.block_level = (int8_t) data[3];
1871+
/*
1872+
* append the PMIx CONTEXT_ID obtained when creating the leader comm as a discriminator
1873+
*/
1874+
opal_asprintf (&sub_tag, "%s-%ld", tag, data[1]);
1875+
if (OPAL_UNLIKELY(NULL == sub_tag)) {
1876+
return OMPI_ERR_OUT_OF_RESOURCE;
1877+
}
18761878

1877-
rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, &new_block, false, OMPI_COMM_CID_GROUP_NEW);
1879+
rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) sub_tag, NULL, false, OMPI_COMM_CID_GROUP_NEW);
1880+
free (sub_tag);
18781881
if ( OMPI_SUCCESS != rc ) {
18791882
OBJ_RELEASE(newcomp);
18801883
return rc;

0 commit comments

Comments
 (0)