Hi,

I have a question regarding Ceph CRUSH. I have been going through the Crush.h file. It says that struct crush_bucket **buckets (below) is an array of pointers. My understanding is that this particular array of pointers is a collection of addresses of six scalar values, namely __s32 id, __u16 type, __u8 alg, __u8 hash, __u32 weight and __u32 size, and that the reason it is declared with a double pointer (**buckets) is that it also points to another pointer, namely __s32 *items. Is that right? Please correct me if I am wrong.


/** @ingroup API
 *
 * A crush map defines a hierarchy of crush_bucket that ends with leaves
 * (buckets and leaves are both called items), together with a set of
 * crush_rule that crush_do_rule() uses to map an integer to items.
 *
 */
struct crush_map {
	/*! Array of __max_buckets__ crush_bucket pointers.
	 * A slot may be NULL when its bucket was removed with
	 * crush_remove_bucket(). Buckets must be added with
	 * crush_add_bucket(). The bucket stored at __buckets[i]__ must
	 * have crush_bucket.id == -1-i.
	 */
	struct crush_bucket **buckets;

	/*! Array of __max_rules__ crush_rule pointers.
	 * A slot may be NULL when its rule was removed (no API does that
	 * today, but one may exist in the future). Rules must be added
	 * with crush_add_rule().
	 */
	struct crush_rule **rules;

	/*! Number of slots in __buckets__. */
	__s32 max_buckets;

	/*! Number of slots in __rules__. */
	__u32 max_rules;

	/*! Highest item value stored in the crush_map, plus one. */
	__s32 max_devices;

	/*! Backward compatibility tunable. It implements a bad solution
	 * and must always be 0, except when backward compatibility is
	 * required.
	 */
	__u32 choose_local_tries;

	/*! Backward compatibility tunable. It implements a bad solution
	 * and must always be 0, except when backward compatibility is
	 * required.
	 */
	__u32 choose_local_fallback_tries;

	/*! Tunable. Default used when a rule omits the CHOOSE_TRIES or
	 * CHOOSELEAF_TRIES steps. See the documentation of
	 * crush_rule_set_step() for more information.
	 */
	__u32 choose_total_tries;

	/*! Backward compatibility tunable. It should always be 1 except
	 *  for backward compatibility. Implemented in 2012, it was
	 *  generalized late 2013 and is mostly unused except in one
	 *  border case, which is why it must be set to 1.
	 *
	 *  In firstn mode, attempt the chooseleaf inner descent once and
	 *  retry the outer descent on reject. This does *not* apply to a
	 *  collision: in that case the retry happens as it used to.
	 */
	__u32 chooseleaf_descend_once;

	/*! Backward compatibility tunable. It is a fix for bad mappings
	 *  implemented in 2014 at
	 *  https://github.com/ceph/ceph/pull/1185. It should always be 1
	 *  except for backward compatibility.
	 *
	 *  If non-zero, feed r into chooseleaf, bit-shifted right by
	 *  (r-1) bits. A value of 1 is best for new clusters. For legacy
	 *  clusters that want to limit reshuffling, a value of 3 or 4
	 *  makes the mappings line up a bit better with previous
	 *  mappings.
	 */
	__u8 chooseleaf_vary_r;

	/*! Backward compatibility tunable. It is an improvement that
	 *  avoids unnecessary mapping changes, implemented at
	 *  https://github.com/ceph/ceph/pull/6572 and explained in the
	 *  October 2015 post "chooseleaf may cause some unnecessary pg
	 *  migrations":
	 *  https://www.mail-archive.com/ceph-devel@vger.kernel.org/msg26075.html
	 *  It should always be 1 except for backward compatibility.
	 */
	__u8 chooseleaf_stable;

	/*! @cond INTERNAL */
	/*
	 * Computed after decode, or by the builder after construction.
	 * It is exposed as a field (instead of through a 'build CRUSH
	 * working space' function) so that callers may reserve a static
	 * buffer, allocate on the stack, or otherwise avoid calling into
	 * the heap allocator if they want to. The working space size
	 * depends on the map, whereas the size of the scratch vector
	 * passed to the mapper depends on the size of the desired
	 * result set.
	 *
	 * Nothing stops the caller from allocating both in a single
	 * allocation and passing both in, though.
	 */
	size_t working_size;

#ifndef __KERNEL__
	/*! @endcond */
	/*! Backward compatibility tunable. It is a fix for the straw
	 *  scaler values of the straw algorithm, which is deprecated
	 *  (straw2 replaces it), implemented at
	 *  https://github.com/ceph/ceph/pull/3057. It should always be 1
	 *  except for backward compatibility.
	 */
	__u8 straw_calc_version;

	/*! @cond INTERNAL */
	/*
	 * Bitmask of the allowed bucket algs; the bit positions are
	 * CRUSH_BUCKET_*. These are *bits* whereas the CRUSH_BUCKET_*
	 * values are not, so the mask is built by or-ing together
	 * (1 << CRUSH_BUCKET_WHATEVER). Bit 0 is left unused to minimize
	 * confusion (bucket type values start at 1).
	 */
	__u32 allowed_bucket_algs;

	__u32 *choose_tries;
#endif
	/*! @endcond */
};


BR