从二进制林中删除根并将其添加为子根

Deleting roots from a binary forest and adding as children

本文关键字:添加 二进制 林中 删除      更新时间:2023-10-16

我正试图实现这个算法,但我已经被困了半个星期。我一定对指针有根本的误解。如有任何帮助,我将不胜感激。

算法:构建查找树

**Require**: list of features in root
    **while** root contains feature vectors wtih common 1-bits **do**
        f1,f2 <- features with most common 1-bits
        p <- f1 **bitwise and** f2
        set f1, f2 as children of p
        remove f1, f2 from root
        insert p into root
    **end while**

希望代码中的注释能让你了解我的思考过程:

#include <iostream>
#include <bitset>
#include <list>
#include <vector>
#include <string>
#include <sstream>
const std::size_t HIP_VECTOR_SIZE = 9;

struct DatabaseFeature {
public:
    DatabaseFeature(std::bitset<HIP_VECTOR_SIZE> &_HIP_vector,
        DatabaseFeature *_left_child,
        DatabaseFeature *_right_child,
        std::string _name) :
        HIP_vector(_HIP_vector),
        left_child(_left_child),
        right_child(_right_child),
        name(_name) {}
  std::bitset<HIP_VECTOR_SIZE> HIP_vector;
  DatabaseFeature *left_child = nullptr;
  DatabaseFeature *right_child = nullptr;
  std::string name;
private:
};
//Create a string representing the binary tree recursively
std::string serialize(DatabaseFeature *root, std::size_t depth=0) {
  std::string output = "";
  if(root==nullptr) {
     return " ";
  } 
  output += serialize(root->right_child, depth+1);
  output += "n" + std::string(depth,'t') + root->HIP_vector.to_string() + "--" + root->name;
  output += serialize(root->left_child, depth+1);
  return output;
}
int main(int argc, char* argv[]) {
  std::list<DatabaseFeature> features;
  //Load sample data
  std::bitset<HIP_VECTOR_SIZE> F1("010100011");
  std::bitset<HIP_VECTOR_SIZE> F2("011010100");
  std::bitset<HIP_VECTOR_SIZE> F3("011010101");
  std::bitset<HIP_VECTOR_SIZE> F4("110010100");
  std::bitset<HIP_VECTOR_SIZE> F5("001100010");
  features.emplace_back(F1, nullptr, nullptr, "F1");
  features.emplace_back(F2, nullptr, nullptr, "F2");
  features.emplace_back(F3, nullptr, nullptr, "F3");
  features.emplace_back(F4, nullptr, nullptr, "F4");
  features.emplace_back(F5, nullptr, nullptr, "F5");

  std::vector<DatabaseFeature> non_roots;
  std::size_t parent_count = 1;
  //While root contains features with common 1-bits
  while(1) {
    std::size_t highest_common_bit_count = 0;
    std::list<DatabaseFeature>::iterator feature_iter1;
    std::list<DatabaseFeature>::iterator feature_iter2;
    std::bitset<HIP_VECTOR_SIZE> most_common_bits;
    //Compare every feature to every other feature
    for(auto feature1 = features.begin(); feature1 != features.end(); ++feature1) {
      for(auto feature2 = std::next(feature1); feature2 != features.end(); ++feature2) {
        auto common_bits = feature1->HIP_vector & feature2->HIP_vector;
        //If a new max is found, save it
        if(common_bits.count() > highest_common_bit_count) {
          feature_iter1 = feature1;
          feature_iter2 = feature2;
          most_common_bits = common_bits;
          highest_common_bit_count = common_bits.count();
        }
      }
    }
    if(highest_common_bit_count == 0) {
      std::cout << "No more root features with common bits. "
                << "Number of root features after forest construction: " 
                << features.size() << std::endl;
      break;
    }

    //Move nodes from roots to non-roots
    non_roots.push_back(std::move(*feature_iter1));
    non_roots.push_back(std::move(*feature_iter2));

    //Take the last two nodes for assignment as children
    auto left_child = std::prev(std::end(non_roots));
    auto right_child = std::prev(std::end(non_roots),2);

    //Erase children from the root
    features.erase(feature_iter1);
    features.erase(feature_iter2);
    std::ostringstream parent_name;
    parent_name << "P" << parent_count;
    //Add new parent to root
    features.emplace_back(most_common_bits, &*left_child, &*right_child, parent_name.str());
    std::cout << "Non-Roots: " << std::endl;
    for(auto & nr : non_roots) {
      std::cout << nr.HIP_vector << std::endl;
    }
    std::cout << std::endl;
    std::cout << "Serializing:" << std::endl;
    for(auto it = features.begin(); it != features.end(); ++it) {
      if(&*it) {
        std::cout << serialize(&*it) << std::endl << std::endl;
      }
    }
    parent_count++;
  }
}

大多数时候我收到分段错误。当使用指向非根向量中的子向量的指针时,会发生错误。谁能告诉我为什么这些分割错误发生?谢谢。

我通过使用DatabaseFeature智能指针的列表和向量解决了我的问题:

#include <iostream>
#include <bitset>
#include <list>
#include <vector>
#include <string>
#include <memory>
#include <sstream>
const std::size_t HIP_VECTOR_SIZE = 9;

struct DatabaseFeature {
  DatabaseFeature(const std::bitset<HIP_VECTOR_SIZE> &_HIP_vector,
                  std::shared_ptr<DatabaseFeature> _left_child,
                  std::shared_ptr<DatabaseFeature> _right_child,
                  const std::string &_node_name) : 
                  HIP_vector(_HIP_vector),
                  left_child(_left_child),
                  right_child(_right_child),
                  node_name(_node_name) {};
  std::bitset<HIP_VECTOR_SIZE> HIP_vector;
  std::shared_ptr<DatabaseFeature> left_child = nullptr;
  std::shared_ptr<DatabaseFeature> right_child = nullptr;
  std::string node_name;
};

//Create a string representation of the binary tree recursively
std::string serialize(std::shared_ptr<DatabaseFeature> root, std::size_t depth = 0) {
  std::string output = "";
  if(root==nullptr) {
    return " ";
  } 
  output += serialize(root->right_child, depth+1);
  output += "n" + std::string(depth,'t') + root->HIP_vector.to_string() 
         +  "--" + root->node_name;
  output += serialize(root->left_child, depth+1);
  return output;
}

int main(int argc, char* argv[]) {
  std::list<std::shared_ptr<DatabaseFeature>> root_features;

  std::bitset<9> f1("010100011");
  std::bitset<9> f2("011010100");
  std::bitset<9> f3("011010101");
  std::bitset<9> f4("110010100");
  std::bitset<9> f5("001100010");
  std::bitset<9> f6("001010101");
  std::bitset<9> f7("001110101");
  std::bitset<9> f8("111010101");
  std::bitset<9> f9("110100111");
  std::bitset<9> f0("000010111");
  root_features.push_back(std::make_shared<DatabaseFeature>(f1, nullptr, nullptr, "F1"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f2, nullptr, nullptr, "F2"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f3, nullptr, nullptr, "F3"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f4, nullptr, nullptr, "F4"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f5, nullptr, nullptr, "F5"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f6, nullptr, nullptr, "F6"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f7, nullptr, nullptr, "F7"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f8, nullptr, nullptr, "F8"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f9, nullptr, nullptr, "F9"));
  root_features.push_back(std::make_shared<DatabaseFeature>(f0, nullptr, nullptr, "F0"));

  std::vector<std::shared_ptr<DatabaseFeature>> non_root_features;
  std::size_t parent_count = 1;

  //While root contains features with common 1-bits
  while(1) {
    std::size_t highest_common_bit_count = 0;
    std::list<std::shared_ptr<DatabaseFeature>>::iterator feature_iter1;
    std::list<std::shared_ptr<DatabaseFeature>>::iterator feature_iter2;
    std::bitset<HIP_VECTOR_SIZE> most_common_bits;
    //Compare every feature to every other feature
    for(auto feature1 = root_features.begin(); feature1 != root_features.end(); ++feature1) {
      for(auto feature2 = std::next(feature1); feature2 != root_features.end(); ++feature2) {
        auto common_bits = (*feature1)->HIP_vector & (*feature2)->HIP_vector;
        if(common_bits.count() > highest_common_bit_count) {
          feature_iter1 = feature1;
          feature_iter2 = feature2;
          most_common_bits = common_bits;
          highest_common_bit_count = common_bits.count();
        }
      }
    }
    if(highest_common_bit_count == 0) {
      std::cout << "No more root features with common bits. "
                << "Number of root features after forest construction: " 
                << root_features.size() << std::endl;
      break;
    }

    //Move nodes from roots to non-roots
    non_root_features.push_back(*feature_iter1);
    non_root_features.push_back(*feature_iter2);

    //Take the last two nodes for assignment as children
    auto left_child = std::prev(std::end(non_root_features));
    auto right_child = std::prev(std::end(non_root_features),2);

    //Erase children from the root
    root_features.erase(feature_iter1);
    root_features.erase(feature_iter2);

    //Name the new node
    std::ostringstream parent_name;
    parent_name << "P" << parent_count;

    //root_features.emplace_back(most_common_bits, &*left_child, &*right_child);
    root_features.push_back(std::make_shared<DatabaseFeature>(
        most_common_bits, *left_child, *right_child, parent_name.str()));

    std::cout << "Non root features: " << std::endl;
    for(auto & non_root : non_root_features) {
      std::cout << non_root->HIP_vector << std::endl;
    }
    std::cout << std::endl;
    std::cout << "Binary forest: " << std::endl;
    for(auto it = root_features.begin(); it != root_features.end(); ++it) {
      if(*it) {
        std::cout << serialize(*it) << std::endl << std::endl;
      }
    }
    parent_count++;
  }
}