琐碎的字符串解析算法

Trivial string parse algorithm

本文关键字:算法 字符串      更新时间:2023-10-16

在这个代码示例中,我对向量使用 const-ref,并在四个函数中分别检查条件。如果每个条件都对应一个很大的列表(例如数据库中的所有动词,比如一本英语词典),那么是在传给函数之前、先在 Token 类中完成检查更好(这样函数只会拿到 "verb" 这个值),还是在函数内部检查更好(这样函数本身必须去查询数据库)?

#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Forward declarations for the token checkers defined below main().
// The checkers call one another and walk the token list from its last
// element towards its first; `index` is the position of the token to examine.

// Entry point: checks that the last token is "." and that it is not the only
// token, then continues with verb_check on the token before it.
bool full_stop_check(vector<string> &sentence);
// Checks that sentence[index] is "verb" and is not the first token, then
// continues with noun_check on the token before it.
bool verb_check(vector<string> &sentence, int index);
// Checks that sentence[index] is "noun"; succeeds if it is the first token,
// otherwise continues with conj_arti_check on the token before it.
bool noun_check(vector<string> &sentence, int index);
// Checks that sentence[index] is "conjugation" or "article" and continues
// leftwards accordingly (see the definition for the exact rules).
bool conj_arti_check(vector<string> &sentence, int index);
// Reads whitespace-separated tokens from standard input until extraction
// fails (e.g. end of input), then prints whether full_stop_check() accepts
// the resulting token sequence.
//
// Fix: the line terminators were written as 'n' (the letter n) instead of the
// newline escape '\n', so the program printed "It is a sentence.n".
int main ()
{
    std::vector<std::string> sentence;
    std::string temp_word;
    while (std::cin >> temp_word)
    {
        sentence.push_back(temp_word);
    }
    // Output test (commented out)
    // for (const std::string &x : sentence)
    //   std::cout << x << '\n';
    // Check for sentence
    if (full_stop_check(sentence))
        std::cout << "It is a sentence." << '\n';
    else
        std::cout << "It is not a sentence." << '\n';
    return 0;
}
bool full_stop_check(vector<string> &sentence)
{
    int index = sentence.size()-1;
    // Full Stop Check
    if (sentence[index] != ".")
        return false;
    --index;
    if (index < 0 )
        return false;
    return verb_check(sentence, index);         // full stop (not first character)
}
bool verb_check(vector<string> &sentence, int index)
{
    // Verb Check
    if (sentence[index] != "verb")
        return false;
    --index;
    if (index < 0 )
        return false;
    return noun_check(sentence, index);         // verb (not first word)
}
bool noun_check(vector<string> &sentence, int index)
{
    // Noun Check
    if (sentence[index] != "noun")
        return false;
    --index;
    if (index < 0 )                             // first word is a noun
        return true;
    return conj_arti_check(sentence, index);    // noun (not first word)
}
bool conj_arti_check(vector<string> &sentence, int index)
{   
    // Conjugation & Article Check
    if (sentence[index] != "conjugation" && sentence[index] != "article")
        return false;
    // If it is either an article or conjugation
    if (index == 0 && sentence[index] == "article") // first word is an article
        return true;
    else if (index == 0)                            // first word not article (or noun)
        return false;   
    else if (sentence[index] == "conjugation") {    // conjugation
        --index;        
        return verb_check(sentence, index);
    }
    else {                                          // article (not first word)
        --index;
        return conj_arti_check(sentence, index);    // recursion
    }
}

上下文无关语言识别器背后的理论通常基于下推自动机(带有栈、可以处理递归的状态机)。手工实现非常繁琐,但这一过程很容易自动化,因此存在相当多的解析器生成器,如 yacc、ANTLR 或 Boost.Spirit。它们只需接收语言的语法,就能生成相应的解析器代码。