如何用正则表达式搜索 txt 文件

How to search a txt file for regex?

本文关键字:文件 获取 txt 何搜索 搜索      更新时间:2023-10-16

我正在尝试使用正则表达式统计并输出一个 HTML 格式文本文件中的电子邮件地址数量。我可以打开并读取该文件,但不确定如何用正则表达式在文件内容中搜索匹配的模式。

更新:好的,我的程序在我自己编写的测试文本文件上可以正常工作,但在实际的 HTML 格式文本文件上,它能输出电话号码的数量,却没有输出电子邮件地址的数量。

// Reads "ltudirectory.txt" line by line, echoes every line to stdout, and
// reports how many lines contain a .edu e-mail address, a .com e-mail address
// and a phone number.
//
// Each counter is incremented at most once per line, so a line holding several
// addresses still counts as one.
//
// Parameters argc/argv are unused. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    // The e-mail patterns are deliberately NOT anchored with ^...$: in HTML an
    // address is surrounded by markup, so an anchored pattern (which requires
    // the whole line to be the address) would never match. The dot before the
    // top-level domain is escaped so it only matches a literal '.', and \b
    // keeps "edu"/"com" from matching as a prefix of a longer label.
    //
    // The regex objects are built once here rather than on every iteration;
    // constructing a std::regex is expensive and the patterns never change.
    const std::regex r_edu(R"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.edu\b)");
    const std::regex r_com(R"([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.+-]+\.com\b)");
    // Two digits, three digits, four digits separated by '-'. Because the
    // pattern is unanchored it also matches inside a longer digit run such as
    // "123-456-7890".
    const std::regex r_phoneNumbers("[[:digit:]]{2}-[[:digit:]]{3}-[[:digit:]]{4}");

    int eduEmails = 0;
    int nonEduEmails = 0;
    int num_phoneNumbers = 0;

    std::ifstream htmlText;
    htmlText.open("ltudirectory.txt");

    if (htmlText.is_open())
    {
        std::string line;
        while (std::getline(htmlText, line))
        {
            std::cout << line << std::endl;

            if (std::regex_search(line, r_edu))
            {
                ++eduEmails;
            }
            if (std::regex_search(line, r_com))
            {
                ++nonEduEmails;
            }
            if (std::regex_search(line, r_phoneNumbers))
            {
                ++num_phoneNumbers;
            }
        }
    }
    else
    {
        // Make a missing/unreadable file visible instead of silently
        // reporting three zero counts.
        std::cerr << "Could not open ltudirectory.txt" << std::endl;
    }

    htmlText.close();
    std::cout << "Emails ending with .edu : " << eduEmails << std::endl;
    std::cout << "Emails ending with .com : " << nonEduEmails << std::endl;
    std::cout << "Number of Phone Numbers: " << num_phoneNumbers << std::endl;

    system("pause");
    return 0;
}
// Reads "ltudirectory.txt" line by line and counts, in testNum, the lines
// that contain something shaped like an e-mail address.
//
// The count is not printed or returned; the program only pauses and exits.
// Parameters argc/argv are unused. Always returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
    // A raw string literal is required here: in an ordinary literal "\w" and
    // "\." are not valid C++ escape sequences, so the backslashes would never
    // reach the regex engine and the pattern would match literal 'w'
    // characters instead of word characters.
    //
    // The regex is compiled once, outside the loop, because the pattern does
    // not change between lines.
    const std::regex e(R"((\w+)(\.|_)?(\w*)@(\w+)(\.(\w+))+)");

    int testNum = 0;

    std::ifstream htmlText;
    htmlText.open("ltudirectory.txt");
    if (htmlText.good())
    {
        std::string line;
        while (std::getline(htmlText, line))
        {
            // regex_search looks for the pattern anywhere in the line.
            if (std::regex_search(line, e))
            {
                ++testNum;
            }
        }
    }
    htmlText.close();
    system("pause");
    return 0;
}