如何用指定的编码编译c++

How to compile C++ with a specified encoding?

本文关键字：编译 c++ 编码 指定　更新时间：2023-10-16

我正在为我自己的编程语言做一个解释器，作为一种爱好。我的问题是非ascii字符在Windows CMD中显示不正确。我正在读取的源文件保存为UTF-8。我认为它是UTF-8没有BOM。当我的源文件说，例如;

print "á"

在我的Mac上，我得到了预期的输出，即字母á；但在我的PC上，我得到的是├í。我原以为这是代码页的问题，但我使用的代码页中包含字母á。然后我尝试换了一种字体，Lucida Grande可以正常显示。但是在Python解释器中，字母á用默认字体就能正确显示。

我在StackOverflow上提问，有人说我的程序本身可能是用错误的编码编译的。所以我的问题是：在用c++编译我的文件时，如何指定/更改所使用的编码？我正在使用TDM-GCC作为编译器，我也用过MinGW，问题相同。

谢谢你的帮助

——编辑——

下面是我的整个源文件。你可以这样编译它:

c++ myfile.cc -o myprogram -std=c++11

每当我运行"myprogram.exe somefile.mylang"，其中somefile.mylang的内容是:

print "Hello á"

我在windows CMD上得到这样的输出:

"Hello ├í"

我不明白Python、Lua、Ruby等是如何做到使用默认的控制台字体还能输出正确字符的。

#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
/* Global Variables */
/* Reserved words of the language, addressed by index elsewhere in this file.
   Not every entry is a keyword a program can type: some are reserved by the
   grammar, others are used internally as token-type tags. The lexer uses
   [0] "print", [1] "string" (string-literal tag), [2] "sc" (statement
   terminator), [7] "num" (number tag) and [8] "expr" (expression tag), so
   the order of the entries must not change. */
string keywords[9] = { "print", "string", "sc", "variable", "eq", "undefined", "nl", "num", "expr" };
/* Token stream filled by lex() and handed to parse(). A vector is used
   because the number of tokens is only known once the program text has
   been read. */
vector<string> tokens;
/* The "symbol table". Also a vector because its size is only known at
   runtime. No function visible in this file reads or writes it. */
vector<string> variables;
/* Function Declarations */
/* Forward declarations of the interpreter's functions, collected here as an
   overview. Each prototype matches the signature of its definition further
   down, so the functions can be called before they are defined. */
void exec_program(string filename);
string load_program(string filename);
void lex(string prog);
string evalExpression(string expr);
void parse(vector<string> tokens);
void doPRINT(string toPrint);
/* Reserved for planned features; no definition is visible in this file. */
void doASSIGN();
void goGETVAR();
/* Definitions */
/* Prefixes that tag each category of diagnostic message. They are defined
   once, up front, so every error the interpreter reports carries a uniform
   label. Each expands to a string literal with a trailing space, ready to be
   streamed in front of the message text. */
#define IO_ERROR "[IO ERROR] "
#define SYNTAX_ERROR "[SYNTAX ERROR] "
#define ASSIGN_ERROR "[ASSIGN ERROR] "
/* Loads the program into the interpreter by reading the whole file.

   filename: path of the source file to read.
   Returns the file contents with every line terminated by a single '\n',
   including the last one, so the lexer always sees a terminated statement.

   If the file cannot be opened, an IO_ERROR diagnostic naming the file is
   written to stderr and the process exits with EXIT_FAILURE. A failed open
   does not tell us whether the file is missing: permissions can also
   prevent it from being opened. */
string load_program(string filename) {
    ifstream rdfile(filename);
    if (!rdfile) {
        cerr << IO_ERROR << "Unable to open the file \"" << filename << "\"." << endl;
        exit(EXIT_FAILURE);
    }
    /* Grab each line of the file and append it, newline included. */
    string filedata;
    for (string line; getline(rdfile, line); ) {
        filedata += line;
        filedata += '\n';
    }
    /* The stream closes itself when it goes out of scope. */
    return filedata;
}
/* Splits the program text into tokens and appends them to the global
   `tokens` vector.

   Tokens produced:
     print            the print keyword (keywords[0])
     string:"text"    a string literal, quotes included (keywords[1])
     num:123          a plain number (keywords[7])
     expr:[1+2]       digits joined by + - * / (keywords[8])
     sc               end of statement, emitted for ';' and for a newline
                      (keywords[2]); never emitted twice in a row and never
                      as the very first token

   prog: the complete program text. A statement that is not terminated at
   the end of the text is terminated implicitly.

   Inside a string literal every character up to the closing quote is taken
   verbatim, including digits, operators, spaces and newlines. Bytes are
   copied unchanged, so UTF-8 text in a literal passes through untouched.
   NOTE(review): an unterminated string literal at end of input is dropped
   silently. */
void lex(string prog) {
    string toks;            // characters read since the last recognised token
    string expr;            // digits and operators of the current statement
    string s;               // string literal being read, opening quote included
    bool inString = false;
    bool isexpr = false;    // true once the statement contains an operator

    /* Emits the pending number/expression (if any) followed by the
       statement terminator, then resets the per-statement state. */
    auto endStatement = [&]() {
        if (!expr.empty()) {
            if (isexpr) {
                tokens.push_back(keywords[8] + ":[" + expr + "]");
            } else {
                tokens.push_back(keywords[7] + ":" + expr);
            }
        }
        /* Blank lines and "; newline" must not produce empty statements. */
        if (!tokens.empty() && tokens.back() != keywords[2]) {
            tokens.push_back(keywords[2]);
        }
        expr.clear();
        isexpr = false;
    };

    for (char c : prog) {
        if (inString) {
            if (c == '"') {
                tokens.push_back(keywords[1] + ":" + s + "\"");
                s.clear();
                inString = false;
            } else {
                s += c;
            }
            continue;
        }

        toks += c;
        if (toks == " ") {
            toks.clear();
        } else if (toks == ";" || toks == "\n") {
            toks.clear();
            endStatement();
        } else if (toks.size() == 1 && toks[0] >= '0' && toks[0] <= '9') {
            expr += toks;
            toks.clear();
        } else if (toks == "+" || toks == "-" || toks == "*" || toks == "/") {
            expr += toks;
            isexpr = true;
            toks.clear();
        } else if (toks == keywords[0]) {
            tokens.push_back(keywords[0]);
            toks.clear();
        } else if (toks == "\"") {
            s = toks;       // the token text keeps its opening quote
            toks.clear();
            inString = true;
        }
        /* Anything else stays in `toks` until it completes a known token. */
    }

    /* Terminate a final statement that has no trailing newline or ';'. */
    endStatement();
}
/* Evaluates an integer arithmetic expression and returns the result as text.

   expr: digits joined by the operators + - * /, optionally ended by ']'
         (the form the lexer produces, e.g. "5+3]"). Evaluation stops at the
         first ']' or at the end of the string. Characters other than
         digits, operators and ']' are ignored.

   Operators are applied strictly left to right; there is no operator
   precedence, so "5+3*2" yields 16. A leading '+' or '-' acts as a sign
   ("-5+3" yields -2). An empty expression yields "0". Division is integer
   division. Arithmetic overflow is not checked.

   Throws std::invalid_argument when an operand is missing ("5+", "5**3"),
   std::domain_error on division by zero, and std::out_of_range (from
   std::stoi) when an operand does not fit in an int. */
string evalExpression(string expr) {
  int result = 0;
  bool haveResult = false;  // true once the left-hand side holds a value
  char pendingOp = '\0';    // operator waiting for its right-hand operand
  string digits;            // digits of the operand currently being read

  /* Converts `digits` to a number and folds it into `result`. */
  auto consumeOperand = [&]() {
    if (digits.empty()) {
      throw invalid_argument("evalExpression: missing operand");
    }
    const int value = stoi(digits);
    digits.clear();
    if (!haveResult) {
      result = value;
      haveResult = true;
      return;
    }
    switch (pendingOp) {
      case '+': result += value; break;
      case '-': result -= value; break;
      case '*': result *= value; break;
      case '/':
        if (value == 0) {
          throw domain_error("evalExpression: division by zero");
        }
        result /= value;
        break;
    }
  };

  for (char c : expr) {
    if (c >= '0' && c <= '9') {
      digits += c;
    } else if (c == '+' || c == '-' || c == '*' || c == '/') {
      if (!haveResult && digits.empty() && (c == '+' || c == '-')) {
        // Leading sign: evaluate as 0 + x or 0 - x.
        haveResult = true;
      } else {
        consumeOperand();
      }
      pendingOp = c;
    } else if (c == ']') {
      break;
    }
  }

  // Fold in the last operand; a dangling operator is reported as an error.
  if (!digits.empty() || pendingOp != '\0') {
    consumeOperand();
  }
  return to_string(result);
}
/* Prints the value carried by one token, followed by a newline.

   toPrint: a token as produced by the lexer.
     string:"text"  -> prints text (tag and surrounding quotes removed)
     num:123        -> prints 123
     expr:[1+2]     -> prints the value returned by evalExpression
   Any other text is printed unchanged. */
void doPRINT(string toPrint) {
  if (toPrint.compare(0, 6, "string") == 0) {
    // Skip the "string:" tag, then peel one quote off each end.
    const string quoted = toPrint.substr(7);
    toPrint = quoted.substr(1, quoted.size() - 2);
  } else if (toPrint.compare(0, 3, "num") == 0) {
    // Skip the "num:" tag.
    toPrint = toPrint.substr(4);
  } else if (toPrint.compare(0, 4, "expr") == 0) {
    // Skip the "expr:[" tag; the closing ']' is left for the evaluator.
    toPrint = evalExpression(toPrint.substr(6));
  }
  cout << toPrint << endl;
}
void parse(vector<string> tokens) {
    int i = 0;
    while (i < tokens.size()) {
    if (tokens[i] + " " + tokens[i+1].substr(0,6) + " " + tokens[i+2] == "print string sc" or
        tokens[i] + " " + tokens[i+1].substr(0,3) + " " + tokens[i+2] == "print num sc" or
        tokens[i] + " " + tokens[i+1].substr(0,4) + " " + tokens[i+2] == "print expr sc") {
      doPRINT(tokens[i+1]);
      i+=3;
    }
    }
}
/* Runs one program file from start to finish: the file is read, its text is
   lexed into the global token list, and that list is then parsed. */
void exec_program(string filename) {
    const string source = load_program(filename);
    lex(source);
    parse(tokens);
}
/* The main function, we have to start somewhere.

   argv[1] is the path of the program to run. Without it a usage line is
   printed and the process exits with status 0, as before. An exception
   escaping from the interpreter (for example std::out_of_range from a
   numeric conversion) is reported on stderr and turned into a failure exit
   status instead of aborting the process.

   NOTE(review): whether non-ASCII output shows correctly on the Windows
   console presumably depends on the active console code page rather than
   on how this file is compiled; confirm on the target system. */
int main(int argc, char* argv[]) {
    if (argc < 2) {
        cout << "Usage: reedoo <filename> [args]" << endl;
        return 0;
    }
    try {
        exec_program(argv[1]);
    } catch (const exception& e) {
        cerr << "[RUNTIME ERROR] " << e.what() << endl;
        return EXIT_FAILURE;
    }
    return 0;
}

这不是关于如何编译myprogram.exe，而是myprogram.exe与somefile.mylang的关系

作为语言开发人员，你有责任规定"mylang脚本程序的源文件应当是utf-8编码"，或者在源文件中提供一个用于标识代码页的标签。你还应该规定"mylang语言中的字符串被编码为UTF-foo"(因为这会影响像"hello".charAt(3)或其他等效方法这样的操作)。

然后是你的编译器/解释器(myprogram.exe)的责任，以适当的编码打开源代码(somefile.mylang)，并将其转换为内部表示的UTF-foo。