这种奇怪的 I/O 方法是如何工作的?

How does this strange I/O method work?

本文关键字：如何工作、工作方法　更新时间：2023-10-16

在 C++ 中进行输入输出时，我一直只使用 scanf/printf 和 cin/cout。最近我遇到了下面这段代码，它以一种奇怪的方式进行 I/O。

另请注意，这种 I/O 方法使代码运行得极快：这段代码使用的算法与大多数其他解法几乎相同，但执行时间要短得多。为什么这种 I/O 这么快，它大体上是如何工作的？

编辑：代码

#include <bits/stdtr1c++.h>
#define MAXN 200010
#define MAXQ 200010
#define MAXV 1000010
#define clr(ar) memset(ar, 0, sizeof(ar))
#define read() freopen("lol.txt", "r", stdin)
using namespace std;
// Divisor applied to a query's left index to obtain its bucket number (query::d).
const int block_size = 633;
// res  - running total that insert()/erase() adjust as the current window changes.
// out  - answers, stored at the original (input-order) index of each query.
// q    - number of queries; val - raw input values; ar - ids produced from val by compress().
// freq - per-id occurrence count, incremented in insert() and decremented in erase().
// NOTE(review): the global n is never assigned in the visible code; main() declares its own local n.
long long res, out[MAXQ]; int n, q, ar[MAXN], val[MAXN], freq[MAXV];
// Bulk I/O helpers: init() loads stdin into one big buffer with a single
// fread, number() parses integers straight out of that buffer, convert()
// appends formatted integers to an output buffer and print() writes that
// buffer to stdout.
namespace fastio{
int ptr, ye;      // ptr: read cursor into str; ye: number of pending bytes in out
int input_len;    // number of bytes of str that actually hold input
char temp[25], str[8333667], out[8333669];

// True for the ASCII digits '0'..'9'.
inline bool is_digit(char c){
    return c >= '0' && c <= '9';
}

// Resets both cursors and reads as much of stdin as fits into str.
void init(){
    ptr = 0, ye = 0;
    // Keep the byte count so number() never scans past the data that was read.
    input_len = static_cast<int>(fread(str, 1, sizeof(str), stdin));
}

// Returns the next decimal integer in the buffer and leaves ptr just past it.
// Every non-digit byte is skipped; a '-' is honoured as a sign only when a
// digit follows it immediately. Returns 0 once the input is exhausted.
inline int number(){
    bool negative = false;
    while (ptr < input_len && !is_digit(str[ptr])){
        // Only the separator directly in front of the digits can be a sign,
        // so the flag is recomputed for every skipped byte.
        negative = (str[ptr] == '-' && ptr + 1 < input_len && is_digit(str[ptr + 1]));
        ptr++;
    }
    int val = 0;
    while (ptr < input_len && is_digit(str[ptr])) val = (val * 10) + (str[ptr++] - '0');
    return negative ? -val : val;
}

// Appends the decimal text of x followed by '\n' to the output buffer.
inline void convert(long long x){
    // sizeof(temp) exceeds the longest possible record (sign + 19 digits +
    // newline), so flushing early here keeps out from overflowing.
    if (ye + static_cast<int>(sizeof(temp)) > static_cast<int>(sizeof(out))){
        fwrite(out, 1, ye, stdout);
        ye = 0;
    }
    // Work on the magnitude as unsigned so the most negative value is safe.
    unsigned long long magnitude = static_cast<unsigned long long>(x);
    if (x < 0){
        magnitude = 0ULL - magnitude;
        out[ye++] = '-';
    }
    int d = 0;
    do {
        // Digits come out least-significant first; temp holds them for reversal.
        temp[++d] = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude);
    for (int i = d; i; i--) out[ye++] = temp[i];
    out[ye++] = '\n';
}

// Writes everything still pending in the output buffer to stdout.
inline void print(){
    fwrite(out, 1, ye, stdout);
} }
// One range query: inclusive bounds [l, r], the bucket d of its left bound,
// and i, the slot its answer is stored in.
struct query{
    int l, r, d, i;

    // Assigns nothing; present so that the Q array can be declared.
    inline query() {}

    // a and b are the bounds, c the answer slot; the bucket is derived from a.
    inline query(int a, int b, int c) : l(a), r(b), d(a / block_size), i(c) {}

    // Orders by bucket first. Inside one bucket the right bound ascends when
    // the bucket number is odd and descends when it is even.
    inline bool operator < (const query& other) const{
        if (d < other.d) return true;
        if (d > other.d) return false;
        if (d & 1) return r < other.r;
        return r > other.r;
    }
} Q[MAXQ];
// Replaces each of the n values in `in` by a dense id written to `out`:
// ids are handed out as 0, 1, 2, ... in order of first appearance, and equal
// values always receive the same id.
void compress(int n, int* in, int* out){
    std::unordered_map<int, int> ids;
    int k = 0;
    while (k < n){
        const int key = in[k];
        auto found = ids.find(key);
        if (found == ids.end()){
            // First sighting: the next free id is the number of ids so far.
            const int fresh = static_cast<int>(ids.size());
            found = ids.insert(std::make_pair(key, fresh)).first;
        }
        out[k] = found->second;
        ++k;
    }
}
// Adds position i to the current window: res grows by val[i] * (2k + 1),
// where k is the count of ar[i] before the addition, then the count goes up.
inline void insert(int i){
    int& count = freq[ar[i]];
    const int weight = 1 + 2 * count;
    ++count;
    res += (long long)val[i] * weight;
}
// Removes position i from the current window: the count of ar[i] drops first,
// then res shrinks by val[i] * (2k + 1) with k the count after the removal.
inline void erase(int i){
    int& count = freq[ar[i]];
    --count;
    const int weight = 1 + 2 * count;
    res -= (long long)val[i] * weight;
}
// Sorts the queries, answers them by sliding one window over the array, and
// queues the answers for output in the queries' original order.
inline void run(){
    std::sort(Q, Q + q);
    res = 0;
    int lo = 0, hi = 0;  // the window currently covers positions [lo, hi)
    for (int k = 0; k < q; ++k){
        const int left = Q[k].l;
        const int right = Q[k].r;
        // Grow toward the target range first, then shrink.
        for (; lo > left; ) insert(--lo);
        for (; hi <= right; ) insert(hi++);
        for (; lo < left; ) erase(lo++);
        for (; hi > right + 1; ) erase(--hi);
        out[Q[k].i] = res;
    }
    for (int k = 0; k < q; ++k){
        fastio::convert(out[k]);
    }
}
int main(){
fastio::init();
int n, i, j, k, a, b;
n = fastio::number();
q = fastio::number();
for (i = 0; i < n; i++) val[i] = fastio::number();
compress(n, val, ar);
for (i = 0; i < q; i++){
a = fastio::number();
b = fastio::number();
Q[i] = query(a - 1, b - 1, i);
}
run();
fastio::print();
return 0; }

这个解决方案 http://codeforces.com/contest/86/submission/22526466 (624 毫秒，使用 32 MB 内存)使用单次 fread 调用，并手动解析内存中的数字(因此它使用更多的内存)；许多其他解决方案速度较慢，它们使用 scanf (http://codeforces.com/contest/86/submission/27561563 ，1620 毫秒，9 MB)或 C++ iostream 的 cin (http://codeforces.com/contest/86/submission/27558562 ，3118 毫秒，15 MB)。并非所有解决方案之间的差异都来自输入输出和解析(解法本身也有差异)，但有一部分是。

fread(str, 1, 8333667, stdin);

此代码使用单次 fread 库调用读取最多约 8 MB，即整个输入文件。该文件最多包含 2 (n、t) + 200000 (a_i) + 2*200000 (l、r) 个数字，每个数字 6 或 7 位，数字之间可能有换行符，也可能由一个(？)空格分隔，因此每个数字最多大约占 8 个字符(数字本身最多 6 或 7 位，因为 1000000 也是允许的，再加上 1 个空格或 \n)；最大输入文件大小约为 0.6 M * 8 字节 ≈ 5 MB。

// Reads the next run of decimal digits from the buffer `str`, starting at
// index `ptr`, and returns its value; `ptr` is left on the first byte after
// the digits.
inline int number(){
int i, j, val = 0; // i and j are never used
// Skip every byte whose code is below 45 or above 57. This stops on a digit,
// but also on '-', '.', '/' (codes 45-47), which are not digits.
while (str[ptr] < 45 || str[ptr] > 57) ptr++;
// Accumulate digits: codes 48..57 are '0'..'9', so subtracting 48 gives the digit.
while (str[ptr] > 47 && str[ptr] < 58) val = (val * 10) + (str[ptr++] - 48);
return val;
}

然后代码用手写的逻辑解析十进制整数。根据 ASCII 表 http://www.asciitable.com/ ，十进制代码 48...57 对应十进制数字 '0'...'9'(第二个 while 循环)，因此从字符代码中减去 48 就得到对应的数字；再把已读取的部分 val 乘以 10 并加上当前数字。第一个循环中的 chr < 45 || chr > 57 看起来是要跳过输入中的非数字字符，但这并不完全正确：该条件不会跳过代码 45、46、47 (即 '-'、'.'、'/')，而第二个循环也不会解析它们，所以这段代码读不到这些字符之后的任何数字。

n = fastio::number();
q = fastio::number();
for (i = 0; i < n; i++) val[i] = fastio::number();
for (i = 0; i < q; i++){
a = fastio::number();
b = fastio::number();

实际的读取使用的就是这个 fastio::number() 方法；其他解决方案则在循环中调用 scanf 或 iostream 的 operator >>：

for (int i = 0; i < N; i++) {
scanf("%d", &(arr[i]));
add(arr[i]);
}

或

for (int i = 1; i <= n; ++i)
cin >> a[i];

这两种方法都更通用，但每次读取都要进行库调用：库调用会从内部缓冲区(例如 4 KB)取出一些字符，或者调用操作系统的系统调用来重新填充缓冲区，而且每个函数都会做许多检查并报告错误。对于输入的每个数字，scanf 都会重新解析第一个参数中的同一个格式字符串，并执行 POSIX 规范 http://pubs.opengroup.org/onlinepubs/7908799/xsh/fscanf.html 中描述的全部逻辑和全部错误检查。C++ iostream 没有格式字符串，但它仍然更通用，参见 https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/istream.tcc#L156 处的 operator>>(int& __n)。

因此，标准库函数内部有更多的逻辑、更多的调用、更多的分支；它们更通用、更安全，在实际编程中应该使用它们。而这类"体育编程"竞赛允许用户使用标准库函数来解决任务——只要你能想出正确的算法，这些函数就足够快。任务的作者需要用标准 I/O 函数编写多个解决方案，以检查任务的时间限制是否合理、任务是否可解。(TopCoder 系统在 I/O 方面做得更好：你不需要自己实现 I/O，数据已经以某种语言的结构/集合的形式传入你的函数。)

有时，体育编程中的任务对内存有严格的限制：输入文件比允许的内存使用量大几倍，程序员无法将整个文件读入内存。例如：从输入文件中读取一个长达 2000 万位的超长数字并给它加 1，而内存限制为 2 MB；你无法从文件中一次性读入完整的输入数字；也很难沿相反方向按块正确地读取；你只能放弃标准的加法方法(竖式加法)，转而构建一个带状态的 FSM(有限状态机)，用来统计连续 9 的序列。