2021GoogleCTF部分赛题writeup及赛后复现

这次比赛时做出两道题，分别是

filestore
cpp

最终排名193位~~做出题就算成功~~。

因为前一段时间比较忙还有自己瞎安排时间，导致屯着几场比赛的wp写了一半，这几天会整理好陆续放出。

Misc

filestore

这题的解题关键是理解程序中的“去冗余”算法得到爆破flag的思路。

附件给了源代码文件和nc地址，先nc过去看下程序的交互逻辑

[root@centos ~]# nc filestore.2021.ctfcompetition.com 1337
== proof-of-work: disabled ==
Welcome to our file storage solution.

Menu:
- load
- store
- status
- exit
status
User: ctfplayer
Time: Mon Jul 19 10:14:26 2021
Quota: 0.026kB/64.000kB
Files: 1

Menu:
- load
- store
- status
- exit
store
Send me a line of data...
give me the flag plz
Stored! Here's your file id:
K2XAzOSaLU4m0Mz5

Menu:
- load
- store
- status
- exit
load
Send me the file id...
K2XAzOSaLU4m0Mz5
give me the flag plz

Menu:
- load
- store
- status
- exit
status
User: ctfplayer
Time: Mon Jul 19 10:15:11 2021
Quota: 0.046kB/64.000kB
Files: 2

再看源代码，源代码内容如下

# filestore.py

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os, secrets, string, time
from flag import flag


def main():
    # It's a tiny server...
    blob = bytearray(2**16)
    files = {}
    used = 0

    # Use deduplication to save space.
    def store(data):
        nonlocal used
        MINIMUM_BLOCK = 16
        MAXIMUM_BLOCK = 1024
        part_list = []
        while data:
            prefix = data[:MINIMUM_BLOCK]
            ind = -1
            bestlen, bestind = 0, -1
            while True:
                ind = blob.find(prefix, ind+1)
                if ind == -1: break
                length = len(os.path.commonprefix([data, bytes(blob[ind:ind+MAXIMUM_BLOCK])]))
                if length > bestlen:
                    bestlen, bestind = length, ind

            if bestind != -1:
                part, data = data[:bestlen], data[bestlen:]
                part_list.append((bestind, bestlen))
            else:
                part, data = data[:MINIMUM_BLOCK], data[MINIMUM_BLOCK:]
                blob[used:used+len(part)] = part
                part_list.append((used, len(part)))
                used += len(part)
                assert used <= len(blob)

        fid = "".join(secrets.choice(string.ascii_letters+string.digits) for i in range(16))
        files[fid] = part_list
        return fid

    def load(fid):
        data = []
        for ind, length in files[fid]:
            data.append(blob[ind:ind+length])
        return b"".join(data)

    print("Welcome to our file storage solution.")

    # Store the flag as one of the files.
    store(bytes(flag, "utf-8"))

    while True:
        print()
        print("Menu:")
        print("- load")
        print("- store")
        print("- status")
        print("- exit")
        choice = input().strip().lower()
        if choice == "load":
            print("Send me the file id...")
            fid = input().strip()
            data = load(fid)
            print(data.decode())
        elif choice == "store":
            print("Send me a line of data...")
            data = input().strip()
            fid = store(bytes(data, "utf-8"))
            print("Stored! Here's your file id:")
            print(fid)
        elif choice == "status":
            print("User: ctfplayer")
            print("Time: %s" % time.asctime())
            kb = used / 1024.0
            kb_all = len(blob) / 1024.0
            print("Quota: %0.3fkB/%0.3fkB" % (kb, kb_all))
            print("Files: %d" % len(files))
        elif choice == "exit":
            break
        else:
            print("Nope.")
            break

try:
    main()
except Exception:
    print("Nope.")
time.sleep(1)

脚本开了一块64kb的存储空间，flag被预先写入这块空间中。我们与这块存储空间进行交互的方式有三种：

向里面写入一段数据，写入时执行了一定的策略（下详述）。
使用写入数据时生成的标识fid，读出相应的数据。
查看当前空间占用情况。

一种考虑是直接从存储空间中直接读出flag。flag内容也是通过脚本设计的load接口写入的，写入时生成了一个对应的fid用于读取flag。如果走这条路，我们需要拿到这个fid，其是由python中的secrets模块生成的，这里摘取python官方文档中对该模块的描述

The secrets module is used for generating cryptographically strong random numbers suitable for managing data such as passwords, account authentication, security tokens, and related secrets.

总之随机性是非常强的，所以不考虑从伪随机角度突破。fid的格式是10位长的数字加字母组合，爆破显然也不可取，故直接读出flag这条路应该是堵死了。

接下来具体分析store方法具体内容

# Use deduplication to save space.
    def store(data):
        nonlocal used
        MINIMUM_BLOCK = 16
        MAXIMUM_BLOCK = 1024
        part_list = []
        while data:
            prefix = data[:MINIMUM_BLOCK]
            ind = -1
            bestlen, bestind = 0, -1
            while True:
                ind = blob.find(prefix, ind+1)
                if ind == -1: break
                length = len(os.path.commonprefix([data, bytes(blob[ind:ind+MAXIMUM_BLOCK])]))
                if length > bestlen:
                    bestlen, bestind = length, ind

            if bestind != -1:
                part, data = data[:bestlen], data[bestlen:]
                part_list.append((bestind, bestlen))
            else:
                part, data = data[:MINIMUM_BLOCK], data[MINIMUM_BLOCK:]
                blob[used:used+len(part)] = part
                part_list.append((used, len(part)))
                used += len(part)
                assert used <= len(blob)

传入数据后，先从中取出最多前16字节的数据。

1	prefix = data[:MINIMUM_BLOCK]

接下来方法尝试从整个数据块blob中匹配出所有与这前16字节数据相同的数据片段，如果有，则逐个计算剩余的传入数据（因为传入的数据是每16个字节逐块处理的，当前一个块被处理完后，对下一轮循环来说，它的传入数据指的是去掉已经处理过的数据块的数据，所以说是剩余的传入数据。这里看代码可能比文字描述更好理解。）与这些匹配的数据片段的重合部分的长度，得到这些长度中的最大值bestlen。

ind = -1
bestlen, bestind = 0, -1
while True:
    ind = blob.find(prefix, ind+1)
    if ind == -1: break
        length = len(os.path.commonprefix([data, bytes(blob[ind:ind+MAXIMUM_BLOCK])]))
        if length > bestlen:
            bestlen, bestind = length, ind

接下来是实际存储的部分，判断如果找到有重合部分，则标记已经存储了重合部分，而不把这部分重合数据再存一遍，也就是deduplication；如果没有重合部分，则存储16个字节并标记。

if bestind != -1:
    part, data = data[:bestlen], data[bestlen:]
    part_list.append((bestind, bestlen))
    else:
        part, data = data[:MINIMUM_BLOCK], data[MINIMUM_BLOCK:]
        blob[used:used+len(part)] = part
        part_list.append((used, len(part)))
        used += len(part)
        assert used <= len(blob)

剩余的传入数据将被交给下一轮循环处理。

也就是说，既然flag已经储存在blob中，当我们传入的内容和flag片段完全重合时，是不会多占用任何空间的。故我们可以逐位爆破flag，当爆破到正确的字符时，占用的空间会比其他字符更少。

我们可以简单验证一下。

[root@centos ~]# nc filestore.2021.ctfcompetition.com 1337
== proof-of-work: disabled ==
Welcome to our file storage solution.

Menu:
- load
- store
- status
- exit
status
User: ctfplayer
Time: Mon Jul 19 11:21:37 2021
Quota: 0.026kB/64.000kB
Files: 1

Menu:
- load
- store
- status
- exit
store
Send me a line of data...
CTF{
Stored! Here's your file id:
Elbg53LQZsOuJVwA

Menu:
- load
- store
- status
- exit
status
User: ctfplayer
Time: Mon Jul 19 11:21:47 2021
Quota: 0.026kB/64.000kB
Files: 2

Menu:
- load
- store
- status
- exit
store
Send me a line of data...
good
Stored! Here's your file id:
TPESgYdiaZzSKm6G

Menu:
- load
- store
- status
- exit
status
User: ctfplayer
Time: Mon Jul 19 11:21:59 2021
Quota: 0.030kB/64.000kB
Files: 3

思路有效，接下来写脚本爆破，这里我套用了以前做一道类似的题（这道题也很有意思，不妨看一下）写的脚本，爆这题会比较慢，可以自行优化下

from pwn import *

letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_{}0123456789'

flag = 'CTF{CR1M3'

while True:

    log.info(f'current flag: {flag}')

    length_list = []

    for letter in letters:

        io = remote('filestore.2021.ctfcompetition.com', 1337)

        io.recvuntil('- exit')
        
        io.sendline('store')
        
        io.recvuntil('Send me a line of data...')

        io.sendline(flag + letter)

        log.info(f'testing letter: {letter}')

        io.recvuntil('- exit')

        io.sendline('status')

        status = io.recvlinesS(4)

        recv_length = float(status[3][7:12]) # 这里recv_length拿到当前占用的空间

        length_list.append(recv_length)

        log.info(f'length: {recv_length}')

        io.close()

    min_length = min(length_list)
    assert length_list.count(min_length) == 1
    new_letter = letters[length_list.index(min_length)]
    flag += new_letter
    
    log.info(f'new letter is {new_letter}')

爆到一半assert报错了，看一下输出信息

...
[*] current flag: CTF{CR1M3_0f_d3d
[*] now solving the 5-th letter
[+] Opening connection to filestore.2021.ctfcompetition.com on port 1337: Done
[*] letter: 0
[*] length: 0.026
[+] Opening connection to filestore.2021.ctfcompetition.com on port 1337: Done
[*] letter: 1
[*] length: 0.026
[+] Opening connection to filestore.2021.ctfcompetition.com on port 1337: Done
[*] letter: 2
[*] length: 0.027
[+] Opening connection to filestore.2021.ctfcompetition.com on port 1337: Done
[*] letter: 3
[*] length: 0.026
[+] Opening connection to filestore.2021.ctfcompetition.com on port 1337: Done
[*] letter: 4
[*] length: 0.026
[+] Opening connection to filestore.2021.ctfcompetition.com on port 1337: Done
[*] letter: 5
[*] length: 0.027
...

此时flag恰好爆出16位，反应过来问题在于爆破第17个字符时，如果当前测试的是错误的字符，那么程序会先以“去冗余”的方法存入前面16个字节，然后单独处理第17个字符。只要这个字符在flag中出现过，就不会占用新的存储空间，相应地如果没有出现，则增加空间占用。所以这里我们要选定一个字符作为基准开始爆破，运气好的话这个字符就是第17个字符，可以一路爆完整个flag。

我在这个地方绕了点弯路，去google翻译看了下ded开头的单词后面大多跟i，所以从i开始爆了，爆出来是ic4ti0n}，拼起来正好是CTF{CR1M3_0f_d3dic4ti0n}~~嫌疑人的献身？~~还挺顺，交上去提示不是正确的flag。

google翻译联想

根据前面的分析我们可以得到flag中都有些什么字符，整理出来如下

1	0134CFMRTcdfinptu_{}

发现还有u和p没出现，原来flag是CTF{CR1M3_0f_d3dup1ic4ti0n}，确实和题意比较相符。

Reverse

cpp

把代码都分析懂，还原出程序逻辑后发现可以逐位爆破flag，主要是费体力，化整为零，没什么技巧。~~分析，就嗯分析，不就是6k行代码吗~~

给了c源码附件，看过去全是宏，头大。

先直接运行看看

报错，~~删掉报错信息对应的语句就可以正常编译并运行程序~~。

先简单看看程序大概干了些什么。

16-42行，让你输入flag，需要按照规定的格式（例如CHAR_C）设置字符。
45-109行，将规定格式的字符转成数值，就是字符对应的ascii值。
112-839行，存放ROM常量。
840-1919行，将用户设置的flag值写入到ROM。
1920-1925行，定义了一些操作。
1926-6215行，当满足一定的include层数后，一大堆#define、#undef等操作，这里应该是涉及到验证flag正确性的核心逻辑。
6216-6223行，套娃，include层数不够时include本身。
6224-end行，如果满足条件S == -1，则输出拿到有效key的提示，否则编译时报异常。

这里需要意识到题目的想法应该是要你输入一个flag作为key，程序用宏实现了一套DRM（Digital rights management，数字版权管理）方案，当你输对了flag时就能满足上面说的正常运行程序提示key有效的条件。

代码中多次出现__INCLUDE_LEVEL__，查资料得知这是一个预定义的宏，这里援引文档

This macro expands to a decimal integer constant that represents the depth of nesting in include files. The value of this macro is incremented on every ‘#include’ directive and decremented at the end of every included file. It starts out at 0, its value within the base file specified on the command line.

再结合一些简单的例子，大致理解这个宏的用途。当预处理器预处理源码文本时，对于一开始处理的这个cpp.c（本题中源码的名称），其__INCLUDE_LEVEL__为0，当处理到1926行的判断条件时

1	#if __INCLUDE_LEVEL__ > 12

此时__INCLUDE_LEVEL__过小，故跳过核心验证逻辑部分，而转到6216行开始include自身的部分

#else
#if S != -1
#include "cpp.c"
#endif
#if S != -1
#include "cpp.c"
#endif
#endif

如此反复，直到当前include的cpp.c文件的__INCLUDE_LEVEL__层数达到13才进入核心验证逻辑，而不再include自身。

这里为什么要反复include源码文件本身这么多次？~~可能是为了拖时间。~~一种可能的情况是核心逻辑的重复执行会造成一种“累加效应”，以实现校验作用。具体是不是这样需要我们进一步分析下去。

首先考虑到程序有大量嵌套的条件判断，我们最好把代码格式化成可读性更强的形式。这里我写了一个python脚本对源码作缩进处理：

import re

content = []
with open('cpp.c', 'r') as f:
    content = f.readlines()

else_pattern = '#else'

if_patterns = [
    '#if ',
    '#ifdef ',
    '#ifndef '
]

endif_patterns = [
    '#endif'
]

line_content = list(content)

relation = {}
S = 0

level = 0
for i in range(len(content)):
    # print(level)
    
    # 如果当前行是endif，则从当前行开始减少一层缩进
    for endif_pattern in endif_patterns:
        if content[i].find(endif_pattern) != -1:
            level -= 1

    # 在每行开头添加对应数量的'\t'用于缩进，当前行为else时，当前行减少一层缩进
    if content[i].find(else_pattern) != -1:
        line_content[i] = '\t' * (level - 1) + content[i];
    else:
        line_content[i] = '\t' * level + content[i];
    
    # 如果当前行由if开头，则从下一行开始增加一层缩进
    for if_pattern in if_patterns:
        if content[i].find(if_pattern) != -1:
            level += 1

with open('cpp_line.c', 'w') as f:
    f.writelines(line_content)

随便节选一部分代码看看效果，这玩意没缩进看起来还是蛮难受的。。。

...
				#ifdef c
					#define R4
					#undef c
				#endif
			#else
				#ifndef c
					#define R4
					#undef c
				#endif
			#endif
		#else
			#ifndef Z4
				#ifdef c
					#undef R4
					#define c
				#endif
...

继续分析程序逻辑，我们得到几点基本认识：

核心逻辑都被包裹在#if S == number形式的语句块中，
我们可以认为，代码用宏定义的方式，实现了为变量赋值的效果。字母 + 0-7范围内的数字，例如A6，可以认为表示一个8位宽数值变量从LSB到MSB顺序的第7位（即第二高位）。某个“变量”的某一“位”处于被宏定义(例如#define A6)或者未被宏定义(例如#undef A6)的状态，便可以认为是这个变量的第几位是0还是1。
每include一次这个文件，都只能执行一个语句块里的内容，所以需要include多次，累加起来实现程序功能。

先尝试分析得到从某个语句块可以跳转到另外某几个语句块的跳转关系，以使我们对程序有一个整体感受。这里我用了脚本提取+手撸的方法，因为写的脚本判断跳转是根据某个语句块里出现#define S number的情况，但实际上有一些这种语句是没有意义的。

# ...
# 前面部分和处理缩进的脚本是一样的

for i in range(len(content)):
    
    for if_pattern in if_patterns:
        if re.match('#if S == (\w{1,2})', content[i]):
                S = int(re.match('#if S == (\w{1,2})', content[i])[1])

    if re.match('#define S (\w{1,2})', content[i]):
            relation[S] = relation.get(S, [])
            relation[S].append(
                int(re.match('#define S (\w{1,2})', content[i])[1])
            )

with open('parse_relation.txt', 'w') as f:
    for item in relation.items():
        f.write(item.__str__() + '\n')

手动处理完后结果如下，元组中第一个元素表示S等于多少时的语句块，第二个元素对应这个语句块可能跳转到的语句块

(0, [24])
(1, [2])
(2, [3])
(3, [4])
(4, [5])
(5, [6, 38])
(6, [7])
(7, [8, 59])
(8, [9])
(9, [10, 59])
(10, ["BUG"]) // 报错 #error "BUG"
(11, [-1])
(12, [13])
(13, [14])
(14, [15, 22])
(15, [16])
(16, [17])
(17, [18, 19])
(18, [19])
(19, [20])
(20, [21])
(21, [14])
(22, [23])
(23, [1])
(24, [25])
(25, [26])
(26, [27])
(27, [28])
(28, [29])
(29, [30])
(30, [31])
(31, [32, 56])
(32, [33])
(33, [34])
(34, [35])
(35, [36])
(36, [37])
(37, [12])
(38, [39])
(39, [40])
(40, [41])
(41, [42])
(42, [43])
(43, [44])
(44, [45])
(45, [46])
(46, [47])
(47, [48])
(48, [49])
(49, [50])
(50, [51])
(51, [52])
(52, [53])
(53, [54])
(54, [55])
(55, [29])
(56, [57, 58])
(57, ["INVALID_FLAG"]) // 报错 #error "INVALID_FLAG"
(58, [-1])

接下来开始逐个分析每个语句块的功能，从结果来说，根据代码总结可以得到语句块实际上实现的若干种功能：

A = 01010101、A = B(赋值，例子随便举的)
A ^= B(异或等于)
A &= B(与等于)
A |= B(或等于)
A ~= A(取反等于)
A += B(加等于)
if A == value then #define S = number1 else #define S = number2(条件跳转)
``(取ROM)

这里先放张图，是程序完整逻辑的草稿，有一些细节是有问题的，主要是提供一个对程序逻辑的直观印象。

逆向程序逻辑时打的草稿

这里选几种功能来分析对应的代码“模式”

赋值

// 赋值为常量，下面一组语句等效于 B = 00100000

#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#define B5
#undef B6
#undef B7


// 赋值为变量，下面等效于 A = Y

#ifdef Y0
    #define A0
#else
    #undef A0
#endif
#ifdef Y1
    #define A1
#else
    #undef A1
#endif
#ifdef Y2
    #define A2
#else
    #undef A2
#endif
#ifdef Y3
    #define A3
#else
    #undef A3
#endif
#ifdef Y4
    #define A4
#else
    #undef A4
#endif
#ifdef Y5
    #define A5
#else
    #undef A5
#endif
#ifdef Y6
    #define A6
#else
    #undef A6
#endif
#ifdef Y7
    #define A7
#else
    #undef A7
#endif

异或等于

我们可以基于程序对每一位的操作画一张真值表，对于在代码中直接体现出来的情况，记录操作后的结果；如果某种情况没有出现，则用"keep"表示值不变。

C0	A0	final A0
1	1	0
1	0	1
0	0	keep
0	1	keep

整理发现，这不就是异或运算嘛，所以下面这一段代码就是变量的异或等于运算。

// A ^= C

#ifdef C0
    #ifdef A0
        #undef A0
    #else
        #define A0
    #endif
#endif
#ifdef C1
    #ifdef A1
        #undef A1
    #else
        #define A1
    #endif
#endif
#ifdef C2
    #ifdef A2
        #undef A2
    #else
        #define A2
    #endif
#endif
#ifdef C3
    #ifdef A3
        #undef A3
    #else
        #define A3
    #endif
#endif
#ifdef C4
    #ifdef A4
        #undef A4
    #else
        #define A4
    #endif
#endif
#ifdef C5
    #ifdef A5
        #undef A5
    #else
        #define A5
    #endif
#endif
#ifdef C6
    #ifdef A6
        #undef A6
    #else
        #define A6
    #endif
#endif
#ifdef C7
    #ifdef A7
        #undef A7
    #else
        #define A7
    #endif
#endif

与等于、或等于、取反等于三种操作也可以类似用真值表去判断，这里不作分析。但是提一下加等于，因为这不是简单一位和一位进行位运算，而要涉及到进位规则。最开始我错把其分析成了异或等于运算，重新分析发现是个不知道什么运算，用几个例子推了一下，惊觉怎么和加法一样，然后对了一遍全加器的真值表确定这是加法运算。

加等于

加法是需要进位规则的，代码在加等于部分的代码中引入了变量c，不带数字表示第几位而仅仅是一个字母"c"，用来标志进位(carry)。不同于这道题中出现的其他运算，加等于是唯一在运算过程中不同位之间会互相影响的。

依旧是列出真值表，列出表之后很容易看出这套运算是加法，有一点数字逻辑知识可能会反应更快。虽然画真值表来判断位运算感觉有些繁琐，但确实很可靠。

I0	A0	c	final I0	next c
0	0	1	1	0
0	1	0	1	0
1	0	1	0	1
1	1	0	0	1
0	0	0	keep	keep
0	1	1	keep	keep
1	0	0	keep	keep
1	1	1	keep	keep

非常巧妙，代码里只是列出了计算后I0或c可能变化的4种情况，剩下4种就不用列了。

分析到就加法我们心里就知道，这道题有可能以比较容易理解的算法设计了循环，而我们逆向的时候理解起来也可以舒服点了。

// I += A
// 实际上这段代码就是循环变量 I += 1，使循环得以实现的关键一步

#undef c // c初始化为0
#ifndef I0
    #ifndef A0
        #ifdef c
            #define I0
            #undef c
        #endif
    #else
        #ifndef c
            #define I0
            #undef c
        #endif
    #endif
#else
    #ifndef A0
        #ifdef c
            #undef I0
            #define c
        #endif
    #else
        #ifndef c
            #undef I0
            #define c
        #endif
    #endif
#endif
#ifndef I1
    #ifndef A1
        #ifdef c
            #define I1
            #undef c
        #endif
    #else
        #ifndef c
            #define I1
            #undef c
        #endif
    #endif
#else
    #ifndef A1
        #ifdef c
            #undef I1
            #define c
        #endif
    #else
        #ifndef c
            #undef I1
            #define c
        #endif
    #endif
#endif
#ifndef I2
    #ifndef A2
        #ifdef c
            #define I2
            #undef c
        #endif
    #else
        #ifndef c
            #define I2
            #undef c
        #endif
    #endif
#else
    #ifndef A2
        #ifdef c
            #undef I2
            #define c
        #endif
    #else
        #ifndef c
            #undef I2
            #define c
        #endif
    #endif
#endif
#ifndef I3
    #ifndef A3
        #ifdef c
            #define I3
            #undef c
        #endif
    #else
        #ifndef c
            #define I3
            #undef c
        #endif
    #endif
#else
    #ifndef A3
        #ifdef c
            #undef I3
            #define c
        #endif
    #else
        #ifndef c
            #undef I3
            #define c
        #endif
    #endif
#endif
#ifndef I4
    #ifndef A4
        #ifdef c
            #define I4
            #undef c
        #endif
    #else
        #ifndef c
            #define I4
            #undef c
        #endif
    #endif
#else
    #ifndef A4
        #ifdef c
            #undef I4
            #define c
        #endif
    #else
        #ifndef c
            #undef I4
            #define c
        #endif
    #endif
#endif
#ifndef I5
    #ifndef A5
        #ifdef c
            #define I5
            #undef c
        #endif
    #else
        #ifndef c
            #define I5
            #undef c
        #endif
    #endif
#else
    #ifndef A5
        #ifdef c
            #undef I5
            #define c
        #endif
    #else
        #ifndef c
            #undef I5
            #define c
        #endif
    #endif
#endif
#ifndef I6
    #ifndef A6
        #ifdef c
            #define I6
            #undef c
        #endif
    #else
        #ifndef c
            #define I6
            #undef c
        #endif
    #endif
#else
    #ifndef A6
        #ifdef c
            #undef I6
            #define c
        #endif
    #else
        #ifndef c
            #undef I6
            #define c
        #endif
    #endif
#endif
#ifndef I7
    #ifndef A7
        #ifdef c
            #define I7
            #undef c
        #endif
    #else
        #ifndef c
            #define I7
            #undef c
        #endif
    #endif
#else
    #ifndef A7
        #ifdef c
            #undef I7
            #define c
        #endif
    #else
        #ifndef c
            #undef I7
            #define c
        #endif
    #endif
#endif

条件跳转

很容易理解，看看就好

// if R == 0 then S = 59 else S = 8

#undef S
#define S 8
#ifndef R0
    #ifndef R1
        #ifndef R2
            #ifndef R3
                #ifndef R4
                    #ifndef R5
                        #ifndef R6
                            #ifndef R7
                                #undef S
                                #define S 59
                            #endif
                        #endif
                    #endif
                #endif
            #endif
        #endif
    #endif
#endif

取ROM

这里要结合几个宏定义的“函数”来看，程序利用这几个宏定义实现了从ROM中取值的功能。

#define _LD(x, y) ROM_ ## x ## _ ## y
#define LD(x, y) _LD(x, y)
#define _MA(l0, l1, l2, l3, l4, l5, l6, l7) l7 ## l6 ## l5 ## l4 ## l3 ## l2 ## l1 ## l0
#define MA(l0, l1, l2, l3, l4, l5, l6, l7) _MA(l0, l1, l2, l3, l4, l5, l6, l7)
#define l MA(l0, l1, l2, l3, l4, l5, l6, l7)

举一段例子来分析

/*
LD(l, 3) = LD(MA(l0, l1, l2, l3, l4, l5, l6, l7), 3)
	= LD(_MA(l0, l1, l2, l3, l4, l5, l6, l7), 3)
	= LD(l7 ## l6 ## l5 ## l4 ## l3 ## l2 ## l1 ## l0, 3)
	= _LD(l7 ## l6 ## l5 ## l4 ## l3 ## l2 ## l1 ## l0, 3)
	= ROM_ ## l7 ## l6 ## l5 ## l4 ## l3 ## l2 ## l1 ## l0 ## _ ## 3	
*/


#if LD(l, 3)
    #define B3
#else
    #undef B3

/*
设 l = 10000000
则 LD(l, 3) = ROM_10000000_3

#if ROM_10000000_3
	#define B3
#else
    #undef B3
*/

一组完整的例子如下

// l = I

#undef l0
#ifdef I0
    #define l0 1
#else
    #define l0 0
#endif
#undef l1
#ifdef I1
    #define l1 1
#else
    #define l1 0
#endif
#undef l2
#ifdef I2
    #define l2 1
#else
    #define l2 0
#endif
#undef l3
#ifdef I3
    #define l3 1
#else
    #define l3 0
#endif
#undef l4
#ifdef I4
    #define l4 1
#else
    #define l4 0
#endif
#undef l5
#ifdef I5
    #define l5 1
#else
    #define l5 0
#endif
#undef l6
#ifdef I6
    #define l6 1
#else
    #define l6 0
#endif
#undef l7
#ifdef I7
    #define l7 1
#else
    #define l7 0
#endif


// B = ROM(l), l为地址，取ROM中这个地址内的值

#if LD(l, 0)
    #define B0
#else
    #undef B0
#endif
#if LD(l, 1)
    #define B1
#else
    #undef B1
#endif
#if LD(l, 2)
    #define B2
#else
    #undef B2
#endif
#if LD(l, 3)
    #define B3
#else
    #undef B3
#endif
#if LD(l, 4)
    #define B4
#else
    #undef B4
#endif
#if LD(l, 5)
    #define B5
#else
    #undef B5
#endif
#if LD(l, 6)
    #define B6
#else
    #undef B6
#endif
#if LD(l, 7)
    #define B7
#else
    #undef B7
#endif

到这里就整理出了各个语句块的具体功能，接下来我们还要对这些功能进行整理合并，还原得到算法，这里我还原后的C实现如下

#include<stdio.h>

// 下面3段常量是程序里用到的3处ROM片段，注释后是片段基址
const char base1[26] = { // 00000000
    187, 85, 171, 197, 185, 157, 201, 105, 187, 55, 217, 205, 33, 
    179, 207, 207, 159, 9, 181, 61, 235, 127, 87, 161, 235, 135
};
const char base2[26] = { // 00100000
    8, 100, 100, 53, 145, 100, 231, 160, 6, 170, 221, 117, 23, 
    157, 109, 92, 94, 25, 253, 233, 12, 249, 180, 131, 134, 34
};
const char base3[26] = { // 01000000
    250, 123, 27, 186, 30, 180, 179, 88, 198, 243, 140, 144, 59, 
    186, 25, 110, 206, 223, 241, 37, 141, 64, 128, 112, 224, 77 
};
const char flag[26]; // 10000000

int main() {

    char I = 0, M = 0, N = 1, P = 0, Q = 0;

    char A, B, C;

    while(I != 26) {

        A = flag[I];
        B = base1[I];

        char X = 1, Y = 0;
        while(X != 0) {
            if(B & X)
                Y += A;
            X <<= 1;
            A <<= 1;
        }
        A = Y;

        N = M + N; // N_next = M + N
        M = N - M; // M_next = N
        A += M;

        C = base2[I];
        
        A ^= C;
        
        P += A;

        A = base3[I];

        A ^= P;

        // EVERY Time Here A MUST Be 0!!! So We Can Crack Flag Byte By Byte!!! 
        Q |= A; 

        I += 1;
    }

    if(Q == 0)
        printf("Correct!\n");
    else
        printf("Invalid flag!\n");

    return 0;
}

其中，ROM的值是拿脚本提取出来的

import re

content = []
with open('cpp.c', 'r') as f:
    content = f.readlines()

value_map = {}

for line in content:

    p = '#define ROM_(0[0-1]{7})_([0-7]) ([0-1])'

    result = re.match(p, line)
    
    if result:

        # print(type(result[3]), type(value_map.get(result[1], '')), result[1])
        value_map[result[1]] = result[3] + value_map.get(result[1], '')

        if result[2] == '7':

            value_map[result[1]] = int(value_map[result[1]], 2)

with open('rom.txt', 'w') as f:

    for item in value_map.items():

        f.write(f'{item[0]}: {item[1]}\n')

在逆向算法的过程中，我们应当注意到的非常重要的一点是，只要key的任何一位不满足条件，用于判断key正确与否的关键变量Q都会在这一位对应的一轮循环中不可逆地标记key为错误的（具体是因为Q |= A这个运算），这意味着key是可以逐位爆破的。这里我基于上面的c代码小改了一下用于爆破：

#include<stdio.h>

const char dict[65] = {
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '{', '}', '_'
};

const char base1[26] = { // 00000000
    187, 85, 171, 197, 185, 157, 201, 105, 187, 55, 217, 205, 33, 
    179, 207, 207, 159, 9, 181, 61, 235, 127, 87, 161, 235, 135
};
const char base2[26] = { // 00100000
    8, 100, 100, 53, 145, 100, 231, 160, 6, 170, 221, 117, 23, 
    157, 109, 92, 94, 25, 253, 233, 12, 249, 180, 131, 134, 34
};
const char base3[26] = { // 01000000
    250, 123, 27, 186, 30, 180, 179, 88, 198, 243, 140, 144, 59, 
    186, 25, 110, 206, 223, 241, 37, 141, 64, 128, 112, 224, 77 
};
char flag[26]; // 10000000

int main() {

    for(int flag_index = 0; flag_index != 26; flag_index ++) {

        int cracked_flag = 0;
        for(int dict_index = 0; !cracked_flag; dict_index++) {

            flag[flag_index] = dict[dict_index]; 

            char I = 0, M = 0, N = 1, P = 0, Q = 0;

            char A, B, C;

            while(I != 26) {

                A = flag[I];
                B = base1[I];

                char X = 1, Y = 0;
                while(X != 0) {
                    if(B & X)
                        Y += A;
                    X <<= 1;
                    A <<= 1;
                }
                A = Y;

                N = M + N; // N_ = M + N
                M = N - M; // M_ = N

                A += M;

                C = base2[I];

                A ^= C;

                P += A;

                A = base3[I];

                A ^= P;

                // EVERY Time Here A MUST Be 0!!! So We Can Crack Flag Byte By Byte!!! 
                Q |= A; 

                if(I == flag_index) {
                    if(!Q) 
                        cracked_flag = 1;
                    break;
                }

                I += 1;
            }
        }
    }

    printf("The flag will be printed in the next line: \n");
    printf("%s", flag);

    return 0;
}

爆破结果为CTF{pr3pr0cess0r_pr0fe5sor，加上}得到正确flag。

Misc

filestore

推荐题解

Reverse

cpp