PE指的是Windows下的32位可执行文件,又叫PE32。64位的可执行文件称为PE+或者PE32+

PE拓展名如下:(摘自逆向工程核心原理13.2)

种类 主扩展名
可执行系列 EXE、SCR
驱动程序系列 SYS、VXD
库系列 DLL、OCX、CPL、DRV
对象文件系列 OBJ

众所周知,文件拓展名只是影响通过什么东西去打开这个文件,或者怎样解析这个文件,和文件本身没有关系,因此主要作用就是了解一下(

VA&RVA

VA就是Virtual Address,就是我们日常说的虚拟地址,即程序运行的时候使用的地址;RVA是Relative Virtual Address,即从某个基地址开始的相对地址,关系如下
VA = ImageBase+RVA,PE头的信息大多以RVA形式给出

PE文件结构

在这里使用MinGW编译一个很简单的程序

1
2
3
4
5
#include <stdio.h>
/* Minimal sample program: writes a single line to stdout and exits with status 0. */
int main(void)
{
    fputs("PE format!\n", stdout);
    return 0;
}

编译后的二进制文件如下:

维基百科中PE32结构如下

可以在Windows Kit里面的winnt.h中查看具体结构
大致分为:

  • MS-DOS 头
  • MS-DOS 存根
  • PE 文件头
  • 节表
  • 节数据
  • 调试信息

1.DOS头

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * MS-DOS header, the first structure in a PE file.
 * It holds 30 WORD-sized fields followed by one LONG; with a 2-byte WORD and
 * a 4-byte LONG that is 64 (0x40) bytes, which places e_lfanew at offset 0x3C.
 * The two fields that matter when parsing a PE file are e_magic and e_lfanew.
 */
typedef struct _IMAGE_DOS_HEADER {      // DOS .EXE header
WORD e_magic; // Magic number ("MZ")
WORD e_cblp; // Bytes on last page of file
WORD e_cp; // Pages in file
WORD e_crlc; // Relocations
WORD e_cparhdr; // Size of header in paragraphs
WORD e_minalloc; // Minimum extra paragraphs needed
WORD e_maxalloc; // Maximum extra paragraphs needed
WORD e_ss; // Initial (relative) SS value
WORD e_sp; // Initial SP value
WORD e_csum; // Checksum
WORD e_ip; // Initial IP value
WORD e_cs; // Initial (relative) CS value
WORD e_lfarlc; // File address of relocation table
WORD e_ovno; // Overlay number
WORD e_res[4]; // Reserved words
WORD e_oemid; // OEM identifier (for e_oeminfo)
WORD e_oeminfo; // OEM information; e_oemid specific
WORD e_res2[10]; // Reserved words
LONG e_lfanew; // File address of new exe header (file offset of the PE header)
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;

该结构体大小为64字节(0x40,即30*WORD+LONG),重点是e_magic和e_lfanew:前者是MZ,即DOS头魔数;后者e_lfanew给出了新的exe头(PE头)的文件偏移,从我们编译出来的文件来看:

文件偏移0x3C-0x3F(即第61-64字节)处的值(小端序)为0x80,指向PE头。

2.DOS存根

DOS存根就是DOS头下的部分,我们将除了字符串的部分进行反汇编

1
2
3
4
5
6
7
0x0000000000000000:  0E          push cs
0x0000000000000001: 1F pop ds
0x0000000000000002: BA 0E 00 mov dx, 0xe
0x0000000000000005: B4 09 mov ah, 9
0x0000000000000007: CD 21 int 0x21 //print
0x0000000000000009: B8 01 4C mov ax, 0x4c01
0x000000000000000c: CD 21 int 0x21 //exit

可以看到,他把cs移动到ds,把0xe(字符串地址)移动到dx,调用系统调用打印字符串,之后调用exit系统调用退出

3.PE头

1
2
3
4
5
6
7
8
9
10
11
// PE header of a 64-bit (PE32+) image: signature, file header, then the 64-bit optional header.
typedef struct _IMAGE_NT_HEADERS64 {
DWORD Signature; // Signature: "PE" followed by two zero bytes
IMAGE_FILE_HEADER FileHeader; // PE file header
IMAGE_OPTIONAL_HEADER64 OptionalHeader; // Extended (optional) PE header, 64-bit layout
} IMAGE_NT_HEADERS64, *PIMAGE_NT_HEADERS64;

// PE header of a 32-bit (PE32) image: signature, file header, then the 32-bit optional header.
typedef struct _IMAGE_NT_HEADERS {
DWORD Signature; // Signature: "PE" followed by two zero bytes
IMAGE_FILE_HEADER FileHeader; // PE file header
IMAGE_OPTIONAL_HEADER32 OptionalHeader; // Extended (optional) PE header, 32-bit layout
} IMAGE_NT_HEADERS32, *PIMAGE_NT_HEADERS32;

PE头结构如上,其中IMAGE_FILE_HEADER结构如下

1
2
3
4
5
6
7
8
9
// File header embedded in the NT headers, directly after the signature.
typedef struct _IMAGE_FILE_HEADER {
WORD Machine; // Target platform (one of the IMAGE_FILE_MACHINE_* values)
WORD NumberOfSections; // Number of sections
DWORD TimeDateStamp; // Timestamp written by the compiler
DWORD PointerToSymbolTable; // Pointer to the symbol table
DWORD NumberOfSymbols; // Number of symbols
WORD SizeOfOptionalHeader; // Size of the extended (optional) PE header
WORD Characteristics; // File attributes (IMAGE_FILE_* flag bits)
} IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER;
  • Machine 可以在什么机器上运行
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    // Values of IMAGE_FILE_HEADER.Machine: the kind of machine the image can run on.
    #define IMAGE_FILE_MACHINE_UNKNOWN           0
    #define IMAGE_FILE_MACHINE_TARGET_HOST 0x0001 // Useful for indicating we want to interact with the host and not a WoW guest.
    #define IMAGE_FILE_MACHINE_I386 0x014c // Intel 386.
    #define IMAGE_FILE_MACHINE_R3000 0x0162 // MIPS little-endian, 0x160 big-endian
    #define IMAGE_FILE_MACHINE_R4000 0x0166 // MIPS little-endian
    #define IMAGE_FILE_MACHINE_R10000 0x0168 // MIPS little-endian
    #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 // MIPS little-endian WCE v2
    #define IMAGE_FILE_MACHINE_ALPHA 0x0184 // Alpha_AXP
    #define IMAGE_FILE_MACHINE_SH3 0x01a2 // SH3 little-endian
    #define IMAGE_FILE_MACHINE_SH3DSP 0x01a3
    #define IMAGE_FILE_MACHINE_SH3E 0x01a4 // SH3E little-endian
    #define IMAGE_FILE_MACHINE_SH4 0x01a6 // SH4 little-endian
    #define IMAGE_FILE_MACHINE_SH5 0x01a8 // SH5
    #define IMAGE_FILE_MACHINE_ARM 0x01c0 // ARM Little-Endian
    #define IMAGE_FILE_MACHINE_THUMB 0x01c2 // ARM Thumb/Thumb-2 Little-Endian
    #define IMAGE_FILE_MACHINE_ARMNT 0x01c4 // ARM Thumb-2 Little-Endian
    #define IMAGE_FILE_MACHINE_AM33 0x01d3
    #define IMAGE_FILE_MACHINE_POWERPC 0x01F0 // IBM PowerPC Little-Endian
    #define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1
    #define IMAGE_FILE_MACHINE_IA64 0x0200 // Intel 64
    #define IMAGE_FILE_MACHINE_MIPS16 0x0266 // MIPS
    #define IMAGE_FILE_MACHINE_ALPHA64 0x0284 // ALPHA64
    #define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 // MIPS
    #define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 // MIPS
    #define IMAGE_FILE_MACHINE_AXP64 IMAGE_FILE_MACHINE_ALPHA64
    #define IMAGE_FILE_MACHINE_TRICORE 0x0520 // Infineon
    #define IMAGE_FILE_MACHINE_CEF 0x0CEF
    #define IMAGE_FILE_MACHINE_EBC 0x0EBC // EFI Byte Code
    #define IMAGE_FILE_MACHINE_AMD64 0x8664 // AMD64 (K8)
    #define IMAGE_FILE_MACHINE_M32R 0x9041 // M32R little-endian
    #define IMAGE_FILE_MACHINE_ARM64 0xAA64 // ARM64 Little-Endian
    #define IMAGE_FILE_MACHINE_CEE 0xC0EE
  • Characteristics 文件属性
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    // Flag bits of IMAGE_FILE_HEADER.Characteristics; each value is a single bit, so they combine with bitwise OR.
    #define IMAGE_FILE_RELOCS_STRIPPED           0x0001  // Relocation info stripped from file.
    #define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 // File is executable (i.e. no unresolved external references).
    #define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 // Line numbers stripped from file.
    #define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 // Local symbols stripped from file.
    #define IMAGE_FILE_AGGRESIVE_WS_TRIM 0x0010 // Aggressively trim working set
    #define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 // App can handle >2gb addresses
    #define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 // Bytes of machine word are reversed.
    #define IMAGE_FILE_32BIT_MACHINE 0x0100 // 32 bit word machine.
    #define IMAGE_FILE_DEBUG_STRIPPED 0x0200 // Debugging info stripped from file in .DBG file
    #define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 // If Image is on removable media, copy and run from the swap file.
    #define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 // If Image is on Net, copy and run from the swap file.
    #define IMAGE_FILE_SYSTEM 0x1000 // System File.
    #define IMAGE_FILE_DLL 0x2000 // File is a DLL.
    #define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 // File should only be run on a UP machine
    #define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 // Bytes of machine word are reversed.
    该文件属性的原始字节为27 00(小端序),即0x0027 = 0b0010 0111,对应IMAGE_FILE_RELOCS_STRIPPED, IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_LARGE_ADDRESS_AWARE

PE拓展头

PE32的拓展头如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// Extended (optional) PE header, 32-bit (PE32) layout.
typedef struct _IMAGE_OPTIONAL_HEADER {
//
// Standard fields.
//

WORD Magic; // PE32: 0x10B, PE32+ (64-bit): 0x20B
BYTE MajorLinkerVersion;
BYTE MinorLinkerVersion;
DWORD SizeOfCode;
DWORD SizeOfInitializedData;
DWORD SizeOfUninitializedData;
DWORD AddressOfEntryPoint; // RVA of the program entry point
DWORD BaseOfCode; // RVA of the code section
DWORD BaseOfData; // RVA of the data section

//
// NT additional fields.
//

DWORD ImageBase; // Image base address
DWORD SectionAlignment; // Alignment in memory
DWORD FileAlignment; // Alignment in the file
WORD MajorOperatingSystemVersion; // Version-number related, as are the fields that follow
WORD MinorOperatingSystemVersion;
WORD MajorImageVersion;
WORD MinorImageVersion;
WORD MajorSubsystemVersion;
WORD MinorSubsystemVersion;
DWORD Win32VersionValue;
DWORD SizeOfImage; // Size of the image once mapped into memory; must be a multiple of the memory alignment
DWORD SizeOfHeaders; // Combined size of the MS-DOS stub, PE headers and section headers, rounded up to a multiple of FileAlignment
DWORD CheckSum;
WORD Subsystem; // Subsystem required to run the image
WORD DllCharacteristics; // DLL characteristics
DWORD SizeOfStackReserve; // Stack size to reserve
DWORD SizeOfStackCommit; // Stack size to commit
DWORD SizeOfHeapReserve; // Heap size to reserve
DWORD SizeOfHeapCommit; // Heap size to commit
DWORD LoaderFlags;
DWORD NumberOfRvaAndSizes; // Number of DataDirectory entries
IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; // Optional-header data directories
} IMAGE_OPTIONAL_HEADER32, *PIMAGE_OPTIONAL_HEADER32;

  • subsystem
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    // Values of the optional header's Subsystem field: the subsystem the image needs in order to run.
    #define IMAGE_SUBSYSTEM_UNKNOWN              0   // Unknown subsystem.
    #define IMAGE_SUBSYSTEM_NATIVE 1 // Image doesn't require a subsystem.
    #define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 // Image runs in the Windows GUI subsystem.
    #define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 // Image runs in the Windows character subsystem.
    #define IMAGE_SUBSYSTEM_OS2_CUI 5 // image runs in the OS/2 character subsystem.
    #define IMAGE_SUBSYSTEM_POSIX_CUI 7 // image runs in the Posix character subsystem.
    #define IMAGE_SUBSYSTEM_NATIVE_WINDOWS 8 // image is a native Win9x driver.
    #define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI 9 // Image runs in the Windows CE subsystem.
    #define IMAGE_SUBSYSTEM_EFI_APPLICATION 10 //
    #define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER 11 //
    #define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER 12 //
    #define IMAGE_SUBSYSTEM_EFI_ROM 13
    #define IMAGE_SUBSYSTEM_XBOX 14
    #define IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION 16
    #define IMAGE_SUBSYSTEM_XBOX_CODE_CATALOG 17
    该程序为WINDOWS_CUI即命令行程序
    DataDirectory中包含导出表,导入表,资源表,证书表等一系列东西链接

文件对齐为512字节(0x200)
文件头(DOS头+PE头+节表)开始在文件偏移0的位置,结束于0x400-1,大小0x400即1024,红色部分是对齐的填充

计算可知,文件头区大小为

4.节区头

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Section header: one entry of the section table, describing where a section
// lives in the file and where it is placed in the loaded image.
typedef struct _IMAGE_SECTION_HEADER {
BYTE Name[IMAGE_SIZEOF_SHORT_NAME]; // Section name
union {
DWORD PhysicalAddress;
DWORD VirtualSize; // Size of the section once loaded into memory
} Misc;
DWORD VirtualAddress; // RVA of the section within the image
DWORD SizeOfRawData; // Size of the section in the file
DWORD PointerToRawData; // File offset of the section
DWORD PointerToRelocations;
DWORD PointerToLinenumbers; // Points to line-number information; debugging related
WORD NumberOfRelocations;
WORD NumberOfLinenumbers;
DWORD Characteristics; // Section attributes (IMAGE_SCN_* flags)
} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER;

节区头大小为40字节,注意,节名称没有规定,数据段也可以叫.text,在我的电脑中,Characteristics如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
//
// Section characteristics.
//
// Values for IMAGE_SECTION_HEADER.Characteristics.
// Most entries are single-bit flags. The IMAGE_SCN_ALIGN_* values are not:
// they are consecutive values 0x1..0xE stored in the 4-bit field selected by
// IMAGE_SCN_ALIGN_MASK, so compare (value & IMAGE_SCN_ALIGN_MASK) against them.
// Some names below share a value (0x00008000 and 0x00020000 are each defined twice).
//
// IMAGE_SCN_TYPE_REG 0x00000000 // Reserved.
// IMAGE_SCN_TYPE_DSECT 0x00000001 // Reserved.
// IMAGE_SCN_TYPE_NOLOAD 0x00000002 // Reserved.
// IMAGE_SCN_TYPE_GROUP 0x00000004 // Reserved.
#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 // Reserved.
// IMAGE_SCN_TYPE_COPY 0x00000010 // Reserved.

#define IMAGE_SCN_CNT_CODE 0x00000020 // Section contains code.
#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 // Section contains initialized data.
#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 // Section contains uninitialized data.

#define IMAGE_SCN_LNK_OTHER 0x00000100 // Reserved.
#define IMAGE_SCN_LNK_INFO 0x00000200 // Section contains comments or some other type of information.
// IMAGE_SCN_TYPE_OVER 0x00000400 // Reserved.
#define IMAGE_SCN_LNK_REMOVE 0x00000800 // Section contents will not become part of image.
#define IMAGE_SCN_LNK_COMDAT 0x00001000 // Section contents comdat.
// 0x00002000 // Reserved.
// IMAGE_SCN_MEM_PROTECTED - Obsolete 0x00004000
#define IMAGE_SCN_NO_DEFER_SPEC_EXC 0x00004000 // Reset speculative exceptions handling bits in the TLB entries for this section.
#define IMAGE_SCN_GPREL 0x00008000 // Section content can be accessed relative to GP
#define IMAGE_SCN_MEM_FARDATA 0x00008000
// IMAGE_SCN_MEM_SYSHEAP - Obsolete 0x00010000
#define IMAGE_SCN_MEM_PURGEABLE 0x00020000
#define IMAGE_SCN_MEM_16BIT 0x00020000
#define IMAGE_SCN_MEM_LOCKED 0x00040000
#define IMAGE_SCN_MEM_PRELOAD 0x00080000

#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 //
#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 //
#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 //
#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 //
#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 // Default alignment if no others are specified.
#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 //
#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 //
#define IMAGE_SCN_ALIGN_128BYTES 0x00800000 //
#define IMAGE_SCN_ALIGN_256BYTES 0x00900000 //
#define IMAGE_SCN_ALIGN_512BYTES 0x00A00000 //
#define IMAGE_SCN_ALIGN_1024BYTES 0x00B00000 //
#define IMAGE_SCN_ALIGN_2048BYTES 0x00C00000 //
#define IMAGE_SCN_ALIGN_4096BYTES 0x00D00000 //
#define IMAGE_SCN_ALIGN_8192BYTES 0x00E00000 //
// Unused 0x00F00000
#define IMAGE_SCN_ALIGN_MASK 0x00F00000

#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 // Section contains extended relocations.
#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 // Section can be discarded.
#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 // Section is not cachable.
#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 // Section is not pageable.
#define IMAGE_SCN_MEM_SHARED 0x10000000 // Section is shareable.
#define IMAGE_SCN_MEM_EXECUTE 0x20000000 // Section is executable.
#define IMAGE_SCN_MEM_READ 0x40000000 // Section is readable.
#define IMAGE_SCN_MEM_WRITE 0x80000000 // Section is writeable.

//
// TLS Characteristic Flags
//
#define IMAGE_SCN_SCALE_INDEX 0x00000001 // Tls index is scaled

之后就都是节数据了

PE从磁盘加载到内存

网上看了一堆感觉讲的迷迷糊糊的,我自己写吧还是(也有可能是我笨),首先看《逆向工程核心原理》给出的公式
RAW = 文件中的偏移

1
RAW = RVA - VirtualAddress + PointerToRawData

本质上来说就是(VA-ImageBase)(这块就是RVA)-VirtualAddress+PointerToRawData,一个起点是PointerToRawData,一个起点是ImageBase+VirtualAddress,以这个程序为例。
text段的数据如下

1
2
VirtualAddress   = 0x1000
PointerToRawData = 0x400

使用调试器打开程序查看(这里使用的是x64dbg)

可以看到0x401000内容如下(ImageBase=0x400000),对应文件内容地址0x400开始

可以看到完全能对应上