Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e94ec7b

Browse files
authored
Fix imphash issue with empty import names. (#1944)
* Fix imphash issue with empty import names. If an import has an empty name, skip processing it. This is consistent with the behavior of pefile (https://github.com/erocarrera/pefile/blob/593d094e35198dad92aaf040bef17eb800c8a373/pefile.py#L5871-L5872). Add a test case which is just the tiny test file with the first import name set to all NULL bytes. I tested that pefile calculates the imphash of it, which matches what YARA now calculates too: >>> import pefile >>> pe = pefile.PE('/Users/wxs/src/yara/tests/data/tiny_empty_import_name') >>> pe.get_imphash() '0eff3a0eb037af8c1ef0bada984d6af5' >>> Fixes #1943 * Add test file forgotten in last commit. * Handle invalid import names. If an imported function's name contains anything other than a-zA-Z0-9 and a small subset of special characters, it will now be ignored. This aligns us better with pefile, which checks for valid import names and skips them if they are invalid. I've also updated the test file to check that these special characters are handled properly. * Fix test after alignment with pefile. Turns out that the tiny-idata-5200 file is corrupted to the point that pefile doesn't parse it. For example, it finds no imports to hash: ``` >>> pe = pefile.PE('tests/data/tiny-idata-5200') >>> pe.get_imphash() '' >>> ``` Before these changes we were parsing incorrect imports from this file, so fix the tests to reflect the fact that we no longer parse any imports from it. As part of this I've split the checks for number of parsed imports and successfully parsed imports into two different counters, which now means we are accurately reflecting when we are able to parse the import table but not the descriptors in it while still making sure we don't parse too many as we have seen before. * Move declarations to align better. Move these declarations so they are with the rest of the constants. This makes the output of -D easier to read. * Fix memory leak when handling corrupt imports. 
If we have an invalid import name we need to free the name and continue to the next thunk and function. While fixing this I noticed that if we fail to alloc an IMPORT_FUNCTION* we would end up looping endlessly because we were never incrementing the thunk pointer or function index. Fix it by ALWAYS incrementing those at the end of the loop and conditionally populating the newly allocated node.
1 parent bc0f052 commit e94ec7b

3 files changed

Lines changed: 106 additions & 108 deletions

File tree

libyara/modules/pe/pe.c

Lines changed: 93 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,8 @@ static void pe_parse_debug_directory(PE* pe)
326326
{
327327
int64_t pcv_hdr_offset = 0;
328328

329-
debug_dir =
330-
(PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset + i * sizeof(IMAGE_DEBUG_DIRECTORY));
329+
debug_dir = (PIMAGE_DEBUG_DIRECTORY) (pe->data + debug_dir_offset +
330+
i * sizeof(IMAGE_DEBUG_DIRECTORY));
331331

332332
if (!struct_fits_in_pe(pe, debug_dir, IMAGE_DEBUG_DIRECTORY))
333333
break;
@@ -409,7 +409,8 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name(
409409
if (yr_le32toh(entry->Name) & 0x80000000)
410410
{
411411
const PIMAGE_RESOURCE_DIR_STRING_U pNameString =
412-
(PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data + (yr_le32toh(entry->Name) & 0x7FFFFFFF));
412+
(PIMAGE_RESOURCE_DIR_STRING_U) (rsrc_data +
413+
(yr_le32toh(entry->Name) & 0x7FFFFFFF));
413414

414415
// A resource directory string is 2 bytes for the length and then a variable
415416
// length Unicode string. Make sure we have at least 2 bytes.
@@ -419,7 +420,9 @@ static const PIMAGE_RESOURCE_DIR_STRING_U parse_resource_name(
419420

420421
// Move past the length and make sure we have enough bytes for the string.
421422
if (!fits_in_pe(
422-
pe, pNameString, sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2))
423+
pe,
424+
pNameString,
425+
sizeof(uint16_t) + yr_le16toh(pNameString->Length) * 2))
423426
return NULL;
424427

425428
return pNameString;
@@ -806,6 +809,25 @@ static int pe_collect_resources(
806809
return RESOURCE_CALLBACK_CONTINUE;
807810
}
808811

812+
// Checks whether an imported function name is acceptable.
//
// Function names should have only lowercase, uppercase, digits and a small
// subset of special characters. This is to match behavior of pefile. See
// https://github.com/erocarrera/pefile/blob/593d094e35198dad92aaf040bef17eb800c8a373/pefile.py#L2326-L2348
//
// Args:
//   name: NUL-terminated string to check. It is only read, never modified.
//         May be NULL.
//
// Returns:
//   1 if the name is non-empty and every character is one of a-z, A-Z, 0-9
//   or one of . _ ? @ $ ( ) < >. Returns 0 otherwise, including when the
//   name is NULL or the empty string.
static int valid_function_name(const char* name)
{
  // Complete set of characters accepted in a function name.
  static const char allowed[] =
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "0123456789"
      "._?@$()<>";

  // A missing or empty name is never valid.
  if (name == NULL || name[0] == '\0')
    return 0;

  // strspn() returns the length of the leading run made up only of allowed
  // characters; the name is valid only if that run reaches the terminator.
  return name[strspn(name, allowed)] == '\0';
}
830+
809831
static IMPORT_FUNCTION* pe_parse_import_descriptor(
810832
PE* pe,
811833
PIMAGE_IMPORT_DESCRIPTOR import_descriptor,
@@ -814,6 +836,11 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
814836
{
815837
IMPORT_FUNCTION* head = NULL;
816838
IMPORT_FUNCTION* tail = NULL;
839+
// This is tracked separately from num_function_imports because that is the
840+
// number of successfully parsed imports, while this is the number of imports
841+
// attempted to be parsed. This allows us to stop parsing on too many imports
842+
// while still accurately recording the number of successfully parsed imports.
843+
int parsed_imports = 0;
817844

818845
int64_t offset = pe_rva_to_offset(
819846
pe, yr_le32toh(import_descriptor->OriginalFirstThunk));
@@ -834,22 +861,24 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
834861

835862
while (struct_fits_in_pe(pe, thunks64, IMAGE_THUNK_DATA64) &&
836863
yr_le64toh(thunks64->u1.Ordinal) != 0 &&
837-
*num_function_imports < MAX_PE_IMPORTS)
864+
parsed_imports < MAX_PE_IMPORTS)
838865
{
839866
char* name = NULL;
840867
uint16_t ordinal = 0;
841868
uint8_t has_ordinal = 0;
842869
uint64_t rva_address = 0;
843870

871+
parsed_imports++;
872+
844873
if (!(yr_le64toh(thunks64->u1.Ordinal) & IMAGE_ORDINAL_FLAG64))
845874
{
846875
// If imported by name
847876
offset = pe_rva_to_offset(pe, yr_le64toh(thunks64->u1.Function));
848877

849878
if (offset >= 0)
850879
{
851-
PIMAGE_IMPORT_BY_NAME import =
852-
(PIMAGE_IMPORT_BY_NAME) (pe->data + offset);
880+
PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data +
881+
offset);
853882

854883
if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME))
855884
{
@@ -871,6 +900,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
871900
rva_address = yr_le32toh(import_descriptor->FirstThunk) +
872901
(sizeof(uint64_t) * func_idx);
873902

903+
if (name != NULL && !valid_function_name(name))
904+
{
905+
yr_free(name);
906+
thunks64++;
907+
func_idx++;
908+
continue;
909+
}
910+
874911
if (name != NULL || has_ordinal == 1)
875912
{
876913
IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc(
@@ -879,25 +916,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
879916
if (imported_func == NULL)
880917
{
881918
yr_free(name);
882-
continue;
883919
}
920+
else
921+
{
922+
imported_func->name = name;
923+
imported_func->ordinal = ordinal;
924+
imported_func->has_ordinal = has_ordinal;
925+
imported_func->rva = rva_address;
926+
imported_func->next = NULL;
884927

885-
imported_func->name = name;
886-
imported_func->ordinal = ordinal;
887-
imported_func->has_ordinal = has_ordinal;
888-
imported_func->rva = rva_address;
889-
imported_func->next = NULL;
890-
891-
if (head == NULL)
892-
head = imported_func;
928+
if (head == NULL)
929+
head = imported_func;
893930

894-
if (tail != NULL)
895-
tail->next = imported_func;
931+
if (tail != NULL)
932+
tail->next = imported_func;
896933

897-
tail = imported_func;
934+
tail = imported_func;
935+
(*num_function_imports)++;
936+
}
898937
}
899938

900-
(*num_function_imports)++;
901939
thunks64++;
902940
func_idx++;
903941
}
@@ -916,15 +954,17 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
916954
uint8_t has_ordinal = 0;
917955
uint32_t rva_address = 0;
918956

957+
parsed_imports++;
958+
919959
if (!(yr_le32toh(thunks32->u1.Ordinal) & IMAGE_ORDINAL_FLAG32))
920960
{
921961
// If imported by name
922962
offset = pe_rva_to_offset(pe, yr_le32toh(thunks32->u1.Function));
923963

924964
if (offset >= 0)
925965
{
926-
PIMAGE_IMPORT_BY_NAME import =
927-
(PIMAGE_IMPORT_BY_NAME) (pe->data + offset);
966+
PIMAGE_IMPORT_BY_NAME import = (PIMAGE_IMPORT_BY_NAME) (pe->data +
967+
offset);
928968

929969
if (struct_fits_in_pe(pe, import, IMAGE_IMPORT_BY_NAME))
930970
{
@@ -946,6 +986,14 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
946986
rva_address = yr_le32toh(import_descriptor->FirstThunk) +
947987
(sizeof(uint32_t) * func_idx);
948988

989+
if (name != NULL && !valid_function_name(name))
990+
{
991+
yr_free(name);
992+
thunks32++;
993+
func_idx++;
994+
continue;
995+
}
996+
949997
if (name != NULL || has_ordinal == 1)
950998
{
951999
IMPORT_FUNCTION* imported_func = (IMPORT_FUNCTION*) yr_calloc(
@@ -954,25 +1002,26 @@ static IMPORT_FUNCTION* pe_parse_import_descriptor(
9541002
if (imported_func == NULL)
9551003
{
9561004
yr_free(name);
957-
continue;
9581005
}
1006+
else
1007+
{
1008+
imported_func->name = name;
1009+
imported_func->ordinal = ordinal;
1010+
imported_func->has_ordinal = has_ordinal;
1011+
imported_func->rva = rva_address;
1012+
imported_func->next = NULL;
9591013

960-
imported_func->name = name;
961-
imported_func->ordinal = ordinal;
962-
imported_func->has_ordinal = has_ordinal;
963-
imported_func->rva = rva_address;
964-
imported_func->next = NULL;
965-
966-
if (head == NULL)
967-
head = imported_func;
1014+
if (head == NULL)
1015+
head = imported_func;
9681016

969-
if (tail != NULL)
970-
tail->next = imported_func;
1017+
if (tail != NULL)
1018+
tail->next = imported_func;
9711019

972-
tail = imported_func;
1020+
tail = imported_func;
1021+
(*num_function_imports)++;
1022+
}
9731023
}
9741024

975-
(*num_function_imports)++;
9761025
thunks32++;
9771026
func_idx++;
9781027
}
@@ -1072,6 +1121,7 @@ void pe_set_imports(
10721121
static IMPORTED_DLL* pe_parse_imports(PE* pe)
10731122
{
10741123
int64_t offset;
1124+
int parsed_imports = 0; // Number of parsed DLLs
10751125
int num_imports = 0; // Number of imported DLLs
10761126
int num_function_imports = 0; // Total number of functions imported
10771127

@@ -1101,8 +1151,10 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
11011151
imports = (PIMAGE_IMPORT_DESCRIPTOR) (pe->data + offset);
11021152

11031153
while (struct_fits_in_pe(pe, imports, IMAGE_IMPORT_DESCRIPTOR) &&
1104-
yr_le32toh(imports->Name) != 0 && num_imports < MAX_PE_IMPORTS)
1154+
yr_le32toh(imports->Name) != 0 && parsed_imports < MAX_PE_IMPORTS)
11051155
{
1156+
parsed_imports++;
1157+
11061158
int64_t offset = pe_rva_to_offset(pe, yr_le32toh(imports->Name));
11071159

11081160
if (offset >= 0)
@@ -1127,7 +1179,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
11271179
if (functions != NULL)
11281180
{
11291181
imported_dll->name = yr_strdup(dll_name);
1130-
;
11311182
imported_dll->functions = functions;
11321183
imported_dll->next = NULL;
11331184

@@ -1138,6 +1189,7 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
11381189
tail->next = imported_dll;
11391190

11401191
tail = imported_dll;
1192+
num_imports++;
11411193
}
11421194
else
11431195
{
@@ -1146,7 +1198,6 @@ static IMPORTED_DLL* pe_parse_imports(PE* pe)
11461198
}
11471199
}
11481200

1149-
num_imports++;
11501201
imports++;
11511202
}
11521203

@@ -3603,6 +3654,10 @@ begin_declarations
36033654
declare_integer("IMAGE_DEBUG_TYPE_MPX");
36043655
declare_integer("IMAGE_DEBUG_TYPE_REPRO");
36053656

3657+
declare_integer("IMPORT_DELAYED");
3658+
declare_integer("IMPORT_STANDARD");
3659+
declare_integer("IMPORT_ANY");
3660+
36063661
declare_integer("is_pe");
36073662
declare_integer("machine");
36083663
declare_integer("number_of_sections");
@@ -3711,10 +3766,6 @@ begin_declarations
37113766
declare_function("imphash", "", "s", imphash);
37123767
#endif
37133768

3714-
declare_integer("IMPORT_DELAYED");
3715-
declare_integer("IMPORT_STANDARD");
3716-
declare_integer("IMPORT_ANY");
3717-
37183769
declare_function("section_index", "s", "i", section_index_name);
37193770
declare_function("section_index", "i", "i", section_index_addr);
37203771
declare_function("exports", "s", "i", exports);

tests/data/tiny_empty_import_name

32 KB
Binary file not shown.

0 commit comments

Comments
 (0)