Skip to content

Commit 0d7884d

Browse files
Fix #958: warn when feof() is used as a while loop condition (#8422)
feof() only returns true after a read has already failed, causing the loop body to execute once more after the last successful read. Read errors also go undetected since feof() does not distinguish them from EOF. --------- Signed-off-by: Francois Berder <fberder@outlook.fr>
1 parent 4a4e12d commit 0d7884d

7 files changed

Lines changed: 235 additions & 5 deletions

File tree

lib/checkers.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ namespace checkers {
101101
{"CheckFunctions::useStandardLibrary","style"},
102102
{"CheckIO::checkCoutCerrMisusage","c"},
103103
{"CheckIO::checkFileUsage",""},
104+
{"CheckIO::checkWrongfeofUsage",""},
104105
{"CheckIO::checkWrongPrintfScanfArguments",""},
105106
{"CheckIO::invalidScanf",""},
106107
{"CheckLeakAutoVar::check","notclang"},

lib/checkio.cpp

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,137 @@ void CheckIOImpl::invalidScanfError(const Token *tok)
507507
CWE119, Certainty::normal);
508508
}
509509

510+
static const Token* findFileReadCall(const Token *start, const Token *end, int varid)
511+
{
512+
const Token* found = Token::findmatch(start, "fgets|fgetc|getc|fread|fscanf (", end);
513+
while (found) {
514+
const std::vector<const Token*> args = getArguments(found);
515+
if (!args.empty()) {
516+
const bool match = (found->str() == "fscanf")
517+
? args.front()->varId() == varid
518+
: args.back()->varId() == varid;
519+
if (match)
520+
return found;
521+
}
522+
found = Token::findmatch(found->next(), "fgets|fgetc|getc|fread|fscanf (", end);
523+
}
524+
return nullptr;
525+
}
526+
527+
void CheckIOImpl::checkWrongfeofUsage()
528+
{
529+
const SymbolDatabase *symbolDatabase = mTokenizer->getSymbolDatabase();
530+
531+
logChecker("CheckIO::checkWrongfeofUsage");
532+
533+
for (const Scope * scope : symbolDatabase->functionScopes) {
534+
for (const Token *tok = scope->bodyStart->next(); tok != scope->bodyEnd; tok = tok->next()) {
535+
// TODO: Handle for loops
536+
if (!Token::Match(tok, "while ( ! feof ( %var% )"))
537+
continue;
538+
539+
// Bail out if we cannot identify file pointer
540+
const int fpVarId = tok->tokAt(5)->varId();
541+
if (fpVarId == 0)
542+
continue;
543+
544+
const Token *endCond = tok->linkAt(1);
545+
const Token *bodyStart;
546+
const Token *bodyEnd;
547+
548+
if (Token::simpleMatch(tok->previous(), "}") && tok->previous()->scope()->type == ScopeType::eDo) {
549+
bodyEnd = tok->previous();
550+
bodyStart = bodyEnd->link();
551+
} else {
552+
if (!Token::simpleMatch(endCond, ") {"))
553+
continue;
554+
bodyEnd = endCond->linkAt(1);
555+
bodyStart = endCond->next();
556+
}
557+
558+
// Bail out if the loop contains control flow (too complex to analyze)
559+
if (Token::findmatch(bodyStart, "return|break|goto|continue|throw", bodyEnd))
560+
continue;
561+
562+
// Bail out if fp is used outside of known file I/O functions.
563+
// If it is passed to an unknown function, reads may occur there.
564+
bool fpUsedElsewhere = false;
565+
for (const Token *t = bodyStart->next(); t && t != bodyEnd; t = t->next()) {
566+
if (t->varId() != fpVarId)
567+
continue;
568+
const Token *p = t->astParent();
569+
while (p && p->str() == ",")
570+
p = p->astParent();
571+
if (!p || !Token::Match(p->astOperand1(), "fgets|fgetc|getc|fread|fscanf|fprintf|fwrite|fputs|fputc|putc")) {
572+
fpUsedElsewhere = true;
573+
break;
574+
}
575+
}
576+
if (fpUsedElsewhere)
577+
continue;
578+
579+
// No file read call in the loop: feof can never become true inside it
580+
const Token *loopFileReadCallTok = findFileReadCall(bodyStart, bodyEnd, fpVarId);
581+
if (!loopFileReadCallTok) {
582+
// TODO: Warn about infinite loop
583+
continue;
584+
}
585+
586+
// Find last file read
587+
const Token *lastLoopFileReadCallTok = loopFileReadCallTok;
588+
while (loopFileReadCallTok) {
589+
lastLoopFileReadCallTok = loopFileReadCallTok;
590+
loopFileReadCallTok = findFileReadCall(lastLoopFileReadCallTok->next(), bodyEnd, fpVarId);
591+
}
592+
593+
// Warn if the destination of the last file read is used after the call before bodyEnd.
594+
// If it is not, the stale buffer is never accessed on the extra iteration at EOF.
595+
596+
if (lastLoopFileReadCallTok->str() == "fgetc" || lastLoopFileReadCallTok->str() == "getc") {
597+
// Warn if the return value feeds into an expression (astParent of the call node)
598+
if (lastLoopFileReadCallTok->astParent() && lastLoopFileReadCallTok->astParent()->astParent())
599+
wrongfeofUsage(getCondTok(tok));
600+
} else {
601+
const std::vector<const Token*> args = getArguments(lastLoopFileReadCallTok);
602+
// Collect destination varIds
603+
std::vector<nonneg int> destVarIds;
604+
if (lastLoopFileReadCallTok->str() == "fscanf") {
605+
// args[0]=fp, args[1]=format, args[2+]=destinations (typically &var)
606+
for (std::size_t i = 2; i < args.size(); ++i) {
607+
const Token *destTok = Token::Match(args[i], "& %var%") ? args[i]->next() : args[i];
608+
if (destTok->varId() != 0)
609+
destVarIds.push_back(destTok->varId());
610+
}
611+
} else {
612+
// Handle fgets, fread
613+
// First argument is the destination buffer
614+
if (!args.empty() && args.front()->varId() != 0)
615+
destVarIds.push_back(args.front()->varId());
616+
}
617+
618+
// Search for any destination use between this call's ';' and endBody
619+
const Token *semiColonTok = lastLoopFileReadCallTok->linkAt(1)->next();
620+
for (const Token *t = semiColonTok; t && t != bodyEnd; t = t->next()) {
621+
if (std::find(destVarIds.begin(), destVarIds.end(), t->varId()) != destVarIds.end()) {
622+
wrongfeofUsage(getCondTok(tok));
623+
break;
624+
}
625+
}
626+
}
627+
}
628+
}
629+
}
630+
631+
void CheckIOImpl::wrongfeofUsage(const Token * tok)
632+
{
633+
reportError(tok, Severity::warning,
634+
"wrongfeofUsage",
635+
"Using feof() as a loop condition causes the last line to be processed twice.\n"
636+
"feof() returns true only after a read has failed due to end-of-file, so the loop "
637+
"body executes once more after the last successful read. Check the return value of "
638+
"the read function instead (e.g. fgets, fread, fscanf).");
639+
}
640+
510641
//---------------------------------------------------------------------------
511642
// printf("%u", "xyz"); // Wrong argument type
512643
// printf("%u%s", 1); // Too few arguments
@@ -2057,6 +2188,7 @@ void CheckIO::runChecks(const Tokenizer &tokenizer, ErrorLogger& errorLogger)
20572188
checkIO.checkWrongPrintfScanfArguments();
20582189
checkIO.checkCoutCerrMisusage();
20592190
checkIO.checkFileUsage();
2191+
checkIO.checkWrongfeofUsage();
20602192
checkIO.invalidScanf();
20612193
}
20622194

@@ -2072,6 +2204,7 @@ void CheckIO::getErrorMessages(ErrorLogger& errorLogger, const Settings &setting
20722204
c.fcloseInLoopConditionError(nullptr, "fp");
20732205
c.seekOnAppendedFileError(nullptr);
20742206
c.incompatibleFileOpenError(nullptr, "tmp");
2207+
c.wrongfeofUsage(nullptr);
20752208
c.invalidScanfError(nullptr);
20762209
c.wrongPrintfScanfArgumentsError(nullptr, "printf",3,2);
20772210
c.invalidScanfArgTypeError_s(nullptr, 1, "s", nullptr);

lib/checkio.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class CPPCHECKLIB CheckIOImpl : public CheckImpl {
8282
/** @brief scanf can crash if width specifiers are not used */
8383
void invalidScanf();
8484

85+
/** @brief %Check wrong usage of feof */
86+
void checkWrongfeofUsage();
87+
8588
/** @brief %Checks type and number of arguments given to functions like printf or scanf*/
8689
void checkWrongPrintfScanfArguments();
8790

@@ -127,6 +130,7 @@ class CPPCHECKLIB CheckIOImpl : public CheckImpl {
127130
void seekOnAppendedFileError(const Token *tok);
128131
void incompatibleFileOpenError(const Token *tok, const std::string &filename);
129132
void invalidScanfError(const Token *tok);
133+
void wrongfeofUsage(const Token *tok);
130134
void wrongPrintfScanfArgumentsError(const Token* tok,
131135
const std::string &functionName,
132136
nonneg int numFormat,

man/checkers/wrongfeofUsage.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# wrongfeofUsage
2+
3+
**Message**: Using feof() as a loop condition causes the last line to be processed twice.<br/>
4+
**Category**: Correctness<br/>
5+
**Severity**: Warning<br/>
6+
**Language**: C/C++
7+
8+
## Description
9+
10+
`feof()` returns non-zero only after a read operation has failed because the end of file was reached. When used as the sole condition of a loop, the loop body executes one extra time after the last successful read: the read fails silently (or returns partial data), and only then does `feof()` return true and terminate the loop.
11+
12+
This checker matches `while (!feof(fp))` and `do { ... } while (!feof(fp))` loops and warns when all of the following are true:
13+
14+
- The loop body contains at least one file-read call (`fgets`, `fgetc`, `getc`, `fread`, or `fscanf`) on the same file pointer.
15+
- The destination (return value or output buffer) of the **last** file-read call in the loop is used after that call within the same loop iteration.
16+
17+
The checker skips loops that contain a control-flow statement (`return`, `break`, `goto`, `continue`, `throw`) as those are too complex to analyze reliably, and loops where the file pointer appears in a context other than a recognised I/O function (`fgets`, `fgetc`, `getc`, `fread`, `fscanf`, `fprintf`, `fwrite`, `fputs`, `fputc`, `putc`).
18+
19+
## How to fix
20+
21+
Check the return value of the read function directly in the loop condition.
22+
23+
Before:
24+
```c
25+
void process(FILE *fp) {
26+
char line[256];
27+
while (!feof(fp)) { /* wrong: processes last line twice */
28+
fgets(line, sizeof(line), fp);
29+
puts(line);
30+
}
31+
}
32+
```
33+
34+
After:
35+
```c
36+
void process(FILE *fp) {
37+
char line[256];
38+
while (fgets(line, sizeof(line), fp) != NULL) {
39+
puts(line);
40+
}
41+
}
42+
```

releasenotes.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Major bug fixes & crashes:
55
-
66

77
New checks:
8-
-
8+
- Warn when feof() is used as a while loop condition (wrongfeofUsage).
99

1010
C/C++ support:
1111
-

test/cli/other_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4318,25 +4318,25 @@ def __test_active_checkers(tmp_path, active_cnt, total_cnt, use_misra=False, use
43184318

43194319

43204320
def test_active_unusedfunction_only(tmp_path):
4321-
__test_active_checkers(tmp_path, 1, 186, use_unusedfunction_only=True)
4321+
__test_active_checkers(tmp_path, 1, 187, use_unusedfunction_only=True)
43224322

43234323

43244324
def test_active_unusedfunction_only_builddir(tmp_path):
43254325
checkers_exp = [
43264326
'CheckUnusedFunctions::check'
43274327
]
4328-
__test_active_checkers(tmp_path, 1, 186, use_unusedfunction_only=True, checkers_exp=checkers_exp)
4328+
__test_active_checkers(tmp_path, 1, 187, use_unusedfunction_only=True, checkers_exp=checkers_exp)
43294329

43304330

43314331
def test_active_unusedfunction_only_misra(tmp_path):
4332-
__test_active_checkers(tmp_path, 1, 386, use_unusedfunction_only=True, use_misra=True)
4332+
__test_active_checkers(tmp_path, 1, 387, use_unusedfunction_only=True, use_misra=True)
43334333

43344334

43354335
def test_active_unusedfunction_only_misra_builddir(tmp_path):
43364336
checkers_exp = [
43374337
'CheckUnusedFunctions::check'
43384338
]
4339-
__test_active_checkers(tmp_path, 1, 386, use_unusedfunction_only=True, use_misra=True, checkers_exp=checkers_exp)
4339+
__test_active_checkers(tmp_path, 1, 387, use_unusedfunction_only=True, use_misra=True, checkers_exp=checkers_exp)
43404340

43414341

43424342
def test_analyzerinfo(tmp_path):

test/testio.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class TestIO : public TestFixture {
4545
TEST_CASE(seekOnAppendedFile);
4646
TEST_CASE(fflushOnInputStream);
4747
TEST_CASE(incompatibleFileOpen);
48+
TEST_CASE(testWrongfeofUsage); // #958
4849

4950
TEST_CASE(testScanf1); // Scanf without field limiters
5051
TEST_CASE(testScanf2);
@@ -766,6 +767,55 @@ class TestIO : public TestFixture {
766767
ASSERT_EQUALS("[test.cpp:3:16]: (warning) The file '\"tmp\"' is opened for read and write access at the same time on different streams [incompatibleFileOpen]\n", errout_str());
767768
}
768769

770+
void testWrongfeofUsage() { // ticket #958
771+
check("void foo(FILE * fp) {\n"
772+
" while (!feof(fp)) \n"
773+
" {\n"
774+
" char line[100];\n"
775+
" fgets(line, sizeof(line), fp);\n"
776+
" dostuff(line);\n"
777+
" }\n"
778+
"}");
779+
ASSERT_EQUALS("[test.cpp:2:10]: (warning) Using feof() as a loop condition causes the last line to be processed twice. [wrongfeofUsage]\n", errout_str());
780+
781+
check("int foo(FILE *fp) {\n"
782+
" char line[100];\n"
783+
" while (fgets(line, sizeof(line), fp)) {}\n"
784+
" if (!feof(fp))\n"
785+
" return 1;\n"
786+
" return 0;\n"
787+
"}");
788+
ASSERT_EQUALS("", errout_str());
789+
790+
check("void foo(FILE *fp){\n"
791+
" char line[100];\n"
792+
" fgets(line, sizeof(line), fp);\n"
793+
" while (!feof(fp)){\n"
794+
" dostuff(line);\n"
795+
" fgets(line, sizeof(line), fp);"
796+
" }\n"
797+
"}");
798+
ASSERT_EQUALS("", errout_str());
799+
800+
check("void foo(FILE *fp) {\n"
801+
" char line[100];\n"
802+
" do {\n"
803+
" fgets(line, sizeof(line), fp);\n"
804+
" dostuff(line);\n"
805+
" } while (!feof(fp));\n"
806+
"}");
807+
ASSERT_EQUALS("[test.cpp:6:12]: (warning) Using feof() as a loop condition causes the last line to be processed twice. [wrongfeofUsage]\n", errout_str());
808+
809+
check("void foo(FILE *fp) {\n"
810+
" char line[100];\n"
811+
" do {\n"
812+
" dostuff(line);\n"
813+
" fgets(line, sizeof(line), fp);\n"
814+
" } while (!feof(fp));\n"
815+
"}");
816+
ASSERT_EQUALS("", errout_str());
817+
}
818+
769819

770820
void testScanf1() {
771821
check("void foo() {\n"

0 commit comments

Comments
 (0)