Line data Source code
1 : #include "precizer.h"
2 :
3 : /**
4 : * @brief Display statistics for filesystem components
5 : *
6 : */
7 190 : static void display_statistics(
8 : size_t *count_dirs,
9 : size_t *count_files,
10 : size_t *count_symlnks,
11 : size_t const *total_size_in_bytes,
12 : const bool *count_size_of_all_files,
13 : const bool *at_least_one_file_was_shown)
14 : {
15 190 : size_t total_items = *count_dirs + *count_files + *count_symlnks;
16 :
17 190 : bool show_total = false;
18 190 : bool show_complete = false;
19 :
20 190 : if(*count_size_of_all_files == true)
21 : {
22 38 : show_total = true;
23 :
24 152 : } else if(*at_least_one_file_was_shown == true){
25 :
26 110 : show_complete = true;
27 110 : show_total = true;
28 : }
29 :
30 190 : if(show_complete == true)
31 : {
32 110 : slog(EVERY,"File traversal complete\n");
33 : }
34 :
35 190 : if(show_total == true)
36 : {
37 148 : slog(EVERY,"Total size: %s, total items: %zu, dirs: %zu, files: %zu, symlnks: %zu\n",
38 : bkbmbgbtbpbeb(*total_size_in_bytes),
39 : total_items,
40 : *count_dirs,
41 : *count_files,
42 : *count_symlnks);
43 : }
44 190 : }
45 :
/**
 * @brief Ordering callback for fts_open(): sort entries by file name
 * @param first Pointer to a pointer to the first FTSENT structure
 * @param second Pointer to a pointer to the second FTSENT structure
 * @return The strcmp() result for the two fts_name fields: negative when
 *         first sorts before second, zero when the names are equal,
 *         positive when first sorts after second
 */
static int compare_by_name(
	const FTSENT **first,
	const FTSENT **second)
{
	const FTSENT *lhs = *first;
	const FTSENT *rhs = *second;

	return strcmp(lhs->fts_name,rhs->fts_name);
}
59 :
60 : /**
61 : *
62 : * Traverses a directory recursively and returns
63 : * a struct for each file it encounters
64 : *
65 : */
66 368 : Return file_list(const bool count_size_of_all_files)
67 : {
68 : /// The status that will be passed to return() before exiting.
69 : /// By default, the function worked without errors.
70 368 : Return status = SUCCESS;
71 :
72 : // Don't do anything
73 368 : if(config->compare == true)
74 : {
75 64 : return(status);
76 : }
77 :
78 304 : if(config->progress == false && count_size_of_all_files == true)
79 : {
80 : // Don't do anything
81 114 : return(status);
82 : }
83 :
84 : // Flags that reflect the presence of any changes
85 : // since the last research
86 :
87 : // Print traversal/update banners only once
88 190 : bool first_iteration = true;
89 :
90 : // Prevent duplicate --ignore info messages
91 190 : bool ignore_showed_once = false;
92 :
93 : // Prevent duplicate --include info messages
94 190 : bool include_showed_once = false;
95 :
96 : // Prevent duplicate lock-checksum info messages
97 190 : bool lock_checksum_showed_once = false;
98 :
99 : // Track whether any output was produced
100 190 : bool at_least_one_file_was_shown = false;
101 :
102 : // Signals integrity issues for locked files
103 190 : bool lock_checksum_violation_detected = false;
104 :
105 190 : FTS *file_systems = NULL;
106 190 : FTSENT *p = NULL;
107 :
108 190 : int fts_options = FTS_PHYSICAL;
109 :
110 190 : if(config->start_device_only == true)
111 : {
112 2 : fts_options |= FTS_XDEV;
113 : }
114 :
115 190 : size_t count_files = 0,count_dirs = 0,count_symlnks = 0,total_size_in_bytes = 0;
116 :
117 190 : if((file_systems = fts_open(config->paths,fts_options,compare_by_name)) == NULL)
118 : {
119 0 : slog(ERROR,"fts_open() error\n");
120 0 : fts_close(file_systems);
121 0 : provide(FAILURE);
122 : }
123 :
124 : /*
125 : * Determine the absolute path prefix.
126 : * We are only interested in relative paths in the database.
127 : * To obtain a relative path, trim the prefix from the absolute path.
128 : */
129 190 : char *runtime_root = NULL;
130 : #if 0 // Old multiPATH solution
131 : /**
132 : * Index of the path prefix
133 : * All full runtime paths are stored in the table "paths".
134 : * A real path can be retrieved due to its index ID
135 : */
136 : sqlite3_int64 runtime_root_index = -1;
137 : #endif
138 :
139 : // Limit recursion to the depth determined in config->maxdepth
140 190 : if(config->maxdepth > -1)
141 : {
142 4 : slog(EVERY,"Recursion depth limited to: %d\n",config->maxdepth);
143 : }
144 :
145 190 : if(count_size_of_all_files == true && config->progress == true)
146 : {
147 38 : slog(EVERY,"File system traversal initiated to calculate file count and storage usage\n");
148 : }
149 :
150 190 : bool continue_the_loop = true;
151 :
152 : // Allocate space for a memory structure
153 190 : create(unsigned char,file_buffer);
154 :
155 190 : if(count_size_of_all_files == false)
156 : {
157 152 : status = resize(file_buffer,file_buffer_memory());
158 :
159 152 : if(SUCCESS != status)
160 : {
161 0 : provide(status);
162 : }
163 : }
164 :
165 21336 : while((p = fts_read(file_systems)) != NULL && continue_the_loop == true)
166 : {
167 : /* Interrupt the loop smoothly */
168 : /* Interrupt when Ctrl+C */
169 21146 : if(global_interrupt_flag == true)
170 : {
171 0 : break;
172 : }
173 :
174 21146 : if(count_size_of_all_files == false)
175 : {
176 : /* Get absolute path prefix from FTSENT structure and current runtime path */
177 16826 : if(p->fts_level == FTS_ROOTLEVEL)
178 : {
179 304 : size_t new_size = (size_t)(p->fts_pathlen + 1) * sizeof(char);
180 :
181 : // All below run once per new path prefix
182 304 : char *tmp = (char *)realloc(runtime_root,new_size);
183 :
184 304 : if(NULL == tmp)
185 : {
186 0 : report("Memory allocation failed, requested size: %zu bytes",new_size);
187 0 : status = FAILURE;
188 0 : break;
189 : } else {
190 304 : runtime_root = tmp;
191 : }
192 :
193 : // Remember temporary string in long-lasting variable
194 304 : strcpy(runtime_root,p->fts_path);
195 :
196 : // Remove unnecessary trailing slash at the end of the directory path
197 304 : remove_trailing_slash(runtime_root);
198 :
199 : #if 0 // Old multiPATH solution
200 :
201 : // If several paths were passed as arguments,
202 : // then the counting of the path prefix index
203 : // will start from zero
204 : if(SUCCESS != (status = db_get_runtime_root_index(config,
205 : runtime_root,
206 : &runtime_root_index)))
207 : {
208 : continue_the_loop = false;
209 : break;
210 : }
211 : #endif
212 : }
213 : }
214 :
215 21146 : switch(p->fts_info)
216 : {
217 9320 : case FTS_D:
218 : {
219 9320 : if(SUCCESS != verify_directory_access(file_systems,p,runtime_root))
220 : {
221 0 : status = FAILURE;
222 0 : continue_the_loop = false;
223 0 : break;
224 : }
225 9320 : count_dirs++;
226 9320 : break;
227 : }
228 2490 : case FTS_F:
229 : {
230 : // Limit recursion to the depth determined in config->maxdepth
231 2490 : if(config->maxdepth > -1 && p->fts_level > config->maxdepth + 1)
232 : {
233 792 : break;
234 : }
235 :
236 2482 : CmpctStat stat = {0};
237 :
238 2482 : (void)stat_copy(p->fts_statp,&stat);
239 :
240 2482 : total_size_in_bytes += (size_t)stat.st_size;
241 2482 : count_files++;
242 :
243 2482 : if(runtime_root == NULL)
244 : {
245 514 : continue;
246 : }
247 :
248 : /* Write all columns from DB row to the structure DBrow
249 : and clean the structure to prevent reuse */
250 1968 : DBrow _dbrow = {0};
251 1968 : DBrow *dbrow = &_dbrow;
252 :
253 1968 : const char *relative_path = extract_relative_path(p->fts_path,runtime_root);
254 :
255 : /* Get all file's metadata from the database */
256 : #if 0 // Old multiPATH solution
257 : run(db_read_file_data_from(dbrow,&runtime_root_index,relative_path));
258 : #else
259 1968 : run(db_read_file_data_from(dbrow,relative_path));
260 : #endif
261 :
262 1968 : if(SUCCESS != status)
263 : {
264 0 : continue_the_loop = false;
265 0 : break;
266 : }
267 :
268 1968 : const bool path_known = dbrow->relative_path_already_in_db == true;
269 :
270 1968 : const bool has_saved_offset = dbrow->saved_offset > 0;
271 :
272 : // Validate if size, creation and modification time of a
273 : // file has not changed since last scanning.
274 : // Default value is:
275 1968 : Changed metadata_of_scanned_and_saved_files = NOT_EQUAL;
276 :
277 : // Tracks if the current relative path already has a DB entry
278 1968 : if(path_known == true)
279 : {
280 : // Validate if size, creation and modification time of a
281 : // file has not changed since last scanning.
282 934 : metadata_of_scanned_and_saved_files = compare_file_metadata_equivalence(&(dbrow->saved_stat),&stat);
283 : }
284 :
285 1968 : const bool metadata_identical = metadata_of_scanned_and_saved_files == IDENTICAL;
286 :
287 1968 : const bool metadata_changed = metadata_identical == false;
288 :
289 : // Flag that marks files matched by the checksum lock pattern
290 1968 : bool locked_checksum_file = false;
291 :
292 1968 : LockChecksum lock_checksum_response = match_checksum_lock_pattern(relative_path,&lock_checksum_showed_once);
293 :
294 1968 : if(FAIL_REGEXP_LOCK_CHECKSUM == lock_checksum_response)
295 : {
296 0 : slog(ERROR,"Fail lock-checksum REGEXP for a string: %s\n",relative_path);
297 0 : status = FAILURE;
298 0 : continue_the_loop = false;
299 0 : break;
300 1968 : } else if(LOCK_CHECKSUM == lock_checksum_response){
301 90 : locked_checksum_file = true;
302 : }
303 :
304 : // Indicates that the checksum-locked file has already been fully hashed and recorded
305 3936 : bool lock_checksum_ready = locked_checksum_file == true
306 90 : && path_known == true
307 2058 : && has_saved_offset == false;
308 :
309 : // Used to skip files whose metadata and checksum are already up to date
310 1968 : bool unchanged_and_complete = path_known == true
311 934 : && metadata_identical == true
312 2902 : && has_saved_offset == false;
313 :
314 1968 : if(unchanged_and_complete == true && !(config->rehash_locked == true && lock_checksum_ready == true))
315 : {
316 : // Relative path already in DB and doesn't require any change
317 : break;
318 : }
319 :
320 : // Derived flags to qualify the type of metadata change
321 1210 : bool size_changed = (metadata_of_scanned_and_saved_files & SIZE_CHANGED) != 0;
322 :
323 1210 : bool timestamps_changed = (metadata_of_scanned_and_saved_files & (STATUS_CHANGED_TIME | MODIFICATION_TIME_CHANGED)) != 0;
324 :
325 1210 : bool timestamps_only_changed = path_known == true
326 176 : && metadata_changed == true
327 168 : && config->watch_timestamps == false
328 126 : && size_changed == false
329 1386 : && has_saved_offset == false;
330 :
331 : // Decision whether to rehash the file contents using
332 : // the SHA512 algorithm. Defaults to Yes, rehash"
333 1210 : bool rehash = true;
334 :
335 1210 : if(timestamps_only_changed == true)
336 : {
337 : // ctime/mtime changed only: update DB without rehash
338 94 : rehash = false;
339 : }
340 :
341 1210 : if(lock_checksum_ready == true && config->rehash_locked == true)
342 : {
343 12 : rehash = true;
344 : }
345 :
346 1210 : sqlite3_int64 offset = 0; // Offset bytes
347 1210 : SHA512_Context mdContext = {0};
348 :
349 : /* For a file which had been changed before creation
350 : of its checksum has been already finished */
351 1210 : bool rehashing_from_the_beginning = false;
352 :
353 : // Can we resume hashing from a previous partial state?
354 1210 : bool can_resume_partial_hash = has_saved_offset == true
355 1210 : && metadata_changed == false;
356 :
357 : // Indicates that a previous partial hash is now invalid and must restart
358 1210 : bool partial_hash_invalidated = has_saved_offset == true
359 1210 : && metadata_changed == true;
360 :
361 1210 : if(can_resume_partial_hash == true)
362 : {
363 : // Continue hashing
364 0 : offset = dbrow->saved_offset;
365 0 : memcpy(&mdContext,&(dbrow->saved_mdContext),sizeof(SHA512_Context));
366 :
367 1210 : } else if(partial_hash_invalidated == true){
368 : /* The SHA512 hashing of the file had not been
369 : finished previously and the file has been changed */
370 0 : rehashing_from_the_beginning = true;
371 : }
372 :
373 : // The file is available for reading
374 1210 : FileAccessStatus access_status = FILE_ACCESS_DENIED;
375 :
376 : /* Check file access */
377 1210 : access_status = file_check_access(p->fts_path,
378 1210 : (size_t)p->fts_pathlen);
379 :
380 1210 : if(access_status == FILE_ACCESS_ERROR)
381 : {
382 0 : status = FAILURE;
383 0 : continue_the_loop = false;
384 0 : break;
385 : }
386 :
387 1210 : bool is_readable = (access_status == FILE_ACCESS_ALLOWED);
388 :
389 : // Marks zero-length files to avoid unnecessary hashing
390 1210 : bool zero_size_file = false;
391 :
392 : /**
393 : * Indicates files that cannot be read/seeks (e.g. sysfs)
394 : *
395 : * On some special file systems (such as /sys, which has
396 : * the SYSFS_MAGIC constant == 0x62656572), standard
397 : * file operations like fopen, fseek, and lseek
398 : * cannot be used for reading and seeking.
399 : * While information about the file itself will be
400 : * recorded in the primary database, due to the
401 : * nature of such files, their hash sum is never
402 : * read and is stored as NULL
403 : */
404 1210 : bool wrong_file_type = false;
405 :
406 1210 : if(p->fts_statp->st_size == 0)
407 : {
408 2 : zero_size_file = true;
409 2 : rehash = false;
410 : }
411 :
412 : // Captures files explicitly skipped or forced by regexp filters
413 : // Ignored with --ignore= or admitted with --include=
414 1210 : bool ignore = false;
415 :
416 : // Included with --include=
417 1210 : bool include = false;
418 :
419 : /* PCRE2 regexp to include the file */
420 1210 : Include match_include_response = match_include_pattern(relative_path,&include_showed_once);
421 :
422 1210 : if(DO_NOT_INCLUDE == match_include_response)
423 : {
424 : /* PCRE2 regexp to ignore the file */
425 :
426 1210 : Ignore match_ignore_response = match_ignore_pattern(relative_path,&ignore_showed_once);
427 :
428 1210 : if(IGNORE == match_ignore_response)
429 : {
430 14 : ignore = true;
431 :
432 1196 : } else if(FAIL_REGEXP_IGNORE == match_ignore_response){
433 0 : slog(ERROR,"Fail ignore REGEXP for a string: %s\n",relative_path);
434 0 : status = FAILURE;
435 0 : continue_the_loop = false;
436 0 : break;
437 : }
438 :
439 0 : } else if(FAIL_REGEXP_INCLUDE == match_include_response){
440 0 : slog(ERROR,"Fail include REGEXP for a string: %s\n",relative_path);
441 0 : status = FAILURE;
442 0 : continue_the_loop = false;
443 0 : break;
444 0 : } else if(INCLUDE == match_include_response){
445 0 : include = true;
446 : }
447 :
448 : // Ensure checksum-locked files are tracked even if matched by ignore pattern
449 1210 : if(ignore == true && locked_checksum_file == true && path_known == false)
450 : {
451 0 : ignore = false;
452 : }
453 :
454 : // Locked checksum files must not diverge once sealed
455 1210 : bool lock_checksum_violation = lock_checksum_ready == true
456 1224 : && (size_changed == true
457 14 : || (config->watch_timestamps == true
458 8 : && config->rehash_locked == false
459 2 : && timestamps_changed == true));
460 :
461 : // Timestamps drift on a locked file may be ignored depending on config
462 1210 : bool locked_timestamp_drift_only = lock_checksum_ready == true
463 20 : && config->watch_timestamps == false
464 12 : && config->rehash_locked == false
465 6 : && timestamps_changed == true
466 1230 : && size_changed == false;
467 :
468 1210 : if(locked_timestamp_drift_only == true)
469 : {
470 2 : break;
471 : }
472 :
473 : // Print out of a file name and its changes
474 1208 : show_relative_path(relative_path,
475 : &metadata_of_scanned_and_saved_files,
476 : dbrow,
477 : &stat,
478 : &first_iteration,
479 : &rehashing_from_the_beginning,
480 : &ignore,
481 : &include,
482 : &locked_checksum_file,
483 : &lock_checksum_violation,
484 : &at_least_one_file_was_shown,
485 : &rehash,
486 : &count_size_of_all_files,
487 : &is_readable,
488 : &zero_size_file);
489 :
490 1208 : if(is_readable != true)
491 : {
492 2 : break;
493 : }
494 :
495 1206 : if(ignore == true)
496 : {
497 14 : break;
498 : }
499 :
500 : /* When a checksum-locked file changed;
501 : blocks rehash/DB update and flags corruption */
502 1192 : if(lock_checksum_violation == true)
503 : {
504 8 : lock_checksum_violation_detected = true;
505 8 : break;
506 : }
507 :
508 : // Buffer for current file SHA512 digest
509 1184 : unsigned char sha512[SHA512_DIGEST_LENGTH] = {0};
510 :
511 1184 : if(rehash == true)
512 : {
513 1092 : run(sha512sum(p->fts_path,
514 : (size_t)p->fts_pathlen,
515 : file_buffer,
516 : sha512,
517 : &offset,
518 : &mdContext,
519 : &wrong_file_type));
520 :
521 1092 : if(TRIUMPH & status)
522 : {
523 : /* If the sha512sum has been interrupted smoothly when Ctrl+C */
524 1092 : if(offset > 0 && global_interrupt_flag == true)
525 : {
526 0 : slog(EVERY,"SHA512 checksum for the file %s has been"
527 : " gracefully interrupted at byte: %s\n",
528 : relative_path,
529 : bkbmbgbtbpbeb((size_t)offset));
530 : }
531 :
532 : } else {
533 0 : continue_the_loop = false;
534 0 : break;
535 : }
536 :
537 : } else {
538 92 : memcpy(&sha512,&(dbrow->sha512),sizeof(sha512));
539 : }
540 :
541 1184 : bool locked_checksum_mismatch = false; // Detects corruption when rehashing locked files
542 :
543 1184 : if(config->rehash_locked == true && lock_checksum_ready == true
544 10 : && rehash == true && (TRIUMPH & status)
545 10 : && wrong_file_type == false && zero_size_file == false
546 10 : && offset == 0)
547 : {
548 10 : if(memcmp(sha512,dbrow->sha512,SHA512_DIGEST_LENGTH) != 0)
549 : {
550 0 : locked_checksum_mismatch = true;
551 : }
552 : }
553 :
554 1184 : if(locked_checksum_mismatch == true)
555 : {
556 0 : lock_checksum_violation_detected = true;
557 0 : slog(EVERY|UNDECOR,RED "checksum locked, data corruption detected" RESET " %s\n",relative_path);
558 0 : break;
559 : }
560 :
561 1184 : if(path_known == true)
562 : {
563 : /* Update in DB */
564 :
565 312 : bool allow_locked_update = lock_checksum_violation == false
566 156 : && (locked_checksum_file == false || config->rehash_locked == true);
567 :
568 156 : bool should_update_db = path_known == true
569 156 : && allow_locked_update == true
570 468 : && (offset > dbrow->saved_offset
571 156 : || (has_saved_offset == true && offset == 0)
572 156 : || metadata_changed == true);
573 :
574 156 : if(should_update_db == true)
575 : {
576 : /* Update record in DB */
577 148 : if(TRIUMPH & status)
578 : {
579 148 : status = db_update_the_record_by_id(&(dbrow->ID),
580 : &offset,
581 : sha512,
582 : &stat,
583 : &mdContext,
584 : &zero_size_file,
585 : &wrong_file_type);
586 :
587 148 : if(SUCCESS != status)
588 : {
589 0 : continue_the_loop = false;
590 0 : break;
591 : }
592 : }
593 : }
594 :
595 : } else {
596 :
597 : /* Insert into DB */
598 1028 : if(TRIUMPH & status)
599 : {
600 : #if 0 // Old multiPATH solution
601 : status = db_insert_the_record(&runtime_root_index,
602 : relative_path,
603 : &offset,
604 : sha512,
605 : &stat,
606 : &mdContext,
607 : &zero_size_file,
608 : &wrong_file_type);
609 : #else
610 1028 : status = db_insert_the_record(relative_path,
611 : &offset,
612 : sha512,
613 : &stat,
614 : &mdContext,
615 : &zero_size_file,
616 : &wrong_file_type);
617 : #endif
618 :
619 1028 : if(SUCCESS != status)
620 : {
621 0 : continue_the_loop = false;
622 0 : break;
623 : }
624 : }
625 : }
626 :
627 : /**
628 : * Interrupt the loop smoothly
629 : * Interrupt when Ctrl+C
630 : */
631 1184 : if(global_interrupt_flag == true)
632 : {
633 0 : break;
634 : }
635 : }
636 1184 : break;
637 16 : case FTS_SL:
638 16 : count_symlnks++;
639 16 : break;
640 9320 : default:
641 9320 : break;
642 : }
643 : }
644 :
645 190 : del(file_buffer);
646 :
647 190 : free(runtime_root);
648 :
649 190 : fts_close(file_systems);
650 :
651 : // Display statistics for filesystem components
652 190 : if(SUCCESS == status)
653 : {
654 190 : display_statistics(&count_dirs,
655 : &count_files,
656 : &count_symlnks,
657 : &total_size_in_bytes,
658 : &count_size_of_all_files,
659 : &at_least_one_file_was_shown);
660 : }
661 :
662 190 : if(lock_checksum_violation_detected == true)
663 : {
664 6 : slog(ERROR,BOLD "Caution! Data corruption detected for checksum-locked file!" RESET "\n");
665 :
666 6 : if(SUCCESS == status)
667 : {
668 6 : status = WARNING;
669 : }
670 : }
671 :
672 190 : provide(status);
673 : }
|