Skip to content

Commit edc3118

Browse files
committed
Refactoring: Use rhash_update_fd() to hash files
1 parent 0187df9 commit edc3118

File tree

5 files changed

+83
-24
lines changed

5 files changed

+83
-24
lines changed

ChangeLog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
Sat 19 Apr 2025 Aleksey Kravchenko
2+
* Speed up file reading while hashing a file
3+
14
Tue 15 Apr 2025 Aleksey Kravchenko
25
* Bugfix: Fix processing of ./ directory
36
* Support --blake3 option for the BLAKE3 hash function

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ $(RHASH_BINARY): $(OBJECTS) $(LIBRHASH_PATH)
157157
# NOTE: dependencies were generated by 'gcc -Ilibrhash -MM *.c'
158158
# we are using plain old makefile style to support BSD make
159159
calc_sums.o: calc_sums.c calc_sums.h common_func.h file.h hash_check.h \
160-
file_set.h hash_print.h output.h parse_cmdline.h rhash_main.h \
160+
file_set.h hash_print.h output.h parse_cmdline.h platform.h rhash_main.h \
161161
win_utils.h librhash/rhash.h librhash/rhash_torrent.h
162162
$(CC) -c $(CFLAGS) $< -o $@
163163

calc_sums.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "hash_print.h"
55
#include "output.h"
66
#include "parse_cmdline.h"
7+
#include "platform.h"
78
#include "rhash_main.h"
89
#include "win_utils.h"
910
#include "librhash/rhash.h"
@@ -207,15 +208,15 @@ static void re_init_rhash_context(struct file_info* info)
207208
*/
208209
int calc_sums(struct file_info* info)
209210
{
210-
FILE* fd = 0;
211+
int fd = -1;
211212
int res;
212213

213214
assert(info->file);
214215
if (FILE_ISSTDIN(info->file)) {
215-
fd = stdin;
216+
fd = 0;
216217
#ifdef _WIN32
217218
/* using 0 instead of _fileno(stdin). _fileno() is undefined under 'gcc -ansi' */
218-
if (setmode(0, _O_BINARY) < 0)
219+
if (setmode(fd, _O_BINARY) < 0)
219220
return -1;
220221
#endif
221222
} else {
@@ -230,9 +231,9 @@ int calc_sums(struct file_info* info)
230231
return 0;
231232

232233
if (!FILE_ISDATA(info->file)) {
233-
fd = file_fopen(info->file, FOpenRead | FOpenBin);
234+
fd = file_open(info->file, FOpenReadBin);
234235
/* quietly skip unreadable files */
235-
if (!fd)
236+
if (fd < 0)
236237
return -1;
237238
}
238239
}
@@ -248,7 +249,7 @@ int calc_sums(struct file_info* info)
248249
if (percents_output->update != 0) {
249250
rhash_set_callback(info->rctx, (rhash_callback_t)percents_output->update, info);
250251
}
251-
res = rhash_file_update(info->rctx, fd);
252+
res = rhash_update_fd(info->rctx, fd, RHASH_MAX_FILE_SIZE);
252253
}
253254
if (res != -1 && !opt.bt_batch_file)
254255
rhash_final(info->rctx, 0); /* finalize hashing */
@@ -257,8 +258,8 @@ int calc_sums(struct file_info* info)
257258
info->size = info->rctx->msg_size - info->msg_offset;
258259
rhash_data.total_size += info->size;
259260

260-
if (fd && !FILE_ISSTDIN(info->file))
261-
fclose(fd);
261+
if (fd >= 0 && !FILE_ISSTDIN(info->file))
262+
close(fd);
262263
return res;
263264
}
264265

file.c

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ static int file_statw(file_t* file);
291291
* @param dir_path (nullable) directory path to prepend to printable path
292292
* @param print_path printable path, which encoding shall be detected
293293
* @param init_flags bit flags, helping to detect the encoding
294-
* @return encoding on success, -1 on fail with error code stored in errno
294+
* @return encoding on success, -1 on failure with error code stored in errno
295295
*/
296296
static int detect_path_encoding(file_t* file, wchar_t* dir_path, const char* print_path, unsigned init_flags)
297297
{
@@ -354,7 +354,7 @@ static int detect_path_encoding(file_t* file, wchar_t* dir_path, const char* pri
354354
* @param prepend_dir the directory to prepend to the print_path, to construct the file path, can be NULL
355355
* @param print_path the printable representation of the file path
356356
* @param init_flags initialization flags
357-
* @return 0 on success, -1 on fail with error code stored in errno
357+
* @return 0 on success, -1 on failure with error code stored in errno
358358
*/
359359
int file_init_by_print_path(file_t* file, file_t* prepend_dir, const char* print_path, unsigned init_flags)
360360
{
@@ -617,7 +617,7 @@ void file_swap(file_t* first, file_t* second)
617617
* @param str the string to insert into/append to the source file path
618618
* @param operation the operation determining how to modify the file path, can be one of the values
619619
* FModifyAppendSuffix, FModifyInsertBeforeExtension, FModifyRemoveExtension, FModifyGetParentDir
620-
* @return allocated and modified file path on success, NULL on fail
620+
* @return allocated and modified file path on success, NULL on failure
621621
*/
622622
static char* get_modified_path(const char* path, const char* str, int operation)
623623
{
@@ -654,7 +654,7 @@ static char* get_modified_path(const char* path, const char* str, int operation)
654654
* @param str the string to insert into/append to the source file path
655655
* @param operation the operation determining how to modify the file path, can be one of the values
656656
* FModifyAppendSuffix, FModifyInsertBeforeExtension, FModifyRemoveExtension, FModifyGetParentDir
657-
* @return allocated and modified file path on success, NULL on fail
657+
* @return allocated and modified file path on success, NULL on failure
658658
*/
659659
static tpath_t get_modified_tpath(ctpath_t path, const char* str, int operation)
660660
{
@@ -694,7 +694,7 @@ static tpath_t get_modified_tpath(ctpath_t path, const char* str, int operation)
694694
* @param str the string to insert into/append to the source file path
695695
* @param operation the operation to do on src file, can be one of the values
696696
* FModifyAppendSuffix, FModifyInsertBeforeExtension, FModifyRemoveExtension, FModifyGetParentDir
697-
* @return 0 on success, -1 on fail with error code stored in errno
697+
* @return 0 on success, -1 on failure with error code stored in errno
698698
*/
699699
int file_modify_path(file_t* dst, file_t* src, const char* str, int operation)
700700
{
@@ -729,7 +729,7 @@ int file_modify_path(file_t* dst, file_t* src, const char* str, int operation)
729729
* Retrieve file information (type, size, mtime) into file_t fields.
730730
*
731731
* @param file the file information
732-
* @return 0 on success, -1 on fail with error code stored in errno
732+
* @return 0 on success, -1 on failure with error code stored in errno
733733
*/
734734
static int file_statw(file_t* file)
735735
{
@@ -777,7 +777,7 @@ static int file_statw(file_t* file)
777777
*
778778
* @param file the file information
779779
* @param fstat_flags bitmask consisting of FileStatModes bits
780-
* @return 0 on success, -1 on fail with error code stored in errno
780+
* @return 0 on success, -1 on failure with error code stored in errno
781781
*/
782782
int file_stat(file_t* file, int fstat_flags)
783783
{
@@ -822,11 +822,65 @@ int file_stat(file_t* file, int fstat_flags)
822822
}
823823

824824
/**
825-
* Open the file and return its decriptor.
825+
* Open the file and return a POSIX file descriptor.
826826
*
827-
* @param file the file information, including the path
828-
* @param fopen_flags bitmask consisting of FileFOpenModes bits
829-
* @return file descriptor on success, NULL on fail with error code stored in errno
827+
* On Windows, the file is always opened in binary mode (`_O_BINARY`)
828+
* and with `_SH_DENYNO` (shared read/write access).
829+
* In read-only mode (`FOpenRead`), the file is opened with a hint for sequential access:
830+
* - Windows: Uses `_O_SEQUENTIAL`.
831+
* - POSIX: Uses `posix_fadvise(POSIX_FADV_SEQUENTIAL)` if available (the call is made for any successfully opened descriptor, regardless of open mode).
832+
*
833+
* @param file file information, including the path (must not be NULL)
834+
* @param open_flags bitmask of `FOpenRead`, `FOpenWrite`
835+
* @return POSIX file descriptor on success, -1 on failure with error code stored in errno
836+
*/
837+
int file_open(file_t* file, int open_flags)
838+
{
839+
const int possible_oflags[4] = {
840+
#if defined(_WIN32)
841+
0, _O_RDONLY | _O_BINARY | _O_SEQUENTIAL,
842+
_O_WRONLY | _O_BINARY, _O_RDWR | _O_BINARY
843+
#else
844+
# if !defined(O_BINARY)
845+
# define O_BINARY 0
846+
# endif
847+
0, O_RDONLY | O_BINARY, O_WRONLY | O_BINARY, O_RDWR | O_BINARY
848+
#endif
849+
};
850+
const int oflags = possible_oflags[open_flags & FOpenRW];
851+
int fd;
852+
assert((open_flags & FOpenRW) != 0);
853+
if (!file->real_path) {
854+
errno = EINVAL;
855+
return -1;
856+
}
857+
#ifdef _WIN32
858+
{
859+
fd = _wsopen(file->real_path, oflags, _SH_DENYNO, 0);
860+
if (fd < 0 && errno == EINVAL)
861+
errno = ENOENT;
862+
return fd;
863+
}
864+
#else
865+
fd = open(file->real_path, oflags, 0);
866+
# if _POSIX_C_SOURCE >= 200112L && defined(POSIX_FADV_SEQUENTIAL)
867+
if (fd >= 0)
868+
posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
869+
# endif /* _POSIX_C_SOURCE >= 200112L && defined(POSIX_FADV_SEQUENTIAL) */
870+
return fd;
871+
#endif
872+
}
873+
874+
/**
875+
* Open the file and return a standard C file stream pointer.
876+
*
877+
* On Windows, the file is opened with shared access (`_SH_DENYNO`).
878+
* In read-only mode (`FOpenRead`), a hint for sequential access is applied.
879+
*
880+
* @param file the file information, including the path (must not be NULL)
881+
* @param fopen_flags bitmask of `FOpenRead`, `FOpenWrite`, and optionally `FOpenBin`.
882+
* Must include at least one of `FOpenRead` or `FOpenWrite`.
883+
* @return valid `FILE*` stream on success, NULL on failure with error code stored in errno
830884
*/
831885
FILE* file_fopen(file_t* file, int fopen_flags)
832886
{
@@ -868,7 +922,7 @@ FILE* file_fopen(file_t* file, int fopen_flags)
868922
*
869923
* @param from the source file
870924
* @param to the destination path
871-
* @return 0 on success, -1 on fail with error code stored in errno
925+
* @return 0 on success, -1 on failure with error code stored in errno
872926
*/
873927
int file_rename(const file_t* from, const file_t* to)
874928
{
@@ -889,7 +943,7 @@ int file_rename(const file_t* from, const file_t* to)
889943
* Rename a given file to *.bak, if it exists.
890944
*
891945
* @param file the file to move
892-
* @return 0 on success, -1 on fail with error code stored in errno
946+
* @return 0 on success, -1 on failure with error code stored in errno
893947
*/
894948
int file_move_to_bak(file_t* file)
895949
{
@@ -962,7 +1016,7 @@ int file_is_readable(file_t* file)
9621016
*
9631017
* @param list the file_list_t structure to initialize
9641018
* @param file the file to open
965-
* @return 0 on success, -1 on fail with error code stored in errno
1019+
* @return 0 on success, -1 on failure with error code stored in errno
9661020
*/
9671021
int file_list_open(file_list_t* list, file_t* file)
9681022
{
@@ -1040,7 +1094,7 @@ struct WIN_DIR_t
10401094
* Open directory iterator for reading the directory content.
10411095
*
10421096
* @param dir_path directory path
1043-
* @return pointer to directory stream, NULL on fail with error code stored in errno
1097+
* @return pointer to directory stream, NULL on failure with error code stored in errno
10441098
*/
10451099
WIN_DIR* win_opendir(const char* dir_path)
10461100
{

file.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ enum FileFOpenModes {
132132
FOpenWriteBin = FOpenWrite | FOpenBin,
133133
FOpenRWBin = FOpenRW | FOpenBin,
134134
};
135+
int file_open(file_t* file, int open_flags);
135136
FILE* file_fopen(file_t* file, int fopen_flags);
136137

137138
int file_rename(const file_t* from, const file_t* to);

0 commit comments

Comments
 (0)