/* * Copyright (c) 2014, 2015, 2018 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * sort.c * Sort, merge, or sequence check text files. */ #include #include #include #include #include #include #include #include // TODO: Implement all the features mandated by POSIX. // TODO: Implement the useful GNU extensions. static size_t pick_uniform(size_t upper) { if ( upper < 2 ) return 0; size_t minimum = -upper % upper; size_t selection; do arc4random_buf(&selection, sizeof(selection)); while ( selection < minimum ); return selection % upper; } static int flip_comparison(int rel) { return rel < 0 ? 1 : 0 < rel ? -1 : 0; } static int indirect_compare(int (*compare)(const char*, const char*), const void* a_ptr, const void* b_ptr) { const char* a = *(const char* const*) a_ptr; const char* b = *(const char* const*) b_ptr; return compare(a, b); } static int compare_line(const char* a, const char* b) { return strcoll(a, b); } static int indirect_compare_line(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_line, a_ptr, b_ptr); } static int compare_line_reverse(const char* a, const char* b) { return flip_comparison(compare_line(a, b)); } static int indirect_compare_line_reverse(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_line_reverse, a_ptr, b_ptr); } static int compare_version(const char* a, const char* b) { return strverscmp(a, b); } static int indirect_compare_version(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_version, a_ptr, b_ptr); } static int compare_version_reverse(const char* a, const char* b) { return flip_comparison(compare_version(a, b)); } static int indirect_compare_version_reverse(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_version_reverse, a_ptr, b_ptr); } struct input_stream { const char* const* files; size_t files_current; size_t files_length; FILE* current_file; const char* last_file_path; uintmax_t last_line_number; }; static char* read_line(FILE* fp, const char* fpname, int delim) { char* line = NULL; size_t line_size = 0; ssize_t amount = getdelim(&line, &line_size, delim, fp); if ( amount < 0 ) { free(line); if ( ferror(fp) ) err(2, "read: %s", fpname); return NULL; } if ( (unsigned char) line[amount-1] == (unsigned char) delim ) line[amount-1] = '\0'; return line; } static char* read_input_stream_line(struct input_stream* is, int delim) { if ( !is->files_length ) { char* result = read_line(stdin, "", delim); is->last_file_path = "-"; if ( result ) is->last_line_number++; return result; } while ( is->files_current < is->files_length ) { const char* path = is->files[is->files_current]; if ( !is->current_file ) { is->last_line_number = 0; if ( !strcmp(path, "-") ) is->current_file = stdin; else if ( !(is->current_file = fopen(path, "r")) ) err(2, "%s", path); } char* result = read_line(is->current_file, path, delim); if ( !result ) { if ( is->current_file != stdin ) fclose(is->current_file); is->current_file = NULL; is->files_current++; continue; } is->last_file_path = path; is->last_line_number++; return result; } return NULL; } static char** read_input_stream_lines(size_t* result_num_lines, struct input_stream* is, int delim) { char** lines = NULL; size_t lines_used = 0; size_t lines_length = 0; char* line; while ( (line = read_input_stream_line(is, delim)) ) { if ( lines_used == lines_length ) { size_t old_lines_length = lines_length ? lines_length : 64; char** new_lines = (char**) reallocarray(lines, old_lines_length, 2 * sizeof(char*)); if ( !new_lines ) err(2, "malloc"); lines = new_lines; lines_length = 2 * old_lines_length; } lines[lines_used++] = line; } return *result_num_lines = lines_used, lines; } static void compact_arguments(int* argc, char*** argv) { for ( int i = 0; i < *argc; i++ ) { while ( i < *argc && !(*argv)[i] ) { for ( int n = i; n < *argc; n++ ) (*argv)[n] = (*argv)[n+1]; (*argc)--; } } } int main(int argc, char* argv[]) { setlocale(LC_ALL, ""); bool check = false; bool check_quiet = false; bool merge = false; const char* output = NULL; bool random = false; bool reverse = false; bool unique = false; bool version_sort = false; bool zero_terminated = false; for ( int i = 1; i < argc; i++ ) { const char* arg = argv[i]; if ( arg[0] != '-' || !arg[1] ) continue; argv[i] = NULL; if ( !strcmp(arg, "--") ) break; if ( arg[1] != '-' ) { char c; while ( (c = *++arg) ) switch ( c ) { case 'C': check = true, check_quiet = true; break; case 'c': check = true, check_quiet = false; break; case 'm': merge = true; break; case 'o': if ( !*(output = arg + 1) ) { if ( i + 1 == argc ) errx(2, "option requires an argument -- 'o'"); output = argv[i+1]; argv[++i] = NULL; } arg = "o"; break; case 'R': random = true; break; case 'r': reverse = true; break; case 'u': unique = true; break; case 'V': version_sort = true; break; case 'z': zero_terminated = true; break; default: errx(2, "unknown option -- '%c'", c); } } else if ( !strcmp(arg, "--check") || !strcmp(arg, "--check=diagnose-first") ) check = true, check_quiet = false; else if ( !strcmp(arg, "--check=quiet") || !strcmp(arg, "--check=silent") ) check = true, check_quiet = true; else if ( !strcmp(arg, "--merge") ) merge = true; else if ( !strncmp(arg, "--output=", strlen("--output=")) ) output = arg + strlen("--output="); else if ( !strcmp(arg, "--output") ) { if ( i + 1 == argc ) errx(2, "option '--output' requires an argument"); output = argv[i+1]; argv[++i] = NULL; } else if ( !strcmp(arg, "--random-sort") ) random = true; else if ( !strcmp(arg, "--reverse") ) reverse = true; else if ( !strcmp(arg, "--unique") ) unique = true; else if ( !strcmp(arg, "--version-sort") ) version_sort = true; else if ( !strcmp(arg, "--zero-terminated") ) zero_terminated = true; else errx(2, "unknown option: %s", arg); } compact_arguments(&argc, &argv); if ( check_quiet && output ) errx(1, "the -C and -o options are incompatible"); if ( check && output ) errx(1, "the -c and -o options are incompatible"); if ( check_quiet && random ) errx(1, "the -C and -R options are incompatible"); if ( check && random ) errx(1, "the -c and -R options are incompatible"); int delim = zero_terminated ? '\0' : '\n'; int (*compare)(const char*, const char*); int (*qsort_compare)(const void*, const void*); if ( version_sort && reverse ) compare = compare_version_reverse, qsort_compare = indirect_compare_version_reverse; else if ( version_sort ) compare = compare_version, qsort_compare = indirect_compare_version; else if ( reverse ) compare = compare_line_reverse, qsort_compare = indirect_compare_line_reverse; else compare = compare_line, qsort_compare = indirect_compare_line; struct input_stream is; memset(&is, 0, sizeof(is)); is.files = (const char* const*) (argv + 1); is.files_current = 0; is.files_length = argc - 1; if ( check ) { int needed_relation = unique ? 1 : 0; char* prev_line = NULL; char* line; while ( (line = read_input_stream_line(&is, delim)) ) { if ( prev_line && compare(line, prev_line) < needed_relation ) { if ( check_quiet ) return 1; errx(1, "%s:%ju: disorder: %s", is.last_file_path, is.last_line_number, line); } free(prev_line); prev_line = line; } free(prev_line); } else { (void) merge; size_t lines_used = 0; char** lines = read_input_stream_lines(&lines_used, &is, delim); if ( !random || unique ) qsort(lines, lines_used, sizeof(*lines), qsort_compare); if ( random ) { if ( unique ) { size_t o = 0; for ( size_t i = 0; i < lines_used; i++ ) { if ( o && compare(lines[i], lines[o - 1]) == 0 ) continue; lines[o++] = lines[i]; } lines_used = o; } for ( size_t i = 0; i < lines_used; i++ ) { size_t left = lines_used - i; size_t choice = i + pick_uniform(left); if ( choice != i ) { char* tmp = lines[i]; lines[i] = lines[choice]; lines[choice] = tmp; } } } if ( output && !freopen(output, "w", stdout) ) err(2, "%s", output); for ( size_t i = 0; i < lines_used; i++ ) { if ( unique && i && compare(lines[i-1], lines[i]) == 0 ) continue; if ( fputs(lines[i], stdout) == EOF || fputc(delim, stdout) == EOF ) err(2, "%s", output ? output : ""); } if ( fflush(stdout) == EOF ) err(2, "%s", output ? output : ""); } return 0; }