/* * Copyright (c) 2014, 2015 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * sort.c * Sort, merge, or sequence check text files. */ #include #include #include #include #include #include #include #include // TODO: Implement all the features mandated by POSIX. // TODO: Implement the useful GNU extensions. int flip_comparison(int rel) { return rel < 0 ? 1 : 0 < rel ? -1 : 0; } int indirect_compare(int (*compare)(const char*, const char*), const void* a_ptr, const void* b_ptr) { const char* a = *(const char* const*) a_ptr; const char* b = *(const char* const*) b_ptr; return compare(a, b); } int compare_line(const char* a, const char* b) { return strcoll(a, b); } int indirect_compare_line(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_line, a_ptr, b_ptr); } int compare_line_reverse(const char* a, const char* b) { return flip_comparison(compare_line(a, b)); } int indirect_compare_line_reverse(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_line_reverse, a_ptr, b_ptr); } int compare_version(const char* a, const char* b) { return strverscmp(a, b); } int indirect_compare_version(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_version, a_ptr, b_ptr); } int compare_version_reverse(const char* a, const char* b) { return flip_comparison(compare_version(a, b)); } int indirect_compare_version_reverse(const void* a_ptr, const void* b_ptr) { return indirect_compare(compare_version_reverse, a_ptr, b_ptr); } struct input_stream { const char* const* files; size_t files_current; size_t files_length; FILE* current_file; const char* last_file_path; uintmax_t last_line_number; bool result_status; }; char* read_line(FILE* fp, const char* fpname, int delim) { char* line = NULL; size_t line_size = 0; ssize_t amount = getdelim(&line, &line_size, delim, fp); if ( amount < 0 ) { free(line); if ( ferror(fp) ) error(0, errno, "read: `%s'", fpname); return NULL; } if ( (unsigned char) line[amount-1] == (unsigned char) delim ) line[amount-1] = '\0'; return line; } char* read_input_stream_line(struct input_stream* is, int delim) { if ( !is->files_length ) { char* result = read_line(stdin, "", delim); if ( ferror(stdin) ) is->result_status = false; is->last_file_path = "-"; is->last_line_number++; return result; } while ( is->files_current < is->files_length ) { const char* path = is->files[is->files_current]; if ( !is->current_file ) { is->last_line_number = 0; if ( !strcmp(path, "-") ) is->current_file = stdin; else if ( !(is->current_file = fopen(path, "r")) ) { error(0, errno, "`%s'", path); is->result_status = false; is->files_current++; continue; } } char* result = read_line(is->current_file, path, delim); if ( !result ) { if ( ferror(is->current_file) ) { error(0, errno, "reading: `%s'", path); is->result_status = false; } if ( is->current_file != stdin ) fclose(is->current_file); is->current_file = NULL; is->files_current++; continue; } is->last_file_path = path; is->last_line_number++; return result; } return NULL; } char** read_input_stream_lines(size_t* result_num_lines, struct input_stream* is, int delim) { char** lines = NULL; size_t lines_used = 0; size_t lines_length = 0; char* line; while ( (line = read_input_stream_line(is, delim)) ) { if ( lines_used == lines_length ) { size_t new_lines_length = lines_length ? 2 * lines_length : 64; size_t new_lines_size = sizeof(char*) * new_lines_length; char** new_lines = (char**) realloc(lines, new_lines_size); if ( !new_lines ) { error(0, errno, "realloc"); free(line); is->result_status = false; return *result_num_lines = lines_used, lines; } lines = new_lines; lines_length = new_lines_length; } lines[lines_used++] = line; } return *result_num_lines = lines_used, lines; } static void compact_arguments(int* argc, char*** argv) { for ( int i = 0; i < *argc; i++ ) { while ( i < *argc && !(*argv)[i] ) { for ( int n = i; n < *argc; n++ ) (*argv)[n] = (*argv)[n+1]; (*argc)--; } } } static void help(FILE* fp, const char* argv0) { fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0); fprintf(fp, "Write sorted concatenation of all FILE(s) to standard output.\n"); fprintf(fp, "\n"); fprintf(fp, "Mandatory arguments to long options are mandatory for short options too.\n"); fprintf(fp, "Ordering options:\n"); fprintf(fp, "\n"); #if 0 fprintf(fp, " -b, --ignore-leading-blanks ignore leading blanks\n"); fprintf(fp, " -d, --dictionary-order consider only blanks and alphanumeric characters\n"); fprintf(fp, " -f, --ignore-case fold lower case to upper case characters\n"); fprintf(fp, " -g, --general-numeric-sort compare according to general numerical value\n"); fprintf(fp, " -i, --ignore-nonprinting consider only printable characters\n"); fprintf(fp, " -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n"); fprintf(fp, " -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n"); fprintf(fp, " -n, --numeric-sort compare according to string numerical value\n"); fprintf(fp, " -R, --random-sort sort by random hash of keys\n"); fprintf(fp, " --random-source=FILE get random bytes from FILE\n"); #endif fprintf(fp, " -r, --reverse reverse the result of comparisons\n"); #if 0 fprintf(fp, " --sort=WORD sort according to WORD:\n"); fprintf(fp, " general-numeric -g, human-numeric -h, month -M,\n"); fprintf(fp, " numeric -n, random -R, version -V\n"); #endif fprintf(fp, " -V, --version-sort natural sort of (version) numbers within text\n"); fprintf(fp, "\n"); fprintf(fp, "Other options:\n"); fprintf(fp, "\n"); #if 0 fprintf(fp, " --batch-size=NMERGE merge at most NMERGE inputs at once;\n"); fprintf(fp, " for more use temp files\n"); #endif fprintf(fp, " -c, --check, --check=diagnose-first check for sorted input; do not sort\n"); fprintf(fp, " -C, --check=quiet, --check=silent like -c, but do not report first bad line\n"); #if 0 fprintf(fp, " --compress-program=PROG compress temporaries with PROG;\n"); fprintf(fp, " decompress them with PROG -d\n"); fprintf(fp, " --debug annotate the part of the line used to sort,\n"); fprintf(fp, " and warn about questionable usage to stderr\n"); fprintf(fp, " --files0-from=F read input from the files specified by\n"); fprintf(fp, " NUL-terminated names in file F;\n"); fprintf(fp, " If F is - then read names from standard input\n"); fprintf(fp, " -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n"); fprintf(fp, " (default end of line). See POS syntax below\n"); #endif fprintf(fp, " -m, --merge merge already sorted files; do not sort\n"); fprintf(fp, " -o, --output=FILE write result to FILE instead of standard output\n"); #if 0 fprintf(fp, " -s, --stable stabilize sort by disabling last-resort comparison\n"); fprintf(fp, " -S, --buffer-size=SIZE use SIZE for main memory buffer\n"); fprintf(fp, " -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n"); fprintf(fp, " -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or /tmp;\n"); fprintf(fp, " multiple options specify multiple directories\n"); fprintf(fp, " --parallel=N change the number of sorts run concurrently to N\n"); #endif fprintf(fp, " -u, --unique with -c, check for strict ordering;\n"); fprintf(fp, " without -c, output only the first of an equal run\n"); fprintf(fp, " -z, --zero-terminated end lines with 0 byte, not newline\n"); fprintf(fp, " --help display this help and exit\n"); fprintf(fp, " --version output version information and exit\n"); fprintf(fp, "\n"); #if 0 fprintf(fp, "POS is F[.C][OPTS], where F is the field number and C the character position\n"); fprintf(fp, "in the field; both are origin 1. If neither -t nor -b is in effect, characters\n"); fprintf(fp, "in a field are counted from the beginning of the preceding whitespace. OPTS is\n"); fprintf(fp, "one or more single-letter ordering options, which override global ordering\n"); fprintf(fp, "options for that key. If no key is given, use the entire line as the key.\n"); fprintf(fp, "\n"); fprintf(fp, "SIZE may be followed by the following multiplicative suffixes:\n"); fprintf(fp, "%% 1%% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n"); fprintf(fp, "\n"); #endif fprintf(fp, "With no FILE, or when FILE is -, read standard input.\n"); fprintf(fp, "\n"); fprintf(fp, "*** WARNING ***\n"); fprintf(fp, "The locale specified by the environment affects sort order.\n"); fprintf(fp, "Set LC_ALL=C to get the traditional sort order that uses\n"); fprintf(fp, "native byte values.\n"); } static void version(FILE* fp, const char* argv0) { fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR); } int main(int argc, char* argv[]) { setlocale(LC_ALL, ""); bool check = false; bool check_quiet = false; bool merge = false; const char* output = NULL; bool reverse = false; bool unique = false; bool version_sort = false; bool zero_terminated = false; const char* argv0 = argv[0]; for ( int i = 1; i < argc; i++ ) { const char* arg = argv[i]; if ( arg[0] != '-' || !arg[1] ) continue; argv[i] = NULL; if ( !strcmp(arg, "--") ) break; if ( arg[1] != '-' ) { char c; while ( (c = *++arg) ) switch ( c ) { case 'c': check = true; break; case 'C': check = check_quiet = false; break; case 'm': merge = true; break; case 'o': if ( !*(output = arg + 1) ) { if ( i + 1 == argc ) { error(0, 0, "option requires an argument -- 'o'"); fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]); exit(125); } output = argv[i+1]; argv[++i] = NULL; } arg = "o"; break; case 'r': reverse = true; break; case 'u': unique = true; break; case 'V': version_sort = true; break; case 'z': zero_terminated = true; break; default: fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, c); help(stderr, argv0); exit(1); } } else if ( !strcmp(arg, "--help") ) help(stdout, argv0), exit(0); else if ( !strcmp(arg, "--version") ) version(stdout, argv0), exit(0); else if ( !strcmp(arg, "--check") || !strcmp(arg, "--check=diagnose-first") ) check = true, check_quiet = false; else if ( !strcmp(arg, "--check=quiet") || !strcmp(arg, "--check=silent") ) check = true, check_quiet = true; else if ( !strcmp(arg, "--merge") ) merge = true; else if ( !strncmp(arg, "--output=", strlen("--output=")) ) output = arg + strlen("--output="); else if ( !strcmp(arg, "--output") ) { if ( i + 1 == argc ) { error(0, 0, "option '--output' requires an argument"); fprintf(stderr, "Try `%s --help' for more information.\n", argv[0]); exit(125); } output = argv[i+1]; argv[++i] = NULL; } else if ( !strcmp(arg, "--reverse") ) reverse = true; else if ( !strcmp(arg, "--unique") ) unique = true; else if ( !strcmp(arg, "--version-sort") ) version_sort = true; else if ( !strcmp(arg, "--zero-terminated") ) zero_terminated = true; else { fprintf(stderr, "%s: unknown option: %s\n", argv0, arg); help(stderr, argv0); exit(1); } } compact_arguments(&argc, &argv); if ( output && !freopen(output, "w", stdout) ) error(2, errno, "`%s'", output); int delim = zero_terminated ? '\0' : '\n'; int (*compare)(const char*, const char*); int (*qsort_compare)(const void*, const void*); if ( version_sort && reverse ) compare = compare_version_reverse, qsort_compare = indirect_compare_version_reverse; else if ( version_sort ) compare = compare_version, qsort_compare = indirect_compare_version; else if ( reverse ) compare = compare_line_reverse, qsort_compare = indirect_compare_line_reverse; else compare = compare_line, qsort_compare = indirect_compare_line; struct input_stream is; memset(&is, 0, sizeof(is)); is.files = (const char* const*) (argv + 1); is.files_current = 0; is.files_length = argc - 1; is.result_status = true; if ( check ) { int needed_relation = unique ? 1 : 0; char* prev_line = NULL; char* line; while ( (line = read_input_stream_line(&is, delim)) ) { if ( prev_line && compare(line, prev_line) < needed_relation ) { if ( !check_quiet ) error(0, errno, "%s:%ju: disorder: %s", is.last_file_path, is.last_line_number, line); exit(1); } free(prev_line); prev_line = line; } free(prev_line); } else { (void) merge; size_t lines_used = 0; char** lines = read_input_stream_lines(&lines_used, &is, delim); qsort(lines, lines_used, sizeof(*lines), qsort_compare); for ( size_t i = 0; i < lines_used; i++ ) { if ( unique && i && compare(lines[i-1], lines[i]) == 0 ) continue; fputs(lines[i], stdout); fputc(delim, stdout); } } return is.result_status ? 0 : 2; }