Add -R option to sort(1).

This commit is contained in:
Jonas 'Sortie' Termansen 2018-04-08 19:33:17 +02:00
parent a209c89233
commit 3654b370f1
2 changed files with 65 additions and 3 deletions

View File

@ -6,7 +6,7 @@
.Nd sort lines of text .Nd sort lines of text
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl CcmruVz .Op Fl CcmRruVz
.Op Fl o Ar path .Op Fl o Ar path
.Ar .Ar
.Sh DESCRIPTION .Sh DESCRIPTION
@ -46,6 +46,16 @@ This option is incompatible with
.Fl C .Fl C
and and
.Fl c . .Fl c .
.It Fl R , \-random-sort
Sort the lines randomly with a uniform distribution, where all permutations are
equally likely.
This option is incompatible with
.Fl C
and
.Fl c .
If
.Fl u
is also given, don't write duplicate lines to the output.
.It Fl r , \-reverse .It Fl r , \-reverse
Compare the lines in reverse order. Compare the lines in reverse order.
.It Fl u , \-unique .It Fl u , \-unique
@ -123,12 +133,18 @@ which is currently partially implemented in this implementation of
.Nm . .Nm .
.Pp .Pp
The The
.Fl V .Fl R , V ,
and and
.Fl z .Fl z
options, as well as the long options, are extensions also found in GNU options, as well as the long options, are extensions also found in GNU
coreutils. coreutils.
.Pp .Pp
Unlike GNU coreutils,
.Fl R
will not remove duplicates unless
.Fl u
is passed.
.Pp
As an extension, the As an extension, the
.Fl C .Fl C
and and

View File

@ -29,6 +29,17 @@
// TODO: Implement all the features mandated by POSIX. // TODO: Implement all the features mandated by POSIX.
// TODO: Implement the useful GNU extensions. // TODO: Implement the useful GNU extensions.
static size_t pick_uniform(size_t upper)
{
if ( upper < 2 )
return 0;
size_t minimum = -upper % upper;
size_t selection;
do arc4random_buf(&selection, sizeof(selection));
while ( selection < minimum );
return selection % upper;
}
static int flip_comparison(int rel) static int flip_comparison(int rel)
{ {
return rel < 0 ? 1 : 0 < rel ? -1 : 0; return rel < 0 ? 1 : 0 < rel ? -1 : 0;
@ -194,6 +205,7 @@ int main(int argc, char* argv[])
bool check_quiet = false; bool check_quiet = false;
bool merge = false; bool merge = false;
const char* output = NULL; const char* output = NULL;
bool random = false;
bool reverse = false; bool reverse = false;
bool unique = false; bool unique = false;
bool version_sort = false; bool version_sort = false;
@ -225,6 +237,7 @@ int main(int argc, char* argv[])
} }
arg = "o"; arg = "o";
break; break;
case 'R': random = true; break;
case 'r': reverse = true; break; case 'r': reverse = true; break;
case 'u': unique = true; break; case 'u': unique = true; break;
case 'V': version_sort = true; break; case 'V': version_sort = true; break;
@ -250,6 +263,8 @@ int main(int argc, char* argv[])
output = argv[i+1]; output = argv[i+1];
argv[++i] = NULL; argv[++i] = NULL;
} }
else if ( !strcmp(arg, "--random-sort") )
random = true;
else if ( !strcmp(arg, "--reverse") ) else if ( !strcmp(arg, "--reverse") )
reverse = true; reverse = true;
else if ( !strcmp(arg, "--unique") ) else if ( !strcmp(arg, "--unique") )
@ -268,6 +283,10 @@ int main(int argc, char* argv[])
errx(1, "the -C and -o options are incompatible"); errx(1, "the -C and -o options are incompatible");
if ( check && output ) if ( check && output )
errx(1, "the -c and -o options are incompatible"); errx(1, "the -c and -o options are incompatible");
if ( check_quiet && random )
errx(1, "the -C and -R options are incompatible");
if ( check && random )
errx(1, "the -c and -R options are incompatible");
int delim = zero_terminated ? '\0' : '\n'; int delim = zero_terminated ? '\0' : '\n';
@ -319,7 +338,34 @@ int main(int argc, char* argv[])
size_t lines_used = 0; size_t lines_used = 0;
char** lines = read_input_stream_lines(&lines_used, &is, delim); char** lines = read_input_stream_lines(&lines_used, &is, delim);
qsort(lines, lines_used, sizeof(*lines), qsort_compare); if ( !random || unique )
qsort(lines, lines_used, sizeof(*lines), qsort_compare);
if ( random )
{
if ( unique )
{
size_t o = 0;
for ( size_t i = 0; i < lines_used; i++ )
{
if ( o && compare(lines[i], lines[o - 1]) == 0 )
continue;
lines[o++] = lines[i];
}
lines_used = o;
}
for ( size_t i = 0; i < lines_used; i++ )
{
size_t left = lines_used - i;
size_t choice = i + pick_uniform(left);
if ( choice != i )
{
char* tmp = lines[i];
lines[i] = lines[choice];
lines[choice] = tmp;
}
}
}
if ( output && !freopen(output, "w", stdout) ) if ( output && !freopen(output, "w", stdout) )
err(2, "%s", output); err(2, "%s", output);