Question about sorting with Perl

I have some huge log files that I need to sort. Every entry contains a 32-bit hex number, which is the sort key I want to use. Some records are one-liners, for example:

bla bla bla  0x97860afa bla bla  

Others are a bit more complicated: they start with the same type of line as above and expand into a block of lines delimited by curly braces, as in the example below. In this case, the whole block should go to the position determined by the hexadecimal number. Block example:

 bla bla bla  0x97860afc bla bla  
     bla bla {  
         blabla  
            bla bla {  
                bla     
            }  
        }  

Maybe I can figure it out, but maybe there is a simple perl or awk solution that will save me half a day.


Passing comments from OP:

, , , : , "", , , a "0xNNNNNNNN" ( ) , "0xNNNNNNNN" (, , N-). .

+3
5

- ( ):

# Sort log entries held in memory by the 32-bit hex key found on each
# entry's first line.  Reads from the files named on the command line
# (or STDIN) and prints the reordered log to STDOUT.
#
# A line containing a 0xNNNNNNNN token starts a new entry; every other
# line is appended to the entry currently being collected.  Note that a
# line inside a block which itself contains such a token is therefore
# treated as the start of a new entry.
use strict;
use warnings;

my @records;    # each element: [ numeric key, first line number, text ]
my @orphans;    # lines seen before the first key; printed first, unsorted

while ( my $line = <> ) {
    # Keep lines verbatim, but make sure a final unterminated line cannot
    # run into the entry printed after it.
    $line .= "\n" unless $line =~ /\n\z/;

    if ( $line =~ /\b(0x\p{AHex}{8})\b/ ) {
        # Begin a new entry.  hex() gives a real number so that <=> orders
        # the entries; the line number keeps duplicate keys distinct.
        push @records, [ hex($1), $., $line ];
    }
    elsif (@records) {
        # Continue the current entry
        $records[-1][2] .= $line;
    }
    else {
        push @orphans, $line;
    }
}

print @orphans;
print $_->[2]
    for sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @records;

, "" , , , . , , , .

, , , .

+2

:

# Build the sortable form of one record: "<key>-:-<record text>".
#
# The key is the first 0xNNNNNNNN token found in the record (an empty
# string if there is none).  The argument is read through the @_ alias and
# the caller's variable is reset to '' so it can start collecting the next
# record.
#
# Returns the sortable string.
sub to_sort_form {
    my $buffer = $_[0];
    my ( $id ) = $buffer =~ m/(0x\p{AHex}{8})/; # grab the first candidate
    $id = '' unless defined $id;

    # Clear the caller's buffer *before* returning (a statement placed
    # after return never runs).  The eval keeps calls with a read-only
    # argument, such as a string literal, working.
    {
        local $@;
        eval { $_[0] = ''; 1 };
    }

    return "$id-:-$buffer";
}

# Inverse of the sortable form: drop the "<key>-:-" prefix and turn every
# '$--^' placeholder back into a newline.  Returns the record text.
sub to_source {
    my ($sortable) = @_;

    # Everything after the first "-:-" is the stored record text.
    my $text = $sortable =~ m/-:-(.*)/ ? $1 : undef;

    # Restore the original line breaks.
    $text =~ s/\$--\^/\n/g;

    return $text;
}

# External (on-disk) sort of the log records.
#
# Records are collected line by line, joined with the '$--^' placeholder so
# that each record is a single string, fed to Sort::External in the form
# produced by to_sort_form(), and printed back via to_source().
#
# NOTE: Sort::External has to be loaded (use Sort::External;) and
# $temp_directory set before this point; neither is shown here.
my $sortex = Sort::External->new(
    mem_threshold => 1024**2 * 16,    # default: 1024**2 * 8 (8 MiB)
    cache_size    => 100_000,         # default: undef (disabled)
    sortsub       => sub { $Sort::External::a cmp $Sort::External::b },
    working_dir   => $temp_directory, # default: see below
);

my $buffer = '';
while ( my $line = <> ) {
    chomp $line;

    # An unindented, non-blank line starts a new record: flush the one
    # collected so far.  Blank lines stay with the current record.
    if ( $line =~ m/^\S/ && length $buffer ) {
        $sortex->feed( to_sort_form( $buffer ) );
        $buffer = '';    # explicit reset; do not rely on to_sort_form
    }

    # Every line, including the very first one, carries the separator so
    # that to_source() can restore all line breaks.
    $buffer .= $line . '$--^';
}

# Flush the last record; nothing is fed for empty input.
$sortex->feed( to_sort_form( $buffer ) ) if length $buffer;
$sortex->finish;

while ( defined( my $record = $sortex->fetch ) ) {
    print to_source( $record );
}

:

  • '$--^' .
  • .
+2

, TLP. , , . while ( , ymmv, ..):

# State shared by the fragments below (presumably kept across iterations
# of the read loop -- confirm against the full script).
# Compared against the current input file name to detect a file change.
my $currentInFile        = "";
# Output handle for the ".tagged" file.  NOTE(review): it starts as "",
# which is not a filehandle; it only becomes one once open() has been
# called on it.
my $currentOutFileHandle = "";

while if-else

# Switch to a new "<input>.tagged" output file whenever <> moves on to the
# next input file.  $ARGV holds the name of the file <> is reading.
if ($currentInFile ne $ARGV) {
    # Close the previous output file, if one is open.  ref() skips the
    # initial "" placeholder, which is not a filehandle.
    if (ref($currentOutFileHandle) && defined fileno($currentOutFileHandle)) {
        if (!close($currentOutFileHandle)) {
            # whatever you want to do if you can't close the previous output file
        }
    }
    # Remember the file we are on now; otherwise this branch would run for
    # every line and truncate the output file each time.
    $currentInFile = $ARGV;
    # Start from undef so open() creates a fresh handle.
    $currentOutFileHandle = undef;
    my $newOutFile = $ARGV . ".tagged";
    if (!open($currentOutFileHandle, ">", $newOutFile)) {
        # whatever you want to do if you can't open a new output file for writing
    }
}

# Rebuild the sort tag when the (elided) condition holds.  The tag is the
# text captured in $1 (presumably the 0xNNNNNNNN key -- confirm against
# the conditional that is substituted here), a space, and the current
# input line number $. zero-padded to 10 digits.
if (...conditional from TLP...) {
    # add more zeroes if the files really are that large :)
    $lastkey = $1 . " " . sprintf("%0.10d", $.);
}

# Write the current line to the output file, prefixed by its sort tag and
# a tab, provided the output handle is usable.
if (!fileno($currentOutFileHandle)) {
    # whatever you want to do if $currentOutFileHandle gone screwy
}
else {
    print {$currentOutFileHandle} join("\t", $lastkey, $line);
}

foo.log.tagged foo.log, ; .tagged , "0xNNNNNNNN LLLLLLLLLL\t" (LLLLLLLLLLL → ), . sort(1) , --temporary-directory, , /tmp temp, , . - :

# Sort all *.tagged files into one output file, keeping sort's temporary
# files in the given scratch directory.
sort --output=/my/new/really.big.file --temporary-directory=/scratch/dir/on/roomy/partition *.tagged

:

# Edit the sorted file in place, removing everything up to and including
# the first tab on each line (the sort tag).
perl -pi -e 's/^[^\t]+\t//' /my/new/really.big.file

FWIW, , , 10 2, - , .

+2

()

# Decorate / sort / undecorate:
#   1. prefix every line with the most recent 0xNNNNNNNN key (10 spaces
#      until the first key is seen) and the line number padded to 10 digits;
#   2. sort bytewise (LC_ALL=C);
#   3. remove the first 20 characters of each line, i.e. the prefix.
perl -wne'BEGIN{ $key = " " x 10 }' \
    -e '$key = $1 if /(0x[0-9a-f]{8})/;' \
    -e 'printf "%s%.10d%s", $key, $., $_' \
    inputfile \
    | LC_ALL=C sort \
    | perl -wpe'substr($_,0,20,"")'
0

TLP . , pos, , , . - . , stackoverflow .

#!/usr/bin/perl -w
# Sort log entries by the numeric value of the 0xNNNNNNNN key on each
# entry's first line.  Reads the files named on the command line (or
# STDIN) and prints the reordered log to STDOUT.
#
# A line containing a 0xNNNNNNNN token starts a new entry; all other lines
# are appended to the entry currently being collected.
use strict;

my %entries_for;    # numeric key => array ref of entries, in input order
my @preamble;       # lines seen before the first key; printed first
my $lastkey;        # numeric key of the entry being collected

while ( my $line = <> ) {
    # Lines are kept verbatim so multi-line blocks stay multi-line; only a
    # final unterminated line gets a newline added.
    $line .= "\n" unless $line =~ /\n\z/;

    if ( $line =~ /\b(0x\p{AHex}{8})\b/ ) {
        # Begin a new entry.  Entries sharing a key are all kept, in the
        # order they were read, instead of overwriting each other.
        $lastkey = hex($1);
        push @{ $entries_for{$lastkey} }, $line;
    }
    elsif ( defined $lastkey ) {
        # Continue an old entry
        $entries_for{$lastkey}[-1] .= $line;
    }
    else {
        push @preamble, $line;
    }
}

print @preamble;
print @{ $entries_for{$_} } for sort { $a <=> $b } keys %entries_for;
0

All Articles