#!/usr/bin/perl
#===============================================================================
#
#         FILE:  tab
#
#        USAGE:  "tab" or "untab"
#
#  DESCRIPTION:  This will turn comma separated input from stdin into a text
#                table.  It can also then convert it back if the program is
#                invoked as "untab".
#
#                As well as being used on the command line, tab/untab can be
#                used from within vi, and can work on tables that are commented
#                out with # or //.
#
#      OPTIONS:
#  -------------------------------------------------------------------------
#  | Command/Option | Purpose                                              |
#  |----------------|------------------------------------------------------|
#  | tab            | Reads from stdin and tabulates comma separated input |
#  | tab <-t>       | Tabulates input and assumes first row are titles     |
#  | tab <-h>       | Prints this help                                     |
#  | tab <-nb>      | Tabulates without a border                           |
#  | tab <-fw X>    | Wrap fields greater than X big don't break words     |
#  | tab <-fs X>    | Wrap fields greater than X big and break words       |
#  | tab <-vp X>    | Vertically pad table by X lines                      |
#  | tab <-hp X>    | Horizontally pad fields by X chars                   |
#  | tab <-b X>     | Tabulates with a border made from char X             |
#  |----------------|------------------------------------------------------|
#  | untab          | Reads from stdin and untabulates table input         |
#  | untab <-b X>   | Untabulate a table with border char X                |
#  | untab <-nb>    | Untabulate a borderless table                        |
#  -------------------------------------------------------------------------
#
# REQUIREMENTS:  ---
#         BUGS:  ---
#        NOTES:  Tables produced with -fw/-fs cannot be fully restored by
#                "untab" because wrapping rewrites the rows.
#       AUTHOR:  Ben Staniford (BTS),
#      COMPANY:
#      VERSION:  1.0
#      CREATED:  23/08/07 11:53:19 BST
#     REVISION:  ---
#===============================================================================

# TODO
# 1. Make tab and untab keep existing indentation including inside comments
# 2. Allow facility to set the delimiter on the command line

use strict;
use warnings;

use File::Basename qw(basename);

#Default values (Normally set from cmd line)
my $HPADDING               = 1;    #How much horizontal padding
my $VPADDING               = 0;    #How many blank lines follow each row
my $VBORDER                = "|";  #What is our vertical border?
my $HBORDER                = "-";  #What is our horizontal border/divider?
my $wrapped_line_vpad      = 1;    #Add a blank row after each input line when wrapping?
my $preserve_words_on_wrap = 1;    #1 = wrap at word boundaries (-fw), 0 = cut anywhere (-fs)
my $field_wrap_boundary    = 0;    #How big should a field be before we wrap it? (0 = never)

#Globals
my $COMMENT_RE      = qr/^(\#|\/\/)/;    #The only place the comment syntax is defined
my @max_field_sizes = ();                #Widest field seen in each column
my $max_col_count   = 0;                 #Number of columns in the widest row
my $comment_char    = "";                #Comment marker the table lives in ("" = none)
my $titles          = 0;                 #Is the first row a title row?

#Run as a program unless this file was loaded by another script (e.g. a test)
exit main(@ARGV) unless caller;

# ---------------------------------------------------------------------------
# main(@argv)
#   Purpose:    Program entry point.  Parses the options and then tabulates
#               or, when invoked under the name "untab" (i.e. via symlink),
#               untabulates stdin.
#   Parameters: The command line arguments
#   Returns:    The process exit status (always 0)
# ---------------------------------------------------------------------------
sub main {
    my @argv = @_;

    #No STDERR under any circumstances: when used as a vi filter any warning
    #text would end up in the edited buffer.  This is best effort only, so a
    #failure to reopen is deliberately ignored.
    open STDERR, '>', '/dev/null';

    if (!parse_arguments(@argv)) {
        print_usage();
        return 0;
    }

    if (basename($0) eq "untab") {
        untabulate();
    }
    else {
        tabulate();
    }
    return 0;
}

# ---------------------------------------------------------------------------
# parse_arguments(@argv)
#   Purpose:    Apply the command line options to the settings above.  The
#               options are applied in a fixed order (not command line
#               order), so e.g. -hp always overrides the padding implied by
#               -nb or -b.  untab uses the same options.
#   Parameters: The command line arguments
#   Returns:    0 if the usage text was requested, 1 otherwise
# ---------------------------------------------------------------------------
sub parse_arguments {
    my @argv = @_;
    my $args = join ' ', @argv;

    if ($args =~ /-t/)            { $titles = 1; }
    if ($args =~ /-nb/)           { $VBORDER = ""; $HBORDER = ""; $HPADDING = 2; }
    if ($args =~ /-b\s+(\S)/)     { $VBORDER = $1; $HBORDER = $1; $HPADDING = 1; }
    if ($args =~ /-fs\s+(\d+)/)   { $field_wrap_boundary = $1; $preserve_words_on_wrap = 0; }
    if ($args =~ /-fw\s+(\d+)/)   { $field_wrap_boundary = $1; $preserve_words_on_wrap = 1; }
    if ($args =~ /-vp\s+(\d+)/)   { $VPADDING = $1; }
    if ($args =~ /-hp\s+(\d+)/)   { $HPADDING = $1; }

    #-h (or --help) asks for the usage text; the lookahead stops "-hp" from
    #being mistaken for it.
    return 0 if $args =~ /-h(?!p\b)/;
    return 1;
}

# ---------------------------------------------------------------------------
# tabulate()
#   Purpose:    Main function that tabulates stdin and prints the table
#   Parameters: None
# ---------------------------------------------------------------------------
sub tabulate {
    my @table = <STDIN>;
    print format_table(@table);
    return;
}

# ---------------------------------------------------------------------------
# format_table(@lines)
#   Purpose:    Turn comma separated lines into a text table
#   Parameters: The input lines (with or without trailing newlines)
#   Returns:    The complete table as one string
# ---------------------------------------------------------------------------
sub format_table {
    my @table = @_;

    #Start from a clean slate so repeated calls do not share column state
    @max_field_sizes = ();
    $max_col_count   = 0;
    $comment_char    = "";

    #Step 1, drop line endings and work out if this table is in a comment.
    #The comment prefix has to go before the line is split into fields,
    #otherwise it is counted as part of the first column's width.
    chomp @table;
    my @lines = map { strip_comment_prefix($_) } @table;

    #Step 2, If we have field length restrictions, reorder the table as
    #needed.  Note, this can't be untabbed.
    if ($field_wrap_boundary != 0) {
        @lines = wrap_oversized_fields(@lines);
    }

    #Step 3, calculate the number of columns as well as the maximum field
    #size for each column.
    my @rows;
    for my $line (@lines) {
        my @fields = get_fields($line);
        for my $col (0 .. $#fields) {
            my $size = length $fields[$col];
            if (!defined $max_field_sizes[$col] || $max_field_sizes[$col] < $size) {
                $max_field_sizes[$col] = $size;
            }
        }
        if (@fields > $max_col_count) { $max_col_count = @fields; }
        push @rows, \@fields;
    }

    #Step 4, build the table
    my $prefix     = $comment_char ne "" ? "$comment_char " : "";
    my $out        = header_footer();
    my $row_number = 0;
    for my $fields (@rows) {
        $row_number++;

        my $text = $prefix;
        if ($VBORDER ne "") { $text .= $VBORDER . add_padding(" "); }

        #Short rows are filled up with empty fields
        for my $col (0 .. $max_col_count - 1) {
            my $field = $col < @{$fields} ? $fields->[$col] : "";
            $text .= $field
                   . add_field_empty_space(length($field), $col)
                   . add_padding(" ")
                   . $VBORDER
                   . add_padding(" ");
        }
        $out .= "$text\n";

        #-vp X: X lines of vertical padding after every row
        $out .= table_divider(" ") for 1 .. $VPADDING;

        if ($titles && $row_number == 1) { $out .= table_divider($HBORDER); }
    }
    $out .= header_footer();

    return $out;
}

# ---------------------------------------------------------------------------
# strip_comment_prefix($line)
#   Purpose:    Work out if the data is inside a comment.  A line starting
#               with # or // sets the comment marker used for the output;
#               the current marker is then removed from the line.
#   Parameters: Input line (string)
#   Returns:    The line without its leading comment marker
# ---------------------------------------------------------------------------
sub strip_comment_prefix {
    my ($line) = @_;

    if ($line =~ $COMMENT_RE) { $comment_char = $1; }
    if ($comment_char ne "")  { $line =~ s/^\Q$comment_char\E//; }

    return $line;
}

# ---------------------------------------------------------------------------
# format_rule($edge, $fill)
#   Purpose:    Build one full-width horizontal line of the table
#   Parameters: Character used where the column borders are (string)
#               Character used to fill the columns (string)
#   Returns:    The line, including comment prefix and newline
# ---------------------------------------------------------------------------
sub format_rule {
    my ($edge, $fill) = @_;

    my $line = $comment_char ne "" ? "$comment_char " : "";
    for my $size (@max_field_sizes) {
        $line .= $edge . add_padding($fill) . ($fill x $size) . add_padding($fill);
    }

    return $line . $edge . "\n";
}

# ---------------------------------------------------------------------------
# table_divider($divider_char)
#   Purpose:    Build a divider line inside the table
#   Parameters: Character the divider should be made from
#   Returns:    The divider line, or "" for a borderless table when the
#               divider would be made from the (empty) horizontal border
# ---------------------------------------------------------------------------
sub table_divider {
    my ($divider_char) = @_;

    return "" if $divider_char eq $HBORDER && $HBORDER eq "";
    return format_rule($VBORDER, $divider_char);
}

# ---------------------------------------------------------------------------
# header_footer()
#   Purpose:    Build the table's header/footer line
#   Parameters: None
#   Returns:    The line, or "" for a borderless table
# ---------------------------------------------------------------------------
sub header_footer {
    return "" if $HBORDER eq "";
    return format_rule($HBORDER, $HBORDER);
}

# ---------------------------------------------------------------------------
# add_field_empty_space($field_length, $field_number)
#   Purpose:    Build the spacer that pads a field to its column width
#   Parameters: Field Length (int)
#               Field Number (int)
#   Returns:    A string of spaces
# ---------------------------------------------------------------------------
sub add_field_empty_space {
    my ($field_length, $field_number) = @_;

    my $empty_space_size = $max_field_sizes[$field_number] - $field_length;
    return $empty_space_size > 0 ? " " x $empty_space_size : "";
}

# ---------------------------------------------------------------------------
# add_padding($padding_char)
#   Purpose:    Build the horizontal padding string
#   Parameters: Padding character (string)
#   Returns:    The character repeated $HPADDING times
# ---------------------------------------------------------------------------
sub add_padding {
    my ($padding_char) = @_;

    return $padding_char x $HPADDING;
}

# ---------------------------------------------------------------------------
# get_fields($line)
#   Purpose:    Extract a list of fields from a line
#   Parameters: Input line (string)
#   Returns:    The comma separated fields with surrounding whitespace
#               removed.  Trailing empty fields are dropped (split does
#               that), so a line of only commas yields no fields.
# ---------------------------------------------------------------------------
sub get_fields {
    my ($line) = @_;

    my @fields = split /,/, $line;
    for my $field (@fields) {
        $field =~ s/^\s+//;
        $field =~ s/\s+$//;
    }

    return @fields;
}

# ---------------------------------------------------------------------------
# untabulate()
#   Purpose:    Perform the inverse function and untabulate stdin
#   Parameters: None
# ---------------------------------------------------------------------------
sub untabulate {
    my $line_number = 0;

    while (my $line = <STDIN>) {
        my $csv = untabulate_line($line, $line_number == 0);
        if (defined $csv) { print "$csv\n"; }
        $line_number++;
    }
    return;
}

# ---------------------------------------------------------------------------
# untabulate_line($line, $is_first_line)
#   Purpose:    Convert one line of a table back to comma separated text
#   Parameters: Table line (string)
#               True for the first line of the input, which decides whether
#               the table is inside a comment
#   Returns:    The comma separated line, or undef when the line carries no
#               data (header, footer, divider or padding line)
# ---------------------------------------------------------------------------
sub untabulate_line {
    my ($line, $is_first_line) = @_;
    chomp $line;

    #Work out if the data is inside a comment
    if ($is_first_line) {
        $comment_char = $line =~ $COMMENT_RE ? $1 : "";
    }

    #The marker and the borders are data, not regexps, so quote them
    my $comment = quotemeta $comment_char;

    #Handle a borderless table specifically
    if ($HBORDER eq "" && $VBORDER eq "") {
        $line =~ s/\s{2,}/,/g;
        $line =~ s/,$//;

    #This is a table with a border
    }
    else {
        my $hb = quotemeta $HBORDER;
        my $vb = quotemeta $VBORDER;

        #Header, footer and title divider lines consist of border characters
        #only (optionally inside a comment); blank them out
        $line =~ s/^(?:$comment)?\s*(?:$hb|$vb)*$//;

        $line =~ s/\s*$vb\s*/,/g;
    }

    #Remove the commas the outer borders left behind
    $line =~ s/^${comment},/$comment_char/;
    $line =~ s/^,+//;
    $line =~ s/,+$//;

    $line =~ s/,/, /g;    #If you want spaces as the default after commas

    #Keep a space between the comment marker and the data.  This must be
    #skipped when there is no marker: an empty pattern would make Perl reuse
    #the last successful regexp instead.
    if ($comment_char ne "") {
        $line =~ s/^${comment}(?!\s)/$comment_char /;
    }

    return if $line =~ /^\s*$/;
    return $line;
}

# ---------------------------------------------------------------------------
# wrap_field($text, $width, $preserve_words)
#   Purpose:    Split one field into chunks of at most $width characters
#   Parameters: Field text (string)
#               Maximum chunk size (int)
#               True to break at word boundaries where possible; a word that
#               is longer than $width is still cut.  False to cut every
#               $width characters.
#   Returns:    The list of chunks; just the field itself if it fits.
#               Chunks never carry surrounding whitespace and are never
#               empty (unless the field itself was empty).
# ---------------------------------------------------------------------------
sub wrap_field {
    my ($text, $width, $preserve_words) = @_;

    return ($text) if $width < 1 || length($text) <= $width;

    my @chunks;
    while (length $text) {
        my $chunk;
        if ($preserve_words) {
            $text =~ s/^\s+//;
            if (length($text) <= $width) {
                $chunk = $text;
                $text  = "";
            }
            elsif ($text =~ /^(.{1,$width})\b/) {
                #At least one character is required here: a zero length
                #match would never shorten $text and so loop forever
                $chunk = $1;
                $text  = substr $text, length $chunk;
            }
            else {
                #No word boundary fits, cut the word
                $chunk = substr $text, 0, $width, "";
            }
        }
        else {
            $chunk = substr $text, 0, $width, "";
            $chunk =~ s/^\s+//;
        }

        $chunk =~ s/\s+$//;
        if ($chunk ne "") { push @chunks, $chunk; }
    }

    return @chunks;
}

# ---------------------------------------------------------------------------
# wrap_oversized_fields(@lines)
#   Purpose:    Wrap fields that are more than the specified size.  This
#               works by rewriting the comma separated data so that extra
#               lines are made.  For this reason this function cannot easily
#               be undone by "untab".
#   Parameters: List of lines, without comment prefix
#   Returns:    The rewritten list of lines.  While $wrapped_line_vpad is
#               set, every input line is followed by one empty row so that
#               wrapped rows can be told apart.  Lines without any field
#               are dropped.
# ---------------------------------------------------------------------------
sub wrap_oversized_fields {
    my @table = @_;
    my @ret;

    #Go through each line in the table
    for my $line (@table) {
        chomp $line;

        #Split every field into the chunks that are stacked in its column
        my @columns;
        for my $field (get_fields($line)) {
            push @columns,
                [ wrap_field($field, $field_wrap_boundary, $preserve_words_on_wrap) ];
        }
        next if !@columns;

        my $height = 0;
        for my $chunks (@columns) {
            if (@{$chunks} > $height) { $height = @{$chunks}; }
        }

        #Build the extra lines: row N holds chunk N of every column
        for my $row (0 .. $height - 1) {
            my @cells = map { defined $_->[$row] ? $_->[$row] : "" } @columns;
            if (grep { $_ ne "" } @cells) { push @ret, join ",", @cells; }
        }

        if ($wrapped_line_vpad == 1) { push @ret, "," x $#columns; }
    }

    return @ret;
}

# ---------------------------------------------------------------------------
# print_usage()
#   Purpose:    Print the help text
#   Parameters: None
# ---------------------------------------------------------------------------
sub print_usage {
    print <<'END';
-------------------------------------------------------------------------
| Command/Option | Purpose                                              |
|----------------|------------------------------------------------------|
| tab            | Reads from stdin and tabulates comma seperated input |
| tab <-t>       | Tabulates input and assumes first row are titles     |
| tab <-h>       | Prints this help                                     |
| tab <-nb>      | Tabulates without a border                           |
| tab <-fw X>    | Wrap fields greater than X big don't break words     |
| tab <-fs X>    | Wrap fields greater than X big and break words       |
| tab <-vp X>    | Vertically pad table by X lines                      |
| tab <-hp X>    | Horizontally pad fields by X chars                   |
| tab <-b X>     | Tabulates with a border made from char X             |
|----------------|------------------------------------------------------|
| untab          | Reads from stdin and untabulates table input         |
| untab <-b X>   | Untabulate a table with border char X                |
| untab <-nb>    | Untabulate a borderless table                        |
-------------------------------------------------------------------------
END
    return;
}

1;