#!/usr/bin/env perl

# Print the sorted list of unique words found in one or more text files.
#
# Every readable file named on the command line is read line by line and
# split on whitespace. Each token is lower-cased, has its trailing sentence
# punctuation (, . ! ?) removed, and is kept only when it contains no
# non-word characters, digits or underscores and its length lies within
# $min_word_length .. $max_word_length. The surviving words are written to
# STDOUT, one per line, in sorted order.

use strict;
use warnings;

# Inclusive bounds on the length of a word that is kept
my $min_word_length = 2;
my $max_word_length = 60;

###############################################################################
###############################################################################

# Arguments that are not readable files are silently ignored
my @files = grep { -r $_ } @ARGV;

if (!@files) {
	die(usage());
}

# Maps each accepted word to the number of times it was seen
my $ret = {};

foreach my $file (@files) {
	# Report the OS error so the user can tell *why* the open failed
	open(my $fh, "<", $file) or die("Cannot open $file: $!\n");

	#print STDERR "Processing $file\n";

	while (my $line = <$fh>) {
		# split(' ') also discards leading whitespace, so no empty first token
		foreach my $token (split(' ', $line)) {
			my $word = lc($token);

			# Remove ALL trailing punctuation so that both "end." and
			# "wait..." / "really?!" reduce to the bare word
			$word =~ s/[,.!?]+$//;

			# If the word still has any non-word chars in it, or digits, or
			# underscores, skip it. Usually this is apostrophes like "wouldn't"
			if ($word =~ /\W|\d|_/) {
				next;
			}

			# If the word length meets our requirements put it in the list
			my $len = length($word);
			if ($len >= $min_word_length && $len <= $max_word_length) {
				$ret->{$word}++;
			}
		}
	}

	close($fh);
}

# One word per line; print nothing at all (not a blank line) when no word
# qualified
my @words = sort(keys(%$ret));
print map { "$_\n" } @words;

###############################################################################
###############################################################################

# Parse @ARGV into a hashref of options.
#
# An argument is a key when it is "--name", or "-n" where the dash is NOT
# followed by two word characters (so "-ab" is not a key). The key name must
# start with a letter or underscore. If the following argument exists and
# does not look like another option (one or two dashes followed by a
# non-digit), it becomes the key's value; otherwise the key is treated as a
# bare flag and its count is incremented.
#
# With a true first parameter, returns only the value stored for that key
# (undef if absent); otherwise returns the whole hashref.
sub argv {
	my $ret = {};

	for (my $i = 0; $i < scalar(@ARGV); $i++) {
		# If the item starts with "-" it's a key
		if ((my ($key) = $ARGV[$i] =~ /^--?([a-zA-Z_]\w*)/) && ($ARGV[$i] !~ /^-\w\w/)) {
			# If the next item does not start with "--" it's the value for this item
			# (a leading dash followed by a digit, e.g. "-5", still counts as a value)
			if (defined($ARGV[$i + 1]) && ($ARGV[$i + 1] !~ /^--?\D/)) {
				$ret->{$key} = $ARGV[$i + 1];
			# Bareword like --verbose with no options
			} else {
				$ret->{$key}++;
			}
		}
	}

	# We're looking for a certain item
	if ($_[0]) {
		return $ret->{$_[0]};
	}

	return $ret;
}

# Strip leading and trailing whitespace.
#
# In list context every argument is trimmed and the list of results is
# returned. In scalar (or void) context only the first argument is trimmed
# and returned. An undefined or empty input yields "" without emitting an
# "uninitialized value" warning.
sub trim {
	if (wantarray) {
		my @ret;
		foreach (@_) {
			push(@ret, scalar(trim($_)));
		}

		return @ret;
	} else {
		my $s = shift();

		if (!defined($s) || length($s) == 0) {
			return "";
		}

		$s =~ s/^\s+//;
		$s =~ s/\s+$//;

		return $s;
	}
}

# Debug print variable using either Data::Dump::Color (preferred) or Data::Dumper
# Creates methods k() and kd() to print, and print & die respectively
BEGIN {
	if (eval { require Data::Dump::Color }) {
		*k = sub { Data::Dump::Color::dd(@_) };
	} else {
		require Data::Dumper;
		*k = sub { print Data::Dumper::Dumper(\@_) };
	}

	# Dump the arguments, report the caller's file (%2$s) and line (%3$s),
	# then terminate the program with exit status 15
	sub kd {
		k(@_);

		printf("Died at %2\$s line #%3\$s\n", caller());
		exit(15);
	}
}

# Return the one-line usage message shown when no readable input file is given
sub usage {
	return "Usage: $0 [input.txt]\n";
}