Browse Source

small improvements

themage
theMage 8 years ago
parent
commit
34c492e303
1 changed files with 57 additions and 24 deletions
  1. 57 24
      Text-Histogram/lib/Text/Histogram.pm

+ 57 - 24
Text-Histogram/lib/Text/Histogram.pm

@ -9,6 +9,16 @@ use base qw(Exporter);
9 9
10 10
our @EXPORT_OK = qw(histogram);
11 11
12
my @scales = (1, 2, 5, 10, 25, 50, 100, 250, 500);
13
push @scales, map { ( 1 * $_, 2.5 * $_, 5 * $_) } (
14
		1000, 10_000, 100_000
15
	);
16
17
my @binsizes = (1, 2, 5, 10, 25, 50, 100, 250, 500);
18
push @binsizes, map { ( 1 * $_, 2.5 * $_, 5 * $_ ) } (
19
		1000, 10_000, 100_000
20
	);
21
12 22
sub histogram {
13 23
	my ($data, $opts) = @_;
14 24
@ -16,13 +26,16 @@ sub histogram {
16 26
		$data = [@_];
17 27
		$opts = {};
18 28
	}
29
	my $pts = scalar @$data;
19 30
	$opts->{bins} ||= 8;
31
	$opts->{bins} = $pts if $pts < $opts->{bins};
20 32
	$opts->{histogram_size} ||= 50;
21 33
22 34
	my $vcnt = scalar @$data;
23 35
	my @data = sort { $a <=> $b } @$data;
24 36
25
	my ($min, $max, $rmin, $rmax) = _check_outliers($vcnt, $opts, @data);
37
	my ($min, $max, $rmin, $rmax, $pmin, $pmax)
38
			= _check_outliers($vcnt, $opts, @data);
26 39
27 40
	my ($scale, $binsize, %bins)
28 41
			= _get_frequency($min,$max,$rmin,$rmax, $opts, \@data);
@ -50,7 +63,7 @@ sub histogram {
50 63
	if ($max != $rmax) {
51 64
		my $freq = _ceil(($bins{'max'}||0)/$scale);
52 65
		$hist.= sprintf "%8d %-${hsize}s - %6d\n",
53
				$rmax+1,
66
				$pmax,
54 67
				"#" x $freq,
55 68
				($bins{'max'}||0);	
56 69
	}
@ -65,7 +78,7 @@ sub _get_frequency {
65 78
	$bins-- if $rmax != $max;
66 79
	my $hsize = $opts->{histogram_size};
67 80
68
	my $binsize = _ceil( ($rmax - $rmin) / $bins );
81
	my $binsize = _best_scale( ($rmax - $rmin) / $bins, @binsizes );
69 82
70 83
	for my $v (@$data) {
71 84
		if ( $v < $rmin ) {
@ -86,7 +99,8 @@ sub _get_frequency {
86 99
		$maxval = $value if $value > $maxval;
87 100
	}
88 101
89
	$scale = _ceil($maxval/$hsize) if $maxval>$hsize;
102
	$scale = _best_scale($maxval/$hsize, @scales)
103
		if $maxval>$hsize;
90 104
91 105
	return $scale, $binsize, %bins;
92 106
}
@ -109,20 +123,24 @@ sub _check_outliers {
109 123
	my $val = $data[0];
110 124
111 125
	my $c = 0;
112
	my $binsize = ($tmax - $tmin) / ($bins * 2);
113
	my ($rmin,$rmax) = (0, 0);
126
	my $bn = $bins > 2 ? $bins - 2 : 2;
127
	my $bs = ($tmax - $tmin) / $bn;
128
	my $binsize = _best_scale($bs, @binsizes);
129
	;
130
	my ($rmin, $rmax) = (0, 0);
131
	my ($pmin, $pmax) = (0, 0);
114 132
	while ( ($tmin != $rmin) or ($tmax != $rmax) ) {
115 133
		$rmin = $tmin;
116 134
		$rmax = $tmax;
117
		my $val = $data[0];
135
		$val = $data[0];
118 136
		for my $i (1..$cnt) {
119 137
			# point with more than half the size of a bin are grouped
120 138
			# in a big bin, in the beginning.
121 139
			$c = $data[$i] - $val;
122
			if ( $c > $binsize) {
140
			if ( $c > $binsize ) {
123 141
				$tmin = $data[$i];
124 142
				$val = $data[$i];
125
				$binsize = ($tmax - $tmin) / ($bins * 2);
143
				$binsize = ($tmax - $tmin) / $bn;
126 144
			}
127 145
			last if $i >= $cnt;
128 146
		}
@ -134,13 +152,24 @@ sub _check_outliers {
134 152
			if ($c > $binsize) {
135 153
				$tmax = $v1;
136 154
				$val = $v1;
137
				$binsize = ($tmax - $tmin) / ($bins * 2);
155
				$binsize = _best_scale(($tmax - $tmin) / $bn, @binsizes);;
138 156
			}
157
			$val = $v1;
139 158
			last if $i > $cnt;
140 159
		}
141 160
	}
142 161
143
	return ($min, $max, $rmin, $rmax);
162
	return ($min, $max, $rmin, $rmax, $pmin, $pmax);
163
}
164
165
sub _best_scale {
166
	my ($val, @opts) = @_;
167
168
	for my $opt (@opts) {
169
		return $opt if $opt > $val;
170
	}
171
172
	return 99_999_999_999;
144 173
}
145 174
146 175
1; # End of Text::Histogram
@ -157,21 +186,22 @@ Version 0.01
157 186
158 187
=head1 SYNOPSIS
159 188
160
Quick summary of what the module does.
189
	use Text::Histogram qw(histogram);
161 190
162
Perhaps a little code snippet.
191
	print histogram([1,2,3,4,5,2,3,2,1,3,4,5]);
163 192
164
    use Text::Histogram;
193
=head1 EXPORT
165 194
166
    my $foo = Text::Histogram->new();
167
    ...
195
=head2 histogram(\@data, [\%opts]);
168 196
169
=head1 EXPORT
197
Text::Histogram exports the sub histogram, that takes an arrayref with
198
the point to create the histogram from and an optional hashref of options.
170 199
171
A list of functions that can be exported.  You can delete this section
172
if you don't export anything, such as for a purely object-oriented module.
200
the optional hash can have the following options:
173 201
174
=head1 SUBROUTINES/METHODS
202
=over 4
203
204
=back
175 205
176 206
=head1 AUTHOR
177 207
@ -181,10 +211,14 @@ Marco Neves, C<< <neves at cpan.org> >>
181 211
182 212
Please report any bugs or feature requests to 
183 213
C<bug-text-histogram at rt.cpan.org>, or through the web interface 
184
at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Text-Histogram>. 
214
at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Text-Histogram>.
215
185 216
I will be notified, and then you'll automatically be notified of 
186 217
progress on your bug as I make changes.
187 218
219
You can also use github: https://github.com/themage/perl-text-histogram
220
or http://www.magick-source.net/projects/text-histogram
221
188 222
=head1 SUPPORT
189 223
190 224
You can find documentation for this module with the perldoc command.
@ -193,6 +227,8 @@ You can find documentation for this module with the perldoc command.
193 227
194 228
You can also look for information at:
195 229
230
http://www.magick-source.net/projects/text-histogram/wiki
231
196 232
=over 4
197 233
198 234
=item * RT: CPAN's request tracker (report bugs here)
@ -213,10 +249,8 @@ L<http://search.cpan.org/dist/Text-Histogram/>
213 249
214 250
=back
215 251
216
217 252
=head1 ACKNOWLEDGEMENTS
218 253
219
220 254
=head1 LICENSE AND COPYRIGHT
221 255
222 256
Copyright 2011 Marco Neves.
@ -227,6 +261,5 @@ by the Free Software Foundation; or the Artistic License.
227 261
228 262
See http://dev.perl.org/licenses/ for more information.
229 263
230
231 264
=cut
232 265