#!/usr/bin/perl -w
#
# stackcollapse-ibmjava.pl	collapse IBM Java thread dump stacks into single lines.
#
# Parses Java thread stacks generated by IBM Java and prints each stack on a
# single line: methods separated by semicolons, followed by a space and an
# occurrence count.
#
# USAGE: ./stackcollapse-ibmjava.pl infile > outfile
#
# Example input:
#
#  NULL           
#  1XMTHDINFO     Thread Details
#  NULL           
#  NULL
#  3XMTHREADINFO      "Default Executor-thread-149164" J9VMThread:0x0000000008132B00, j9thread_t:0x000000001A810B90, java/lang/Thread:0x0000000712BE8E48, state:R, prio=5
#  3XMJAVALTHREAD            (java/lang/Thread getId:0x3493E, isDaemon:true)
#  3XMTHREADINFO1            (native thread ID:0x3158, native priority:0x5, native policy:UNKNOWN, vmstate:R, vm thread flags:0x00000001)
#  3XMCPUTIME               CPU usage total: 0.421875000 secs, user: 0.343750000 secs, system: 0.078125000 secs, current category="Application"
#  3XMHEAPALLOC             Heap bytes allocated since last GC cycle=0 (0x0)
#  3XMTHREADINFO3           Java callstack:
#  4XESTACKTRACE                at java/net/SocketInputStream.socketRead0(Native Method)
#  4XESTACKTRACE                at java/net/SocketInputStream.socketRead(SocketInputStream.java:127(Compiled Code))
#  4XESTACKTRACE                at java/net/SocketInputStream.read(SocketInputStream.java:182(Compiled Code))
#  4XESTACKTRACE                at java/net/SocketInputStream.read(SocketInputStream.java:152(Compiled Code))
#  4XESTACKTRACE                at java/io/FilterInputStream.read(FilterInputStream.java:144(Compiled Code))
#  ...
#  4XESTACKTRACE                at java/lang/Thread.run(Thread.java:785(Compiled Code))
#
# Example output:
#
#  Default Executor-thread-149164;java/lang/Thread.run;...;java/net/SocketInputStream.read;java/net/SocketInputStream.socketRead0 1
#
#
# Copyright 2014 Federico Juinio.  All rights reserved.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#  (http://www.gnu.org/copyleft/gpl.html)
#
# 23-Aug-2023   Federico Juinio created this based on stackcollapse-jstack.pl

use strict;

use Getopt::Long;

# tunables (defaults; each can be overridden from the command line)
my $help          = 0;		# show the usage text instead of processing input
my $shorten_pkgs  = 0;		# shorten package names
my $include_tid   = 0;		# include thread IDs in stacks
my $include_tname = 1;		# include thread names in stacks

# Abort with the command-line synopsis.
#
# Takes no arguments and never returns: the usage text is raised with die,
# so it goes to STDERR and the script exits with a failure status unless a
# caller traps it. The text ends in a newline, so Perl does not append
# "at FILE line N" to it.
sub usage {
	die <<USAGE_END;
USAGE: $0 [options] infile > outfile\n
	--include-tname
	--no-include-tname # include/omit thread names in stacks (default: include)
	--include-tid
	--no-include-tid   # include/omit thread IDs in stacks (default: omit)
	--shorten-pkgs
	--no-shorten-pkgs  # (don't) shorten package names (default: don't shorten)
	--help             # show this message

	eg,
	$0 --no-include-tname stacks.txt > collapsed.txt
USAGE_END
}

# Parse the command line into the tunables. The "!" suffix makes an option
# negatable (--foo / --no-foo). An unrecognised option, or an explicit
# --help, ends the run with the usage text.
my $options_ok = GetOptions(
	'help'            => \$help,
	'shorten-pkgs!'   => \$shorten_pkgs,
	'include-tid!'    => \$include_tid,
	'include-tname!'  => \$include_tname,
);
usage() unless $options_ok;
usage() if $help;


# internals
my %collapsed;		# folded stack string => number of occurrences

# Add $count occurrences of the folded stack string $stack to %collapsed.
# Returns the updated total for that stack.
sub remember_stack {
	my $stack = shift;
	my $count = shift;
	return $collapsed{$stack} += $count;
}

my @stack;		# frames of the thread being parsed, outermost caller first
my $tname;		# name of the thread being parsed; undef when there is none
my $state = "?";	# state field from the thread header (parsed, not used to filter)

# Record the stack collected for the current thread, if there is one, and
# reset the per-thread parser state ready for the next thread. Calling it
# when nothing has been collected is a no-op apart from the reset.
sub save_stack {
	unshift @stack, $tname if defined $tname;
	remember_stack(join(";", @stack), 1) if @stack;
	@stack = ();
	undef $tname;
	$state = "?";
}

# Read the dump one line at a time; "foreach (<>)" would first load the
# whole input into memory.
while (<>) {
	next if m/^#/;
	chomp;

	# The native callstack section comes after the Java frames of a
	# thread, so the Java stack collected so far is complete.
	if (m/^3XMTHREADINFO3           Native callstack:/) {
		save_stack();
		next;
	}

	# look for thread header line and parse thread name and state
	if (/^3XMTHREADINFO      "([^"]*).* state:(.*), /) {
		my ($name, $new_state) = ($1, $2);
		# A new thread starts here. Save the previous thread first in
		# case no "Native callstack:" line closed it; otherwise its
		# frames would be merged into this thread's stack.
		save_stack();
		$tname = $name if $include_tname;
		$state = $new_state;
	# special handling for "Anonymous native threads"
	} elsif (/3XMTHREADINFO      Anonymous native thread/) {
		save_stack();
		$tname = "Anonymous native thread";
	# look for thread id
	} elsif (/^3XMTHREADINFO1            \(native thread ID:([^ ]*), native priority/) {
		if ($include_tname && $include_tid) {
			# $tname is undef if no thread header was recognised;
			# treat it as empty instead of warning about it.
			$tname = (defined $tname ? $tname : "") . "-" . $1;
		}
	# collect stack frames
	} elsif (/^4XESTACKTRACE                at ([^\(]*)/) {
		my $func = $1;
		if ($shorten_pkgs) {
			# Frames here look like java/net/Foo.bar: packages are
			# separated by "/" and only the method by ".". Split at
			# the last "/" so that it becomes j/n/Foo.bar.
			my ($pkgs, $cls_func) = $func =~ m{^(.*/)([^/]+)$};
			# Fall back to dot-separated packages (java.net.Foo.bar).
			($pkgs, $cls_func) = $func =~ m/(.*\.)([^.]+\.[^.]+)$/
				unless defined $pkgs;
			# A frame without any package part is left untouched.
			if (defined $pkgs) {
				$pkgs =~ s/(\w)\w*/$1/g;
				$func = $pkgs . $cls_func;
			}
		}
		# The dump lists the innermost frame first; prepend so the
		# folded stack reads from outermost caller to innermost frame.
		unshift @stack, $func;
	}
}

# Save the last thread if the input ended without a "Native callstack:" line.
save_stack();

# Emit one "<folded stack> <count>" line per distinct stack, ordered by
# plain string comparison of the folded stacks.
for my $folded (sort keys %collapsed) {
	print $folded . " " . $collapsed{$folded} . "\n";
}
