#!/usr/bin/perl -w

# The purpose of this script is to reformat the results of orthoMCL.

use strict;
use warnings;

# Require exactly one command-line argument (the OrthoMCL groups file).
# On misuse, print the usage line to STDERR and exit with a non-zero status
# so that a calling shell or pipeline can detect the failure.
if (@ARGV != 1) {
  print STDERR "perl $0 the_orthomcl_group_file\n";
  exit 1;
}

# Reformat every group line of the input file.
#
# Input lines are expected to look like
#     grp1: hsa|g1 mmu|g2
# i.e. a group name, a colon, whitespace, and whitespace-separated members,
# each written as "prefix|rest". For such a line the output is
#     grp1(2 genes,2 taxa): g1(hsa) g2(mmu)
# where "genes" is the number of members and "taxa" is the number of distinct
# prefixes (as counted by uniq()). Lines that do not match are skipped.
# The result is written to a new file named after the input with the suffix
# ".v1.4_format".
my $infile_grp  = shift;
my $outfile_grp = "$infile_grp.v1.4_format";

# Three-argument open with lexical handles: the path is never interpreted as
# an open mode or a pipe, whatever characters it contains.
open(my $in_fh, '<', $infile_grp)
  or die "Cannot open '$infile_grp' for reading: $!";
open(my $out_fh, '>', $outfile_grp)
  or die "Cannot open '$outfile_grp' for writing: $!";

while (my $line = <$in_fh>) {
  chomp $line;

  # Capture the member list explicitly instead of relying on $' (postmatch).
  next unless $line =~ /^(\S+):\s+(.*)\z/s;
  my ($group, $member_string) = ($1, $2);

  my @members = split(/\s+/, $member_string);
  my @prefixes;   # first "|"-separated field of every member
  my @entries;    # members rewritten as "rest(prefix)"

  foreach my $member (@members) {
    my ($prefix, @rest) = split(/\|/, $member);
    push(@prefixes, $prefix);
    # The remaining fields are re-joined with "|" in case the identifier
    # itself contained that character.
    push(@entries, join("|", @rest)."(".$prefix.")");
  }

  my $genes = scalar(@members);
  my $taxa  = uniq(@prefixes);    # number of distinct prefixes

  # Every entry is preceded by a single space; with no entries the line ends
  # right after the colon.
  print {$out_fh} join(" ", $group."(".$genes." genes,".$taxa." taxa):", @entries)."\n";
}

# Buffered write errors only surface at close, so check the output handle too.
close($out_fh) or die "Cannot close '$outfile_grp': $!";
close($in_fh)  or die "Cannot close '$infile_grp': $!";

# Count the distinct values among the arguments.
#
# Arguments: a list of scalars (compared as strings, since they are used as
#            hash keys).
# Returns:   the number of different values in that list; 0 for an empty list.
#            Note that despite its name this sub returns a count, not the
#            de-duplicated list itself.
sub uniq {
  my %seen;
  foreach my $value (@_) {
    $seen{$value} = 1;
  }
  my $distinct = scalar(keys(%seen));
  return $distinct;
}