#!/bin/bash
# Bash script to generate a document pool from retrieval runs.
# It reads two inputs interactively: the first is the number of documents to include in the pool from every retrieval result (for example 75),
# and the second is the directory that holds the Terrier retrieval results as .res files (for example ./var/results).
# NOTE: Results from ALL .res files in the specified directory will be added
#       to the pool, so ensure that there are no unnecessary .res files in that
#       directory.
# Read the two inputs, validate them, and write one extract file per run.
#
# Variables left for the rest of the script:
#   top_document - rank cut-off entered by the user
#   filepath     - directory holding the .res files
#   FILE_NAME    - newline-separated list of the .res files found there
#   opt          - extension used for the per-run extract files (.txt)
#   i            - one greater than the number of runs once the loop is done
# Files written: 1.txt, 2.txt, ... in the current directory, one per run.
i=1
opt=.txt

printf '%s\n' 'Give the number of top documents you want to include from each run :-'
# -r keeps any backslash in the answer literal.
read -r top_document
printf '%s\n' 'Enter the path of the directory that stores the output of terrier with various models (all .res files in this directory will be used) :-'
read -r filepath

# Stop early on unusable input: with an empty or wrong directory the later
# steps would otherwise create and move files relative to "/".
case $top_document in
	'' | *[!0-9]*)
		printf 'error: the number of top documents must be a non-negative integer, got "%s"\n' "$top_document" >&2
		exit 1
		;;
esac

if [ ! -d "$filepath" ]
then
	printf 'error: "%s" is not a directory\n' "$filepath" >&2
	exit 1
fi

# Let the shell expand the glob instead of parsing the output of ls. When
# nothing matches, the pattern stays literal (or the array is empty), so the
# existence test below catches both cases.
res_files=("$filepath"/*.res)
if [ ! -e "${res_files[0]}" ]
then
	printf 'error: no .res files found in "%s"\n' "$filepath" >&2
	exit 1
fi

# Kept as a newline-separated string because the rest of the script iterates
# over $FILE_NAME with word splitting.
FILE_NAME=$(printf '%s\n' "${res_files[@]}")

for filename in "${res_files[@]}"
do
	# Keep the lines whose 4th field is below the cut-off and prefix each with
	# a "<field1>.<field3>" key.
	# NOTE(review): presumably the Terrier/TREC run layout
	# "qid Q0 docno rank score tag" with ranks starting at 0 - confirm.
	awk -v ntop="$top_document" '$4 < ntop { print $1"."$3"  "$1"  "$2"  "$3"  "$4"  " $5"  "$6 }' "$filename" > "$i$opt"
	i=$(( i + 1 ))
done

############################################################################
# The extraction loop earlier in this script leaves i one past the last run;
# step back so that i is the number of per-run files (1.txt .. $i.txt)
# waiting in the current directory.
i=$(( i - 1 ))

#######################################
# Merge the per-run extract files into the pool.
# Globals:   i (read)        - number of per-run files
#            opt (read)      - their extension
#            filepath (read) - directory that receives the pool files
# Outputs:   $filepath/Pool_before_uniq_all_file.txt - all extracted lines,
#              sorted with sort -n
#            $filepath/Pool_after_uniq_all_file.txt  - fields 2, 1 and 4 of
#              those lines with adjacent duplicates removed
#            The per-run files are deleted once merged.
# Returns:   0 when there is nothing to merge, otherwise the status of the
#            last pipeline.
#######################################
build_pool() {
	local j
	local -a parts=()

	# A single run still forms a valid pool, so only bail out when there are
	# no per-run files at all.
	if [ "$i" -lt 1 ]
	then
		return 0
	fi

	for (( j = 1; j <= i; j++ ))
	do
		parts+=("$j$opt")
	done

	# sort reads all inputs as one stream, so no intermediate concatenation
	# file is needed.
	sort -n "${parts[@]}" > "$filepath/Pool_before_uniq_all_file.txt"
	rm -f "${parts[@]}"

	awk '{print $2"  "$1"  "$4 ;}' "$filepath/Pool_before_uniq_all_file.txt" \
		| uniq > "$filepath/Pool_after_uniq_all_file.txt"
}

build_pool

#############################################################################

#######################################
# Print the sorted, de-duplicated "<field1>.<field3>" keys of every line
# whose 4th field is below the cut-off.
# NOTE(review): presumably the Terrier/TREC run layout
# "qid Q0 docno rank score tag" - confirm.
# Arguments: $1 - cut-off; remaining arguments - files to scan
# Outputs:   keys on stdout, in byte order so that comm can consume them
#######################################
pool_keys() {
	local ntop=$1
	shift
	awk -v ntop="$ntop" '$4 < ntop { print $1 "." $3 }' "$@" | LC_ALL=C sort -u
}

#######################################
# For every run, list the documents that only this run puts into the pool,
# i.e. keys(run) minus keys(all other runs).
# Globals:   FILE_NAME (read)    - whitespace-separated list of run files
#            top_document (read) - rank cut-off
#            filepath (read)     - directory that receives the result files
# Outputs:   $filepath/unique_document_for_<run file name>.txt, one key per
#            line, ordered with sort -n
#######################################
unique_documents_per_run() {
	local unique_file filename run_name
	local -a others

	# FILE_NAME is a plain string, so it is split on whitespace on purpose.
	for unique_file in $FILE_NAME
	do
		# Start with /dev/null so awk never falls back to reading stdin when
		# there is only one run.
		others=(/dev/null)
		for filename in $FILE_NAME
		do
			if [ "$filename" != "$unique_file" ]
			then
				others+=("$filename")
			fi
		done

		# Plain basename of the run file; works for any directory name.
		run_name=${unique_file##*/}

		# comm needs both inputs in the same collation order it uses itself,
		# hence LC_ALL=C on both sides. The final sort -n puts the result in
		# numeric order of the leading field.
		LC_ALL=C comm -23 \
			<(pool_keys "$top_document" "$unique_file") \
			<(pool_keys "$top_document" "${others[@]}") \
			| sort -n > "$filepath/unique_document_for_$run_name.txt"
	done
}

unique_documents_per_run

#######################################
# Move every .txt file found in the results directory into its output/
# sub-directory and remove leftover *_uniq.txt scratch files from the current
# directory.
# Globals:   filepath (read) - results directory
# Outputs:   prints the location of the output directory on stdout
# Returns:   1 when filepath is empty, otherwise the status of the last
#            command
#######################################
collect_output() {
	local out_dir txt_file

	# An empty path would make the commands below operate on "/".
	if [ -z "$filepath" ]
	then
		printf '%s\n' 'error: results directory is not set; nothing to collect' >&2
		return 1
	fi

	out_dir=$filepath/output
	if [ ! -d "$out_dir" ]
	then
		mkdir "$out_dir"
	fi

	# Move file by file so that an unmatched glob is skipped instead of being
	# passed to mv as a literal name.
	for txt_file in "$filepath"/*.txt
	do
		if [ -e "$txt_file" ]
		then
			mv -- "$txt_file" "$out_dir/"
		fi
	done

	rm -rf -- ./*_uniq.txt
	printf 'output is in %s/output/\n' "$filepath"
}

collect_output