#!/bin/csh -f

################################################################################
# May, 02 2011                                              TRELLET Mikael     #
#                              haddock_calculation.py                          #
#                                                                              #
#     Perform i-RMSD, l-RMSD and FNAT calculations based on HADDOCK output     #
#                     and generate the associated graphics                     #
################################################################################

#######################################
##  Initialise some shell variables  ##
##       and check arguments         ##
#######################################


# Optional first argument: path to a reference PDB file.
# When given, the file is copied into the directory the script was started
# from and its file name (path tail) is stored in refe. Later sections test
# whether refe is set to decide if reference-specific steps must run.
if ( $#argv >= 1 ) then
    cp $1 .
    set refe = $1:t
endif


set ANADIR = "CAPRI_analysis" # Directory where all results will be stored
set WDIR=$PWD/.. # Working directory: parent of the directory the script is started from

#################################################################
# Move to the working directory and create analysis directory   #
#################################################################

cd $WDIR
# -p keeps a re-run quiet when the analysis directory already exists
mkdir -p $ANADIR
cd $ANADIR
# From here on WDIR is the absolute path of the analysis directory
set WDIR=$PWD

# File names and locations used by the rest of the analysis.
# WDIR is the analysis directory at this point, so WDIR/.. is the run directory.

# List of PDB files
set PDB_LIST = file.nam
# Reference file to calculate RMSD and FNAT, by default lowest energy structure
# (first line of the PDB list)
set REFE = `head -n 1 $WDIR/../$PDB_LIST`
# Output file in order to check script processing
set LOG_FILE = Results_analysis.out
# Scripts directory, every script used in this script is stored in this directory
set SCRIPTS_DIR = $WDIR/../analysis/
# List of contacts files generated for each PDB file
set CONTACTS_LIST = ${PDB_LIST}_contacts
# Contact file of the reference PDB file
set REFE_CONTACTS = ${REFE:r}.contacts

# Start a fresh log file (first line truncates it) and record the settings
echo 'Reference: '$REFE > $WDIR/$LOG_FILE
echo 'Working directory: '$WDIR >> $WDIR/$LOG_FILE
echo 'List of PDB files: '$PDB_LIST >> $WDIR/$LOG_FILE
echo 'Analysis directory: '$ANADIR >> $WDIR/$LOG_FILE
echo 'Scripts directory: '$SCRIPTS_DIR >> $WDIR/$LOG_FILE
echo 'Reference contact file (created later on): '$REFE_CONTACTS >> $WDIR/$LOG_FILE
echo 'Contacts list: '$CONTACTS_LIST >> $WDIR/$LOG_FILE

### Write the start marker in the log file (Results_analysis.out)
echo 'Starts analysis script...' >> $WDIR/$LOG_FILE


#################################################
####  Fraction native contacts calculations  ####
#################################################

# Reference handling, only when a reference PDB was given on the command line.
if ( $?refe ) then
    # Check PDB reference format, comparing to a complex generated by HADDOCK
    # (default: complex_1w.pdb). The first PDB of the run directory, converted
    # from segid to chain, serves as the model for format_PDB.py.
    $HADDOCKTOOLS/pdb_segid-to-chain `ls ../*.pdb | head -1` > model
    python $SCRIPTS_DIR/format_PDB.py $SCRIPTS_DIR/$refe model
    rm -f model
    # From now on the reference is reference.pdb, which is moved from the
    # current directory into the scripts directory
    # NOTE(review): presumably written here by format_PDB.py - confirm
    set refe = "reference.pdb"
    mv reference.pdb $SCRIPTS_DIR/

    # Calculates reference contacts: rewrite the reference with segids, then
    # list its contacts (second argument 5 of contact_inter) without duplicates
    $HADDOCKTOOLS/pdb_chain-to-segid $SCRIPTS_DIR/$refe > tmp
    cat tmp > $SCRIPTS_DIR/$refe
    $SCRIPTS_DIR/contact_inter tmp 5 | sort | uniq > ${refe:r}".contacts"
    # Make the formatted reference available next to the models
    cp $SCRIPTS_DIR/$refe ../
endif
rm -f tmp

# Check PDB integrity (presence of ANI residues for instance):
# every line containing ANI or DAN is removed from each PDB file of the run
# directory. The filtered copy is written to tmp and then moved over the file.
# grep -E replaces egrep, which current GNU grep reports as obsolescent.

foreach pdb ( ../*.pdb )
    grep -Ev "ANI|DAN" $pdb > tmp
    mv tmp $pdb
end

# Creation of contacts file for fcc calculation
echo 'Contacts files creation...' >> $WDIR/$LOG_FILE

# Drop blank lines from the PDB list so that the loop only sees file names
sed -i '/^ *$/d' ../$PDB_LIST

# The contacts list is filled by appending, so start from an empty one:
# a list left behind by a previous run would otherwise be duplicated and
# every model would be passed several times to the fcc calculation.
rm -f $CONTACTS_LIST

# One contacts file per model, written in the analysis directory and named
# after the model with its extension replaced by .contacts
foreach i (`cat ../$PDB_LIST`)
        $SCRIPTS_DIR/contact_inter ../$i 5 | sort | uniq > ${i:r}".contacts"
        echo ${i:r}".contacts" >> $CONTACTS_LIST
end

echo 'DONE' >> ${WDIR}/$LOG_FILE
echo 'FNAT calculation...' >> ${WDIR}/$LOG_FILE

### Use the contacts of the user reference when one was supplied ###
# Kept as a then/endif block: a one-line if would expand refe even when unset.
if ( $?refe ) then
    set REFE_CONTACTS = ${refe:r}.contacts
endif

# calc_fcc.py receives the reference contacts file followed by every model
# contacts file; its output is then sorted on column 3, numerically, descending
python $SCRIPTS_DIR/calc_fcc.py $REFE_CONTACTS `cat $CONTACTS_LIST` > file.nam_fnat
sort -n -r -k3 file.nam_fnat > file.nam_fnat-sorted


#### Checking of fnat results: the sorted file must exist and be non-empty ####
if ( -s file.nam_fnat-sorted ) then
    echo "fnat file ok" >> $WDIR/$LOG_FILE
else
    echo "The fnat file hasn't been generated, control your reference file" >> $WDIR/$LOG_FILE
    echo "Analysis aborted" >> $WDIR/$LOG_FILE
    exit 1
endif
echo 'DONE' >> $WDIR/$LOG_FILE

### Cleaning the directory ###
# Gather every contacts file (analysis directory and run directory) in the
# contacts subdirectory. -p avoids an error when it exists from an earlier run.
mkdir -p contacts
mv -f *.contacts contacts/
mv -f ../*.contacts contacts/


##############################
####  i-RMSD calculation  ####
##############################

echo 'i-RMSD calculation...' >> $WDIR/$LOG_FILE
# Back to the run directory, where the PDB files and the PDB list live
cd ..

# RMSD calculation script needs SEGID and CHAINID to work:
# each PDB is rewritten in place with the output of pdb_segid-to-chain
foreach pdb ( *.pdb )
    $HADDOCKTOOLS/pdb_segid-to-chain $pdb > tmp
    cat tmp > $pdb
end
rm -f tmp

# With a user reference, the RMSD scripts get a list whose first entry is the
# reference followed by all models; PDB_LIST points to that list from now on
if ( $?refe ) then
    ( echo $refe ; cat $PDB_LIST ) > tmp
    set PDB_LIST = 'file.nam_ref'
    cat tmp > $PDB_LIST
endif
rm -f tmp

$SCRIPTS_DIR/RMSD_Calc.sh $PDB_LIST interface > i_RMSD_log.out # interface RMSD calculation script

### Removing reference file line to take only models into account ###
# Line 2 of both result files is deleted when a user reference heads the list
if ( $?refe ) then
        sed '2d' i-RMSD.dat > tmp
        sed '2d' i-RMSD-sorted.dat > tmp2
        cat tmp > i-RMSD.dat
        cat tmp2 > i-RMSD-sorted.dat
        rm -f tmp tmp2
endif

# Move the log and the result files into the analysis directory
mv i*RMSD* $ANADIR/

# Validate the result: second field of the last line of i-RMSD.dat.
# The value is captured as a word list and its word count is tested, because
# an empty command substitution placed directly inside the if expression
# leaves nothing on the left of == and csh stops with an expression syntax
# error instead of reaching the abort branch.
set irmsd_last = ( `tail -1 $ANADIR/i-RMSD.dat | awk '{print $2}'` )
if ( $#irmsd_last == 0 ) then
        echo "There is a problem with your irmsd file, it hasn't been generated or it is empty" >> ${WDIR}/$LOG_FILE
        echo "Analysis aborted" >> $WDIR/$LOG_FILE
        exit 1
else
        echo "irmsd file ok" >> ${WDIR}/$LOG_FILE
endif
echo 'DONE' >> ${WDIR}/$LOG_FILE


##############################
####  l-RMSD calculation  ####
##############################

echo 'l-RMSD calculation...' >> ${WDIR}/$LOG_FILE

$SCRIPTS_DIR/RMSD_Calc.sh $PDB_LIST ligand > l_RMSD_log.out # ligand RMSD calculation script

### Removing reference file line to take only models into account ###
# Line 2 of both result files is deleted when a user reference heads the list;
# the list including the reference is then stored in the analysis directory
if ( $?refe ) then
        sed '2d' l-RMSD.dat > tmp
        sed '2d' l-RMSD-sorted.dat > tmp2
        cat tmp > l-RMSD.dat
        cat tmp2 > l-RMSD-sorted.dat
        rm -f tmp tmp2
        mv file.nam_ref $ANADIR/
endif

# Move the log and the result files into the analysis directory
mv l*RMSD* $ANADIR/
# Remove the leftover rmsd.temp file (the former /rm command does not exist,
# so the file was never deleted); -f keeps this quiet when it is absent.
# NOTE(review): rmsd.temp is presumably written by RMSD_Calc.sh - confirm
rm -f rmsd.temp


# Validate the result: second field of the last line of l-RMSD.dat.
# The value is captured as a word list and its word count is tested, because
# an empty command substitution placed directly inside the if expression
# leaves nothing on the left of == and csh stops with an expression syntax
# error instead of reaching the abort branch.
set lrmsd_last = ( `tail -1 $ANADIR/l-RMSD.dat | awk '{print $2}'` )
if ( $#lrmsd_last == 0 ) then
        # Two echo commands, as in the other checks: a backslash-n inside one
        # echo is only turned into a newline for some echo_style settings
        echo "There is a problem with your lrmsd file, it hasn't been generated or it is empty" >> ${WDIR}/$LOG_FILE
        echo "Analysis aborted" >> $WDIR/$LOG_FILE
        exit 1
else
        echo "lrmsd file ok" >> ${WDIR}/$LOG_FILE
endif

echo 'DONE' >> ${WDIR}/$LOG_FILE

cd $ANADIR
# Clusters analysis: nb_clust is the number of lines of ../clusters.stat that
# match file.nam, or 0 when that file is missing or empty
if ( ! -s ../clusters.stat ) then
    set nb_clust = "0"
else
    set nb_clust = `grep -c file.nam ../clusters.stat`
endif

################################
####  Creation of graphics  ####
################################

echo 'Graphics drawing...' >> ${WDIR}/$LOG_FILE

# The R plotting script is copied into the analysis directory
cp $SCRIPTS_DIR/Graph_rmsd.R .

# NOTE(review): get_RMSD.py is expected to write complex_HS_irmsd_lrmsd_fnat.list,
# which is checked right below - confirm against that script
python $SCRIPTS_DIR/get_RMSD.py

# Abort when the compiled values file is missing or empty
if (! -s complex_HS_irmsd_lrmsd_fnat.list ) then
        echo "The results file providing plot information hasn't been generated" >> ${WDIR}/$LOG_FILE
        echo "Analysis aborted" >> $WDIR/$LOG_FILE
        exit 1
else
        echo "Values compiled file ok" >> ${WDIR}/$LOG_FILE
endif

# Cluster centers are only processed when cluster statistics are available
if ( -s ../clusters.stat ) then
        python $SCRIPTS_DIR/get_RMSD_centers.py $nb_clust
endif

# Keep only the lines without Nan. Plain grep replaces egrep, which current
# GNU grep reports as obsolescent; the pattern has no extended-regex syntax.
grep -v "Nan" complex_HS_irmsd_lrmsd_fnat.list > complex_clustered.list

python $SCRIPTS_DIR/get_RMSD_graph.py $nb_clust

# Conversion of eps files generated by R to png files:
# each eps file of the analysis directory is rotated by 90 degrees and written
# next to it with the png extension
set convert_path = "/usr/bin/convert"
foreach eps_file ( *.eps )
    $convert_path -rotate 90 $eps_file ${eps_file:r}.png
end

echo 'DONE' >> $WDIR/$LOG_FILE

# CAPRI stars calculation, run only when a user reference was supplied
echo 'CAPRI stars generation...' >> $WDIR/$LOG_FILE
if ( $?refe ) then
    python $SCRIPTS_DIR/star_ana.py complex_HS_irmsd_lrmsd_fnat.list
endif

# The no_of_stars file must exist and be non-empty, otherwise the run aborts
# NOTE(review): without a reference star_ana.py is not run, so unless an older
# no_of_stars file is present this check aborts and the final cleaning is
# skipped - confirm this is intended
if ( -s no_of_stars ) then
    echo "Stars generated" >> $WDIR/$LOG_FILE
else
    echo "It was impossible to read or compute complex_HS_irmsd_lrmsd_fnat.list file, no stars calculation..." >> $WDIR/$LOG_FILE
    echo "Analysis aborted" >> $WDIR/$LOG_FILE
    exit 1
endif


# Last cleaning session
# The reference copy is only removed when a reference argument was given:
# with no argument the path would reduce to the parent directory itself and
# rm would be asked to delete a directory.
if ( $#argv > 0 ) then
        rm -f ../$1
endif
#rm -f ../reference.pdb
rm -f ../*.contacts
rm -f ../*profit.out

exit 0
