|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +#### supported methods are: scriber ispred4 sppider cons_ppisp meta_ppisp predictprotein psiver csm_potential scannet whiscy predus2 #### |
| 4 | + |
#######################################
# Rewrite every `NUM_RETRIES = <integer>` assignment in the given files so
# that it uses the requested retry count.
#
# Any integer value is matched (not only the values the modules ship with),
# which makes the override repeatable: running the script again with a
# different count still takes effect after an earlier run already changed
# the files.
#
# Arguments: $1  - new retry count (positive integer)
#            $2… - files to edit in place
# Returns:   0 when no files are given, otherwise the exit status of sed
#######################################
set_num_retries() {
  local retries=$1
  shift
  (( $# > 0 )) || return 0
  sed -i -E "s/NUM_RETRIES = [0-9]+/NUM_RETRIES = ${retries}/g" -- "$@"
}

# Optional first script argument: a positive integer that overrides the retry
# count in the CPORT predictor modules. Anything else (missing, empty,
# non-numeric, zero) leaves the modules untouched.
if [[ "${1:-}" =~ ^[0-9]+$ ]] && (( 10#$1 > 0 )); then
  # 10# forces base 10 so input such as "08" is neither rejected as invalid
  # octal nor written into the modules with its leading zero.
  num_retries=$(( 10#$1 ))
  echo "Number of retries is ${num_retries}"
  set_num_retries "${num_retries}" /opt/cport/src/cport/modules/*.py
fi
| 12 | + |
# Batch driver: for every file in /data/$input_dir run each predictor listed
# in $method through `cport`, align the predictions on residue positions,
# and derive a per-residue consensus.
#
# Reads from /data/config (sourced): input_dir, method, chain.
# Writes:
#   /data/Results/<name>/<method>/cport.<name>.csv   raw cport output
#   /data/Results/<name>.final_result.csv            aligned matrix
#   /data/Active_and_passive/<name>.results.csv      classes + consensus
#   /data/Active_and_passive/<name>.active_sites     positions voted 1
#   /data/Active_and_passive/<name>.passive_sites    positions voted 2

#######################################
# Align concatenated cport CSVs onto residue positions 1..N.
#
# The input is read as pairs of lines: the second field of the first line
# is taken as the number of the first predicted residue, the second line
# holds the predictor name followed by one value per residue. Column k of
# the output always corresponds to residue k: missing leading or trailing
# residues are filled with "-", and values for residues numbered below 1 or
# above N are dropped.
#
# Arguments: $1 - N, the number of position columns
#            $2 - file with the concatenated cport CSVs
# Outputs:   "Position,1,...,N," header (trailing comma kept for
#            compatibility with existing output) and one row per predictor
#######################################
build_matrix() {
  local n_residues=$1 combined_csv=$2
  awk -F',' -v n="${n_residues}" '
    BEGIN {
      printf "Position,"
      for (j = 1; j <= n; j++) printf "%d,", j
      printf "\n"
    }
    # Odd lines: remember where this predictor starts numbering.
    NR % 2 == 1 {
      first = ($2 ~ /^-?[0-9]+$/) ? $2 + 0 : 1
      next
    }
    # Even lines: emit the values shifted so that field 2 lands on column
    # "first".
    {
      row = $1
      for (j = 1; j <= n; j++) {
        idx = j - first + 2
        row = row "," ((idx >= 2 && idx <= NF) ? $idx : "-")
      }
      print row
    }
  ' "${combined_csv}"
}

#######################################
# Turn the aligned matrix into one line per residue with a class per
# predictor and a consensus column.
#
# Cell mapping: "A" -> 1, "P" -> 2, "-" -> 0; anything else is read as a
# number and rounded to the nearest integer with halves rounded up (plain
# "%.0f" would turn 0.5 into 0). Only whole cells are mapped, so a value such
# as 1e-05 is not damaged by the "-" replacement.
# Consensus: 1 if class 1 strictly outnumbers class 2 and is at least as
# frequent as class 0; 2 in the mirrored case; otherwise 0.
#
# Arguments: $1 - matrix file as written by build_matrix
# Outputs:   CSV: header "<first column of the matrix>,score" (with
#            "predictor" replaced by "residue"), then
#            "<position>,<class per predictor>,<consensus>" per residue
#######################################
score_matrix() {
  local matrix_csv=$1
  awk -F',' '
    function classify(cell,    value, rounded) {
      if (cell == "A") return 1
      if (cell == "P") return 2
      if (cell == "-") return 0
      value = cell + 0
      rounded = sprintf("%.0f", value) + 0
      if (value >= 0 && value - int(value) == 0.5) rounded = int(value) + 1
      if (rounded == 0) rounded = 0   # normalise negative zero
      return rounded
    }
    { for (i = 1; i <= NF; i++) cell_at[NR, i] = $i }
    # The header row ends with a comma; ignore the empty field it creates.
    NR == 1 { ncol = ($NF == "") ? NF - 1 : NF }
    END {
      if (NR == 0) exit
      header = cell_at[1, 1]
      for (r = 2; r <= NR; r++) header = header "," cell_at[r, 1]
      gsub(/predictor/, "residue", header)
      print header ",score"
      for (c = 2; c <= ncol; c++) {
        line = sprintf("%.0f", cell_at[1, c])
        n_active = 0; n_passive = 0; n_none = 0
        for (r = 2; r <= NR; r++) {
          k = classify(cell_at[r, c])
          line = line "," k
          if (k == 1) n_active++
          else if (k == 2) n_passive++
          else if (k == 0) n_none++
        }
        if (n_active > n_passive && n_active >= n_none) vote = 1
        else if (n_passive > n_active && n_passive >= n_none) vote = 2
        else vote = 0
        print line "," vote
      }
    }
  ' "${matrix_csv}"
}

#######################################
# List the positions whose consensus (last column) equals a given class.
# Arguments: $1 - class to select (1 or 2)
#            $2 - file written by score_matrix
# Outputs:   comma-separated positions, without a trailing newline; nothing
#            when no position matches
#######################################
list_sites() {
  local wanted=$1 scored_csv=$2
  awk -F',' -v want="${wanted}" '
    NR > 1 && $NF == want { printf "%s%s", sep, $1; sep = "," }
  ' "${scored_csv}"
}

#######################################
# Run all configured predictors for one structure and write its results.
# Globals:   method, chain (read; set by /data/config)
# Arguments: $1 - path of the input structure file
#            $2 - scratch directory owned by the caller
# Returns:   0 on success, 1 if the structure had to be skipped
#######################################
run_structure() {
  local structure=$1 scratch=$2
  local name=${structure##*/}
  local atoms="${scratch}/tmp0"
  # The original script fed /opt/tmp0 to cport from /opt and collected
  # /opt/cport_tmp0.csv; the input keeps the name tmp0 and cport runs inside
  # the scratch directory so the output is expected at the same relative place.
  local cport_csv="${scratch}/cport_tmp0.csv"
  local combined="${scratch}/result"
  local protein_size single_method method_dir produced=0
  local final_csv scored_csv n_rows headers

  grep '^ATOM' -- "${structure}" > "${atoms}"
  # Columns 23-26 of the last ATOM record: the residue number in the PDB
  # fixed-column layout, used as the number of position columns.
  protein_size=$(tail -n 1 "${atoms}" | cut -c 23-26 | tr -d ' ')
  if [[ ! "${protein_size}" =~ ^[0-9]+$ ]] || (( 10#${protein_size} < 1 )); then
    echo "Skipping ${name}: no usable residue number in its last ATOM record" >&2
    return 1
  fi
  protein_size=$(( 10#${protein_size} ))

  : > "${combined}"
  # $method and $chain are split on whitespace on purpose: the first is a
  # list of predictors, the second may be empty or several arguments.
  # shellcheck disable=SC2086
  for single_method in ${method:-}; do
    rm -f -- "${cport_csv}"
    # shellcheck disable=SC2086
    if ! ( cd "${scratch}" && cport "${atoms}" ${chain:-} --pred "${single_method}" ) \
        || [[ ! -f "${cport_csv}" ]]; then
      echo "Warning: ${single_method} produced no result for ${name}" >&2
      continue
    fi
    method_dir="/data/Results/${name}/${single_method}"
    if ! { mkdir -p -- "${method_dir}" \
        && cp -- "${cport_csv}" "${method_dir}/cport.${name}.csv"; }; then
      echo "Warning: cannot store ${single_method} output for ${name}" >&2
    fi
    cat -- "${cport_csv}" >> "${combined}"
    produced=$(( produced + 1 ))
  done
  rm -f -- "${cport_csv}"

  if (( produced == 0 )); then
    echo "Skipping ${name}: no predictor produced output" >&2
    return 1
  fi

  mkdir -p /data/Results /data/Active_and_passive || return 1

  final_csv="/data/Results/${name}.final_result.csv"
  build_matrix "${protein_size}" "${combined}" > "${final_csv}" || return 1

  # Each matrix row becomes a column of the per-residue table.
  n_rows=$(wc -l < "${final_csv}")
  headers=$(cut -d',' -f1 -- "${final_csv}" | paste -s -d' ' -)
  echo "Detected ${n_rows} columns with headers '${headers}'"

  scored_csv="/data/Active_and_passive/${name}.results.csv"
  score_matrix "${final_csv}" > "${scored_csv}" || return 1
  list_sites 1 "${scored_csv}" > "/data/Active_and_passive/${name}.active_sites"
  list_sites 2 "${scored_csv}" > "/data/Active_and_passive/${name}.passive_sites"
}

#######################################
# Process one structure inside a private scratch directory that is removed
# afterwards, whatever the outcome. Nothing outside that directory is
# deleted.
# Arguments: $1 - path of the input structure file
# Returns:   status of run_structure, or 1 if no scratch directory could be
#            created
#######################################
process_structure() {
  local structure=$1
  local scratch status=0
  scratch=$(mktemp -d) || {
    echo "Cannot create a scratch directory" >&2
    return 1
  }
  run_structure "${structure}" "${scratch}" || status=$?
  rm -rf -- "${scratch:?}"
  return "${status}"
}

#######################################
# Entry point: load /data/config and process every regular file found in
# /data/$input_dir.
# Returns: 0 if every structure was processed, 1 otherwise
#######################################
main() {
  local structure status=0

  if [[ ! -r /data/config ]]; then
    echo "Cannot read /data/config" >&2
    return 1
  fi
  # shellcheck source=/dev/null
  . /data/config
  if [[ -z "${input_dir:-}" ]]; then
    echo "input_dir is not set in /data/config" >&2
    return 1
  fi

  # Glob instead of parsing `ls`: names with spaces survive, and the loop
  # does not occupy stdin, so cport cannot consume the list of inputs.
  for structure in "/data/${input_dir}"/*; do
    [[ -f "${structure}" ]] || continue
    # Re-read the configuration for every input, as the original loop did.
    # shellcheck source=/dev/null
    . /data/config
    process_structure "${structure}" || status=1
  done
  return "${status}"
}

main "$@"
| 128 | + |
| 129 | + |
| 130 | + |
| 131 | + |
| 132 | + |
| 133 | + |
| 134 | + |
| 135 | + |
| 136 | + |
| 137 | + |
| 138 | + |
0 commit comments