#!/bin/bash
#
# Build Circos link tracks from a table of segmental duplications.
#
# Input (read from $DATA):
#   segdup.human.txt            tab-separated table; fields 2,3,4 and 8,9,10 are
#                               extracted (assumed to be chromosome/start/end of
#                               the two ends of each duplication -- TODO confirm)
#
# Output (written to the current directory):
#   track.segdup.all.txt        all retained links, "chr" renamed to "hs"
#   histogram.segdup.size.txt   histogram.v2 output for ($3-$2-1)/1000 per link
#   track.segdup.indexed.txt    links with a per-chromosome "sizerank=N" field
#                               appended, ordered by decreasing size
#
# Requires: histogram.v2 resolvable via PATH.

set -euo pipefail

DATA=.
# NOTE(review): SCRIPTS and CIRCOS are not referenced anywhere in this script;
# they are kept so existing copies that edit them stay valid. Confirm whether
# SCRIPTS was meant to locate histogram.v2.
SCRIPTS=../../../scripts
# Change this to the directory where Circos is installed
CIRCOS=/home/martink/work/circos/svn/

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# grep -v wrapper: grep exits 1 when it selects no lines, which is not an
# error for a filter. Only real grep failures (status > 1) are propagated, so
# an empty result does not abort the script under `set -o pipefail`.
drop_matching() {
  grep -v "$@" || (( $? == 1 ))
}

# Fail early with a clear message instead of producing empty output files.
[[ -r "$DATA/segdup.human.txt" ]] || die "cannot read $DATA/segdup.human.txt"
command -v histogram.v2 >/dev/null || die "histogram.v2 not found on PATH"

# Parse the segmental duplications into a Circos link format. Ignore all
# duplications on random or unanchored chromosomes.
echo "Creating Circos segmental duplication links"
drop_matching '#' < "$DATA/segdup.human.txt" \
  | cut -d $'\t' -f 2,3,4,8,9,10 \
  | drop_matching -i 'random\|chrun\|_alt\|none' \
  | sed 's/chr/hs/g' \
  > track.segdup.all.txt

# Histogram of sizes
echo "Creating histogram of segmental duplication size (in kb)"
awk '{print ($3-$2-1)/1000}' track.segdup.all.txt \
  | histogram.v2 -min 1 -max 20 -binsize 1 \
  > histogram.segdup.size.txt

# Scratch file for the size-prefixed, ranked links. A private mktemp file is
# used so that stale data from an earlier interrupted run can never leak into
# the output, and the EXIT trap removes it on every exit path.
ranked=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "$ranked"' EXIT

# For each unique chromosome in the first column of the link track: select its
# links, prefix each with its size ($3-$2), sort by decreasing size, and append
# "sizerank=N" where N is the awk record number (1 = largest).
echo "Creating Circos segmental duplication links with size rank for each chromosome"
while IFS= read -r chr; do
  [[ -n "$chr" ]] || continue
  # Appending "" forces a string comparison, so the first field must equal the
  # chromosome name exactly (no regex or word-boundary matching).
  awk -F '\t' -v chr="$chr" \
    'BEGIN { OFS = "\t" } ($1 "") == (chr "") { print $3 - $2, $0 }' \
    track.segdup.all.txt \
    | sort -nr \
    | awk 'BEGIN { OFS = "\t" } { print $0, "sizerank=" NR }' \
    >> "$ranked"
done < <(cut -d $'\t' -f 1 track.segdup.all.txt | sort -u)

# Sort the indexed links by size (first field), drop that helper field and
# write the final track file.
sort -nr "$ranked" | cut -d $'\t' -f 2- > track.segdup.indexed.txt