#!/usr/bin/perl
# Very simple format converter: convert BLAST hits to a BED file that can be
# loaded as a custom track in the UCSC browser.
# Jason de Koning, 2010. http://jasondk.org/Teaching.html
#
# Usage:  perl <this script> <blast_output>
#
#   <blast_output>  BLAST results in tabular format (specify with '-m 8').
#
# Output is written to "<blast_output>.bed".

use strict;
use warnings;

# Filename for your blast output (in tabular format; specify with '-m 8')
my $file = shift;
die "Usage: $0 <blast_tabular_output>\n"
    unless defined $file && length $file;

# Need your template information for here:
# (If you obtained your data following the example in class,
# this information is on the first line of your template sequence file).
my $templateChromosome = "chr1";
my $startPositionOnChr = 131970000;
my $endPositionOnChr   = 131995000;

# Open input and output files. Three-argument open keeps characters in the
# filename from being interpreted as an open mode, and failures are fatal
# instead of silently producing an empty track.
open(my $in, '<', $file)
    or die "Cannot open '$file' for reading: $!\n";

# The output file has '.bed' appended to the end
my $bedFile = $file . ".bed";
open(my $out, '>', $bedFile)
    or die "Cannot open '$bedFile' for writing: $!\n";

# This information will tell the genome browser to center its display on
# your template region
print {$out} "browser position " . $templateChromosome . ":"
    . $startPositionOnChr . "-" . $endPositionOnChr . "\n";
print {$out} "browser hide all\n";
print {$out} "track name=hits description=\"Hits from de novo assembly experiment\" visibility=2\n";

# Now lets create a BED row for each blast hit
while (my $line = <$in>) {
    chomp($line);

    # Skip blank lines and '#' comment lines (present in commented tabular
    # BLAST output) rather than emitting meaningless rows.
    next if $line =~ /^\s*$/;
    next if $line =~ /^#/;

    my @tokens = split(/\t/, $line);

    # Fields used below: [0] = query id, [8] and [9] = start and end of the
    # hit on the subject (template) sequence. A row without them is malformed.
    if (@tokens < 10) {
        warn "Skipping line $. of '$file': expected at least 10 tab-separated fields\n";
        next;
    }

    my ($hitStart, $hitEnd) = @tokens[8, 9];

    # BLAST will give both forward- and reverse-strand hits - we annotate
    # these with different values in the last column (1000 vs 500) so they
    # can be told apart in the browser.
    # See: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
    #
    # NOTE(review): BLAST coordinates are 1-based while BED chromStart is
    # 0-based; the offsets are kept exactly as in the original script.
    # Confirm against how $startPositionOnChr was obtained.
    #
    # If the end coordinate of a hit is less than the start coord, it is a
    # reverse strand hit.
    if ($hitStart < $hitEnd) {
        # Forward strand hit
        print {$out} $templateChromosome
            . "\t" . ( $hitStart + $startPositionOnChr )
            . "\t" . ( $hitEnd + $startPositionOnChr )
            . "\t$tokens[0]\t1000\n";
    }
    else {
        # Reverse strand hit - we need to give the coords to UCSC in the
        # opposite orientation
        print {$out} $templateChromosome
            . "\t" . ( $hitEnd + $startPositionOnChr )
            . "\t" . ( $hitStart + $startPositionOnChr )
            . "\t$tokens[0]\t500\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($out) or die "Error closing '$bedFile': $!\n";
close($in);