jhli515
05-18-2005, 08:41 AM
Dear All,
I am a Perl beginner.
I could not solve this small problem that I am working on. I would appreciate your help.
The input file for the script is 1.txt below.
The script I have written so far is below.
The objectives are:
1. If any GQ is not 1, change the Allele1 and Allele2 columns for that row to nil. (I have managed this objective in the script.)
2. Check whether rows with the same sample & marker name in UD1 have the same Allele1 and Allele2 or not. (I think I should use a hash but cannot work out how to do it.)
3. Produce an output file in UD1 format without any duplicated sample & marker names. Format the markers' names in a column, i.e. see 2.exl below.
Thank you
Jin
==========Script =====================
# Path of the tab-delimited input table.
$GenemapperFile = "c:/1.txt";

# Load the table into one package array per column (@SampleFile, @SampleName,
# ... @CV) so that element N of every array belongs to data row N.
# The first line of the file is kept, unparsed, in $tableheader.
# $size holds the length of the most recently read data line (terminator
# excluded).
#
# The bare block keeps the helper lexicals ($fh, @column_arrays, ...) out of
# the rest of the script.
{
    # Destination arrays in file-column order (column 0 .. column 45).
    my @column_arrays = (
        \@SampleFile, \@SampleName, \@SampleID,   \@RunName,
        \@Panel,      \@Marker,     \@Dye,        \@SNP,
        \@Allele1,    \@Allele2,    \@Size1,      \@Size2,
        \@Height1,    \@Height2,    \@PeakArea1,  \@PeakArea2,
        \@DataPoint1, \@DataPoint2, \@Mutation1,  \@Mutation2,
        \@AEComment1, \@AEComment2, \@ADO,        \@AE,
        \@OMIT,       \@OS,         \@SHP,        \@OBA,
        \@SPA,        \@SP,         \@BIN,        \@PHR,
        \@LPH,        \@SPU,        \@AN,         \@BD,
        \@DP,         \@NB,         \@CC,         \@OVL,
        \@XTLK,       \@GQ,         \@UD1,        \@UD2,
        \@UD3,        \@CV,
    );

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle cannot clash with other code.
    open(my $fh, '<', $GenemapperFile)
        or die "Unable to open the file $GenemapperFile;$!";

    # First line is the column header; it is stored exactly as read.
    $tableheader = readline($fh);

    while (my $line = <$fh>) {
        # Strip the line terminator (LF or CRLF) first; otherwise the last
        # field of every row would end in "\n" and never compare equal to
        # the same value elsewhere.
        $line =~ s/\r?\n\z//;

        # A line with no visible characters carries no data; storing it
        # would add a row of empty values to every array.
        next unless $line =~ /\S/;

        $size = length $line;
        #print "The size of each line is $size\n"; # output size of line

        # Limit -1 keeps trailing empty fields, so an empty cell at the end
        # of a row is stored as "" rather than being dropped.
        my @fields = split /\t/, $line, -1;

        # Every array receives exactly one value per row (undef when the
        # row is shorter than the table), which keeps all arrays the same
        # length and row-aligned.
        for my $col (0 .. $#column_arrays) {
            push @{ $column_arrays[$col] }, $fields[$col];
        }
    }

    close $fh;
}
#@Matrix=(\@SampleFile,\@SampleName,\@SampleID,\@RunName,\@Panel,\@Marker,\@Dye,\@SNP,\@Allele1,\@Allele2,\@Size1,\@Size2,\@Height1,\@Height2,\@PeakArea1,\@PeakArea2,\@DataPoint1,\@DataPoint2,\@Mutation1,\@Mutation2,\@AEComment1,\@AEComment2,\@ADO,\@AE,\@OMIT,\@OS,\@SHP,\@OBA,\@SPA,\@SP,\@BIN,\@PHR,\@LPH,\@SPU,\@AN,\@BD,\@DP,\@NB,\@CC,\@OVL,\@XTLK,\@GQ,\@UD1,\@UD2,\@UD3,\@CV);
#print "test4 @Matrix\n";
#$MatrixRef=\@Matrix;
#print $MatrixRef;
# Walk the row-aligned column arrays once and, for every row:
#   1. blank Allele1/Allele2 when the row's GQ value is not numerically 1;
#   2. build the key "<UD1>-<Marker>" and append it to @UD1Marker;
#   3. flag the key in %UD1MarkerConflict when an earlier row with the same
#      key stored a different Allele1 or Allele2 (exact string comparison,
#      done after step 1, so a blanked row differs from a called one);
#   4. store the row's alleles in %UD1MarkerAllele1 / %UD1MarkerAllele2.
#      A later row with the same key replaces the earlier values.
#
# After the loop, "keys %UD1MarkerAllele1" is the list of distinct
# sample/marker combinations and "keys %UD1MarkerConflict" lists the
# combinations whose duplicate rows disagree.
for my $row (0 .. $#GQ) {
    # A missing GQ value is treated like any other value that is not 1.
    if (!defined $GQ[$row] || $GQ[$row] != 1) {
        $Allele1[$row] = "";
        $Allele2[$row] = "";
    }
    #Test the GQ not =1 the genotype should be nil
    # print "$UD1[$row]\t"."$Allele1[$row]\t"."$Allele2[$row]\n";

    # Key combining sample name and marker.
    $UD1marker = $UD1[$row] . "-" . $Marker[$row];
    push @UD1Marker, $UD1marker;

    # Use $UD1marker itself as the hash key.  Reading it back through
    # $UD1Marker[$row] is only correct when @UD1Marker was empty before the
    # loop started.
    if (exists $UD1MarkerAllele1{$UD1marker}) {
        my ($seen1, $seen2) = ($UD1MarkerAllele1{$UD1marker},
                               $UD1MarkerAllele2{$UD1marker});
        my ($new1, $new2)   = ($Allele1[$row], $Allele2[$row]);

        # Compare missing values as empty strings.
        for ($seen1, $seen2, $new1, $new2) {
            $_ = "" unless defined $_;
        }

        $UD1MarkerConflict{$UD1marker} = 1
            if $seen1 ne $new1 || $seen2 ne $new2;
    }

    $UD1MarkerAllele1{$UD1marker} = $Allele1[$row];
    $UD1MarkerAllele2{$UD1marker} = $Allele2[$row];
}
I am a Perl beginner.
I could not solve this small problem that I am working on. I would appreciate your help.
The input file for the script is 1.txt below.
The script I have written so far is below.
The objectives are:
1. If any GQ is not 1, change the Allele1 and Allele2 columns for that row to nil. (I have managed this objective in the script.)
2. Check whether rows with the same sample & marker name in UD1 have the same Allele1 and Allele2 or not. (I think I should use a hash but cannot work out how to do it.)
3. Produce an output file in UD1 format without any duplicated sample & marker names. Format the markers' names in a column, i.e. see 2.exl below.
Thank you
Jin
==========Script =====================
# Path of the tab-delimited input table.
$GenemapperFile = "c:/1.txt";

# Load the table into one package array per column (@SampleFile, @SampleName,
# ... @CV) so that element N of every array belongs to data row N.
# The first line of the file is kept, unparsed, in $tableheader.
# $size holds the length of the most recently read data line (terminator
# excluded).
#
# The bare block keeps the helper lexicals ($fh, @column_arrays, ...) out of
# the rest of the script.
{
    # Destination arrays in file-column order (column 0 .. column 45).
    my @column_arrays = (
        \@SampleFile, \@SampleName, \@SampleID,   \@RunName,
        \@Panel,      \@Marker,     \@Dye,        \@SNP,
        \@Allele1,    \@Allele2,    \@Size1,      \@Size2,
        \@Height1,    \@Height2,    \@PeakArea1,  \@PeakArea2,
        \@DataPoint1, \@DataPoint2, \@Mutation1,  \@Mutation2,
        \@AEComment1, \@AEComment2, \@ADO,        \@AE,
        \@OMIT,       \@OS,         \@SHP,        \@OBA,
        \@SPA,        \@SP,         \@BIN,        \@PHR,
        \@LPH,        \@SPU,        \@AN,         \@BD,
        \@DP,         \@NB,         \@CC,         \@OVL,
        \@XTLK,       \@GQ,         \@UD1,        \@UD2,
        \@UD3,        \@CV,
    );

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and the handle cannot clash with other code.
    open(my $fh, '<', $GenemapperFile)
        or die "Unable to open the file $GenemapperFile;$!";

    # First line is the column header; it is stored exactly as read.
    $tableheader = readline($fh);

    while (my $line = <$fh>) {
        # Strip the line terminator (LF or CRLF) first; otherwise the last
        # field of every row would end in "\n" and never compare equal to
        # the same value elsewhere.
        $line =~ s/\r?\n\z//;

        # A line with no visible characters carries no data; storing it
        # would add a row of empty values to every array.
        next unless $line =~ /\S/;

        $size = length $line;
        #print "The size of each line is $size\n"; # output size of line

        # Limit -1 keeps trailing empty fields, so an empty cell at the end
        # of a row is stored as "" rather than being dropped.
        my @fields = split /\t/, $line, -1;

        # Every array receives exactly one value per row (undef when the
        # row is shorter than the table), which keeps all arrays the same
        # length and row-aligned.
        for my $col (0 .. $#column_arrays) {
            push @{ $column_arrays[$col] }, $fields[$col];
        }
    }

    close $fh;
}
#@Matrix=(\@SampleFile,\@SampleName,\@SampleID,\@RunName,\@Panel,\@Marker,\@Dye,\@SNP,\@Allele1,\@Allele2,\@Size1,\@Size2,\@Height1,\@Height2,\@PeakArea1,\@PeakArea2,\@DataPoint1,\@DataPoint2,\@Mutation1,\@Mutation2,\@AEComment1,\@AEComment2,\@ADO,\@AE,\@OMIT,\@OS,\@SHP,\@OBA,\@SPA,\@SP,\@BIN,\@PHR,\@LPH,\@SPU,\@AN,\@BD,\@DP,\@NB,\@CC,\@OVL,\@XTLK,\@GQ,\@UD1,\@UD2,\@UD3,\@CV);
#print "test4 @Matrix\n";
#$MatrixRef=\@Matrix;
#print $MatrixRef;
# Walk the row-aligned column arrays once and, for every row:
#   1. blank Allele1/Allele2 when the row's GQ value is not numerically 1;
#   2. build the key "<UD1>-<Marker>" and append it to @UD1Marker;
#   3. flag the key in %UD1MarkerConflict when an earlier row with the same
#      key stored a different Allele1 or Allele2 (exact string comparison,
#      done after step 1, so a blanked row differs from a called one);
#   4. store the row's alleles in %UD1MarkerAllele1 / %UD1MarkerAllele2.
#      A later row with the same key replaces the earlier values.
#
# After the loop, "keys %UD1MarkerAllele1" is the list of distinct
# sample/marker combinations and "keys %UD1MarkerConflict" lists the
# combinations whose duplicate rows disagree.
for my $row (0 .. $#GQ) {
    # A missing GQ value is treated like any other value that is not 1.
    if (!defined $GQ[$row] || $GQ[$row] != 1) {
        $Allele1[$row] = "";
        $Allele2[$row] = "";
    }
    #Test the GQ not =1 the genotype should be nil
    # print "$UD1[$row]\t"."$Allele1[$row]\t"."$Allele2[$row]\n";

    # Key combining sample name and marker.
    $UD1marker = $UD1[$row] . "-" . $Marker[$row];
    push @UD1Marker, $UD1marker;

    # Use $UD1marker itself as the hash key.  Reading it back through
    # $UD1Marker[$row] is only correct when @UD1Marker was empty before the
    # loop started.
    if (exists $UD1MarkerAllele1{$UD1marker}) {
        my ($seen1, $seen2) = ($UD1MarkerAllele1{$UD1marker},
                               $UD1MarkerAllele2{$UD1marker});
        my ($new1, $new2)   = ($Allele1[$row], $Allele2[$row]);

        # Compare missing values as empty strings.
        for ($seen1, $seen2, $new1, $new2) {
            $_ = "" unless defined $_;
        }

        $UD1MarkerConflict{$UD1marker} = 1
            if $seen1 ne $new1 || $seen2 ne $new2;
    }

    $UD1MarkerAllele1{$UD1marker} = $Allele1[$row];
    $UD1MarkerAllele2{$UD1marker} = $Allele2[$row];
}