I have two FASTA files:
file1.fasta
>foo
ATCGGGG
>bar
CCCCCC
file2.fasta
>qux
ATCGGAAA
What I want to do now is to concatenating them into one file that results:
>foo
ATCGGGG
>bar
CCCCCC
>qux
ATCGGAAA
Thus preserving the name of each sequence that started with ">". Currently my code below replace that name with index, namely:
>0
ATCGGGG
>1
CCCCCC
>0
ATCGGAAA
What's the right way to modify my code below?
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include<stdio.h>
#include<string>
using namespace std;
#define MAX_LINE_SIZE 1024
int mk_joint_file(char *ctrlFile, char *tgtFile, char *outFile){
char s[MAX_LINE_SIZE];
FILE *ofp = fopen(outFile,"w");
FILE *cfp = fopen(ctrlFile,"r");
FILE *tfp = fopen(tgtFile,"r");
// char *p;
int flg=false;
int line=0;
while(fgets(s,MAX_LINE_SIZE,cfp) != NULL){
if(s[0]=='>'){
flg=true;
fprintf(ofp,">%d\n",line);
line++;
}else{
if(flg==true){
fprintf(ofp,"%s",s);
}
flg=false;
}
}
flg=false;
line=0;
while(fgets(s,MAX_LINE_SIZE,tfp) != NULL){
if(s[0]=='>'){
flg=true;
fprintf(ofp,">%d\n",line);
line++;
}else{
if(flg==true)
fprintf(ofp,"%s",s);
flg=false;
}
}
fclose(cfp);
fclose(tfp);
fclose(ofp);
return(0);
}
int main(int argc, char **argv)
{
string ifname_control = argv[1];
string ifname_target = argv[2];
string ofname = "newjoin.txt";
mk_joint_file((char *)ifname_control.c_str(), (char *)ifname_target.c_str(), (char *)ofname.c_str());
}
Is it any harder than just changing these lines
fprintf(ofp,">%d\n",line);
to
// TODO check fgets() handling of EOL - may not need the \n
fprintf(ofp, %s\n", s);
just change line 29 and 40 to
fprintf(ofp,"%s",s);
copy file1.fasta+file2.fasta newjoin.txt
- Blastfurnace 2012-04-04 03:29