function tex2txt(infile, outfile)
%TEX2TXT Strip most of the TeX markup from a .tex file.
%
%   tex2txt(infile, outfile)
%
%   Reads the text file INFILE line by line and writes plain text to
%   OUTFILE, one paragraph per output line. Processing steps:
%     1. remove TeX comments (from the first unescaped '%' to end of line)
%        and display formulas delimited by '$$';
%     2. remove inline formulas delimited by '$';
%     2a. remove the text between two '{equation}' tokens;
%     3. replace '~' with a space, collapse '--'/'---' into '-', and drop
%        the space in front of '.' and ',';
%     4. join consecutive non-empty lines into paragraphs (an empty line
%        starts a new paragraph);
%     5. collapse runs of spaces into a single space;
%     6. write the paragraphs to OUTFILE (existing content is discarded).
%
%   Defaults: OUTFILE is 'testout.txt' when omitted; INFILE is
%   'tex2txt.txt' when called with no arguments.
%
%   Known limitations:
%     * step 2a removes text starting at the '{' of '{equation}', so the
%       '\begin' in front of the opening token stays in the output;
%     * a line that becomes empty after comment removal acts as a
%       paragraph break;
%     * every paragraph is written with one leading space.
%
%   Example
%       tex2txt('tex2txt.txt')
%
%   http://strijov.com

% Default file names, just to show an example.
if nargin < 2
    outfile = 'testout.txt';
end
if nargin < 1
    infile = 'tex2txt.txt';
end

[sts] = textread(infile, '%s', 'delimiter', '\n'); % read the text file, one cell per line

disp('1. remove comments and outline formulas');
flag = 0; % 1 while we are inside a formula that spans several lines
for i = 1:length(sts)
    s = sts{i};
    % A '%' preceded by a backslash is a literal percent sign, not a comment.
    pos = regexp(s, '(?<!\\)%', 'once');
    if ~isempty(pos)
        s(pos:end) = [];
    end
    [s, flag] = strclear(s, '$$', flag);
    sts{i} = s;
end

disp('2. remove inline formulas');
flag = 0; % do not let an unbalanced '$$' leak into this pass
for i = 1:length(sts)
    [sts{i}, flag] = strclear(sts{i}, '$', flag);
end

disp('2a. remove equations');
flag = 0; % each delimiter gets its own open/closed state
for i = 1:length(sts)
    [sts{i}, flag] = strclear(sts{i}, '{equation}', flag);
end

disp('3. replace ~ for space and --- for - and space,. for,.');
for i = 1:length(sts)
    s = sts{i};
    s = strrep(s, '~', ' ');
    s = regexprep(s, '-{2,}', '-'); % any run of dashes becomes a single '-'
    s = strrep(s, ' .', '.');
    s = strrep(s, ' ,', ',');
    sts{i} = s;
end

disp('4. gather the paragraphs');
cnt = 0;
out = {};
for i = 1:length(sts)
    s = sts{i};
    if isempty(s)
        % An empty line opens a new (initially empty) paragraph.
        cnt = cnt + 1;
        out{cnt} = '';
    else
        if cnt == 0
            % The file starts with text: open the first paragraph here,
            % otherwise out{0} would be indexed below.
            cnt = 1;
            out{cnt} = '';
        end
        out{cnt} = [out{cnt}, ' ', s];
    end
end

disp('5. remove double spaces');
for i = 1:length(out)
    out{i} = regexprep(out{i}, ' {2,}', ' ');
end

disp('6. write the output file');
fid = fopen(outfile, 'w+'); % discard existing content, if any
if fid < 0
    error('tex2txt:cannotOpenOutput', ...
        'Cannot open output file "%s" for writing.', outfile);
end
for i = 1:length(out)
    fprintf(fid, '%s\n', out{i});
end
fclose(fid);
disp('done!')
return

function [str, flag] = strclear(str, pattern, flag)
%STRCLEAR Remove the text enclosed between pairs of PATTERN from STR.
%
%   [str, flag] = strclear(str, pattern, flag)
%
%   FLAG is the state carried between lines: nonzero means STR starts
%   inside an enclosed region that was opened on an earlier line. Each
%   occurrence of PATTERN toggles the state. Text from an opening PATTERN
%   through the end of the closing PATTERN is deleted; if the region is
%   still open at the end of STR, the rest of the line is deleted and the
%   returned FLAG is 1.
n = length(pattern);
hits = strfind(str, pattern);
kill = false(1, length(str)); % true marks characters to delete
start = 1;                    % where the currently open region begins
for k = 1:length(hits)
    if flag
        % This occurrence closes the region: drop it, delimiter included.
        kill(start:hits(k) + n - 1) = true;
    end
    flag = swch(flag);
    start = hits(k);
end
if flag
    % Region still open at end of line: drop everything from its start.
    kill(start:end) = true;
end
str(kill) = [];
return

function flag = swch(flag)
%SWCH Toggle a 0/1 flag: any positive value becomes 0, anything else 1.
if flag > 0
    flag = 0;
else
    flag = 1;
end
return