DGtalTools  1.5.beta
statisticsEstimators.cpp
1 
31 #include <iostream>
32 #include <fstream>
33 #include <sstream>
34 #include <string>
35 #include <cmath>
36 #include <math.h>
37 #include <limits>
38 #include <boost/foreach.hpp>
39 #include <boost/tokenizer.hpp>
40 
41 #include "CLI11.hpp"
42 
43 #include "DGtal/base/Common.h"
44 
45 using namespace DGtal;
46 
47 
/**
 * Reads the next line of @a file into @a value.
 *
 * @param file  input stream to read from.
 * @param value receives the extracted line (without its line terminator).
 *              Only meaningful when the function returns true.
 *
 * @return true if a line was actually extracted, false if the stream was
 *         not in a good state or nothing could be extracted (e.g. the end
 *         of the file had been reached).
 */
bool LoadingStringFromFile( std::ifstream & file, std::string & value )
{
  if( !file.good() )
  {
    return false;
  }
  // A stream can still be good() while having nothing left to read (right
  // after the last newline), so report the outcome of the extraction itself
  // rather than the state of the stream before it.
  return static_cast< bool >( std::getline( file, value ) );
}
114 
/**
 * Cuts @a s at every occurrence of @a delim and appends the pieces,
 * in order, to @a elems.
 *
 * Two consecutive delimiters produce an empty piece; a delimiter that
 * terminates the string does not produce a trailing empty piece, and an
 * empty string produces nothing. @a elems is not cleared beforehand.
 *
 * @param s     the string to cut.
 * @param delim the separator character.
 * @param elems the vector the pieces are appended to.
 */
void split( const std::string & s, char delim, std::vector< std::string > & elems )
{
  std::string::size_type begin = 0;
  while( begin < s.size() )
  {
    const std::string::size_type cut = s.find( delim, begin );
    if( cut == std::string::npos )
    {
      // Last piece: everything up to the end of the string.
      elems.push_back( s.substr( begin ) );
      return;
    }
    elems.push_back( s.substr( begin, cut - begin ) );
    begin = cut + 1;
  }
}
131 
142 int ComputeStatistics ( const std::string & inputdata1,
143  const std::string & inputdata2,
144  const unsigned int & idColumnData1,
145  const unsigned int & idColumnData2,
146  const bool & isMongeMean,
147  std::ofstream & output )
148 {
149  std::ifstream file1( inputdata1.c_str() );
150  std::ifstream file2( inputdata2.c_str() );
151 
152  double absd1d2;
153  double L1 = 0.0;
154  double L2 = 0.0;
155  double Linf = 0.0;
156 
157  std::string s1, s2;
158  double v1, v2;
159  double h = - std::numeric_limits<double>::max();
160 
161  unsigned int nb_elements = 0;
162  bool finish = false;
163  while(( LoadingStringFromFile( file1, s1 ) && LoadingStringFromFile( file2, s2 )) && !finish )
164  {
165  while ( s1[ 0 ] == '#' )
166  {
167  std::size_t p = s1.find( "# h = " );
168  if ( p != std::string::npos )
169  {
170  h = atof((s1.erase( p, 5 )).c_str());
171  }
172  if( ! LoadingStringFromFile( file1, s1 ) )
173  {
174  s1 = "NA";
175  finish = true;
176  }
177  }
178 
179  while ( s2[ 0 ] == '#' )
180  {
181  if( ! LoadingStringFromFile( file2, s2 ) )
182  {
183  s2 = "NA";
184  finish = true;
185  }
186  }
187 
188  if ( s1 == "NA" || s1 == "-nan" || s1 == "-inf" || s1 == "inf" || s1 == "" || s1 == " " )
189  continue;
190  if ( s2 == "NA" || s2 == "-nan" || s2 == "-inf" || s2 == "inf" || s2 == "" || s2 == " " )
191  continue;
192 
193  std::vector< std::string > elems1;
194  split( s1, ' ', elems1 );
195  std::vector< std::string > elems2;
196  split( s2, ' ', elems2 );
197 
198  if( elems1.size() <= idColumnData1 )
199  {
200  std::cerr << "Can't found " << idColumnData1 << " column on file1 (" << inputdata1 << "). Is the file/column exist ?" << std::endl;
201  continue;
202  }
203  if( elems2.size() <= idColumnData2 )
204  {
205  std::cerr << "Can't found " << idColumnData2 << " column on file2 (" << inputdata2 << "). Is the file/column exist ?" << std::endl;
206  continue;
207  }
208 
209  v1 = atof( elems1[ idColumnData1 ].c_str() );
210  v2 = atof( elems2[ idColumnData2 ].c_str() );
211 
212  if( isMongeMean && (( v1 >= 0.0 ) ^ ( v2 >= 0.0 ))) // hack for Monge. Can be reversed.
213  {
214  v2 = -v2;
215  }
216 
217  absd1d2 = std::abs ( v1 - v2 );
218  if ( Linf < absd1d2 )
219  {
220  Linf = absd1d2;
221  }
222  L1 += absd1d2;
223  L2 += absd1d2 * absd1d2;
224 
225  ++nb_elements;
226  }
227 
228  if( h == - std::numeric_limits<double>::max())
229  {
230  std::cerr << "Can't found h value on file1 (" << inputdata1 << "). Is the file exist ?" << std::endl;
231  return 0;
232  }
233 
234  double meanL1 = L1 / (double)nb_elements;
235  double meanL2 = ( sqrt ( L2 )) / (double)nb_elements;
236 
237  output << h << " "
238  << meanL1 << " "
239  << meanL2 << " "
240  << Linf
241  << std::endl;
242 
243  return 1;
244 }
245 
246 int main( int argc, char** argv )
247 {
248  // parse command line CLI ----------------------------------------------
249  CLI::App app;
250  std::string filename1;
251  std::string filename2;
252  unsigned int column1;
253  unsigned int column2;
254  std::string output_filename;
255  bool isMongeMean {false};
256 
257  app.description("Computes satistics (L1, L2, Loo) from results of two estimators.\n Typical use example:\n \t statisticsEstimators --file1 <file1> --column1 <column1> --file2 <file2> --column2 <column2> --output <output>\n");
258  app.add_option("-f,--file1,1",filename1,"File 1.")->required()->check(CLI::ExistingFile);
259  app.add_option("-F,--file2,2",filename2,"File 2.")->required()->check(CLI::ExistingFile);
260  app.add_option("--column1,-c", column1, "Column of file 1" )->required();
261  app.add_option("--column2,-C", column2, "Column of file 2" )->required();
262  app.add_option("--output,-o,2", output_filename, "Output file")->required();
263  app.add_option("--monge,-m", isMongeMean, "Is from Monge mean computation (optional, default false)", true);
264 
265  app.get_formatter()->column_width(40);
266  CLI11_PARSE(app, argc, argv);
267  // END parse command line using CLI ----------------------------------------------
268 
269  std::ifstream inFileEmptyTest; inFileEmptyTest.open(output_filename.c_str());
270  bool isNew = inFileEmptyTest.peek() == std::ifstream::traits_type::eof(); inFileEmptyTest.close();
271  std::ofstream file( output_filename.c_str(), std::ofstream::out | std::ofstream::app );
272 
273  if( isNew )
274  {
275  file << "# h | "
276  << "L1 Mean Error | "
277  << "L2 Mean Error | "
278  << "Loo Mean Error"
279  << std::endl;
280  }
281 
282  if ( ComputeStatistics( filename1, filename2, column1, column2, isMongeMean, file ) == 0 )
283  {
284  file.close();
285  return -1;
286  }
287 
288  file.close();
289  return 1;
290 }
int main(int argc, char **argv)