indata.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  2. /// \file indata.h
  3. /// \brief Classes for text input data (used mainly for landcover input).
  4. /// File format can be either line 1:lon lat, line 2 etc.: year data-columns OR line 1: header,
  5. /// line 2 etc.: lon lat year data-columns. For local static data, use: lon lat data-columns,
  6. /// for global static data, use: dummy data-columns (with "static" as first word in header).
  7. /// \author Mats Lindeskog
  8. /// $Date: 2018-02-02 18:01:35 +0100 (Fri, 02 Feb 2018) $
  9. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  10. #ifndef INDATA_H
  11. #define INDATA_H
  12. #include "guess.h"
  13. using std::min;
  14. using std::max;
  15. /// Type for storing grid cell longitude, latitude and description text
  16. struct Coord {
  17. int id;
  18. double lon;
  19. double lat;
  20. xtring descrip;
  21. };
  22. namespace InData {
  // ---- Sentinel and search limit ----

  /// Sentinel for "not found" (the name-based Get() methods are documented to return -999 for an unknown name).
  const int NOTFOUND = -999;

  /// Upper limit for the coordinate search radius.
  /// NOTE(review): presumably in degrees, like the spatial resolution - confirm against the implementation.
  const double MAX_SEARCHRADIUS = 1.0;

  // ---- Size limits ----

  /// Capacity of the header_arr[] column-name arrays, i.e. the maximum number of data columns.
  const int MAXRECORDS = 200;

  /// NOTE(review): presumably the maximum length of one column name - confirm against the implementation.
  const int MAXNAMESIZE = 50;

  /// NOTE(review): presumably the maximum length of one input line - confirm against the implementation.
  const int MAXLINE = 20000;

  /// NOTE(review): presumably the maximum number of lines read while parsing the file structure - confirm.
  const int MAXLINESPARSE = 30000;
  /// Formats a text input file can have. EMPTY has the value 0 and doubles as the
  /// "unknown / wrong format" result; the other values combine the spatial scope
  /// (global or local) with the temporal scope (static or yearly).
  enum fileformat {
      EMPTY,
      GLOBAL_STATIC,
      GLOBAL_YEARLY,
      LOCAL_STATIC,
      LOCAL_YEARLY
  };
  /// Pairs a grid cell's longitude/latitude with the position of its data on disk.
  struct CoordPos {
      /// Longitude of the grid cell
      double lon;

      /// Latitude of the grid cell
      double lat;

      /// Position on disk of the data associated with this grid cell
      long pos;
  };
  37. // Forward declaration of TimeDataDmem
  38. class TimeDataDmem;
  39. /// Class for reading a set of double data at coordinate positions over time (years), alternatively static or/and global.
  40. class TimeDataD {
  41. // PRIVATE VARIABLES
  42. /// File pointer to input file
  43. FILE *ifp;
  44. /// File name
  45. char *fileName;
  46. /// Number of data columns
  47. int nColumns; //Set in ParseFormat()
  48. /// Number of data years
  49. int nYears; //Set in ParseNYears()
  50. /// Number of data gridcells
  51. int nCells; //Set in ParseNCells()
  52. // Spacial resolution of input data in degrees
  53. double spatial_resolution;
  54. /// Offset to be used when searching for coordinates
  55. double offset;
  56. /// Whether the input file structure includes a header line with column names and coordinates on each line of data
  57. bool ifheader;
  58. /// String array of data column names
  59. char *header_arr[MAXRECORDS];
  60. /// Coordinates for current gridcell
  61. Coord currentStand;
  62. /// Pointer to data for one (current) gridcell
  63. double *data; //allocated in Allocate(), set in Load(), Load(Coord) or LoadNext()
  64. /// Pointer to array with data years
  65. int *year; //allocated in Allocate(), set in Load(), Load(Coord) or LoadNext()
  66. /// Pointer to array with indication whether data column contains values > 0 or not
  67. bool *checkdata; //allocated in CheckIfPresent()
  68. /// Format of input data
  69. fileformat format;
  70. /// First year of data
  71. int firstyear; //set in ParseNYears() or ParseNYearsSpatial()
  72. /// Whether data is currently being checked by CheckIfPresent()
  73. bool ischeckingdata;
  74. /// Whether data for the requested coordinates have been found and loaded
  75. bool loaded;
  76. /// Whether data sums up to 1.0
  77. bool unity_data;
  78. /// Pointer to memory copy of all data for the gridlist
  79. TimeDataDmem *memory_copy;
  80. /// Pointer to map of file positions of data for all gridcells in the file
  81. CoordPos *filemap;
  82. // PRIVATE METHODS
  83. /// Methods for parsing input file data format and structure
  84. fileformat ParseFormat(); //Called from Open(); Returns 0 if wrong format, sets nColumns, ifheader and header_arr[]
  85. int ParseNYears(); //Called from Open()
  86. int ParseNYearsGlobal(); //Called from ParseNYears()
  87. int ParseNYearsLocal(); //Called from ParseNYears()
  88. void ParseNCells();
  89. double ParseSpatialResolution(); //Called from Open()
  90. bool ParseNormalisation();
  91. /// Allocates memory for dynamic data structures
  92. bool Allocate(); //Called from Open()
  93. /// Finds data for a gridcell in input file. Quick version
  94. bool FindRecord(Coord c) const;
  95. /// Finds data for a gridcell in input file. Slower version, can handle blank lines
  96. bool FindRecord2(Coord c) const;
  97. /// Converts data column name to data column index
  98. int GetColumn(const char* name) const; // Returns column number for header name.
  99. /// Converts calender year to valid year position in data array.
  100. int CalenderYearToPosition(int calender_year) const;
  101. /// Creates map of the file positions of all gridcells' data
  102. void CreateFileMap();
  103. /// Sets the file pointer to required position (found in the file map)
  104. void SetPosition(long int pos) {fseek(ifp, pos, SEEK_SET);} // ecev3 - 0 to SEEK_SET
  105. /// Rewinds the file pointer
  106. void Rewind() {if(ifp) rewind(ifp);}
  107. /// Loads local data for a certain coordinate from a file map. Returns 0 if coordinate not found.
  108. bool LoadFromMap(Coord c);
  109. /// Sets offset to be used when searching for coordinates.
  110. void SetOffset(double gridlist_offset);
  111. /// Copies all data for the specified gridlist to memory
  112. void CopyToMemory(int ncells, ListArray_id<Coord>& lonlatlist);
  113. public:
  114. // PUBLIC METHODS
  115. /// Constructor
  116. TimeDataD(fileformat format=EMPTY); // default format value can only be used with header version input files !
  117. /// Deconstructor
  118. ~TimeDataD();
  119. // Methods to open input files and access data
  120. /// Opens input file, checks format and allocates memory. Returns false if error
  121. bool Open(const char* name);
  122. /// Opens input file, checks format and allocates memory. Copies all data for the gridlist into memory if LUTOMEMORY is defined. Returns false if error.
  123. bool Open(const char* name, ListArray_id<Coord>& gridlist, double gridlist_offset = 0.0);
  124. /// Releases dynamically allocated memory.
  125. void Close();
  126. /// Writes the data of the current coordinate to an output file
  127. void Output(char* outfile);
  128. /// Loads global data, closes input file.
  129. bool Load();
  130. /// Loads data for a certain coordinate. Returns false if coordinate not found.
  131. bool Load(Coord c);
  132. /// Steps through a data file, loading each coordinate's data consecutively. Returns false if error.
  133. bool LoadNext(long int *pos = NULL);
  134. /// Returns a single data value for a certain year and data column
  135. double Get(int calender_year, int column) const;
  136. /// Returns a single data value for column with header string name. Returns -999 if name not found.
  137. double Get(int calender_year, const char* name) const;
  138. /// Copies the data for the current gridcell for one year to an array.
  139. void Get(int calender_year, double* dataX) const;
  140. /// Copies all data for the current gridcell to an array.
  141. void Get(double* dataX) const;
  142. // Methods to access private data
  143. /// Returns the number of data columns
  144. int GetnColumns() const {return nColumns;}
  145. /// Copies the data column names to a string array
  146. bool GetHeader(char *cropnames[MAXRECORDS]) const;
  147. /// Copies the whole header to a string
  148. bool GetHeaderFull(char *header_line) const;
  149. /// Returns a pointer to a data name string for a column by its index
  150. char* GetHeader(int record) const;
  151. /// Returns the current coordinates
  152. Coord& GetCoord() {return currentStand;}
  153. /// Returns the number of gridcells with data in the input file
  154. int GetNCells(); // Calls ParseNCells() if nCells not yet set
  155. /// Returns the number of years in the input data
  156. int GetnYears() const {return nYears;}
  157. /// Returns the first year in the input data
  158. int GetFirstyear() const {return firstyear;}
  159. /// Returns the data format (EMPTY, GLOBAL_STATIC, GLOBAL_YEARLY, LOCAL_STATIC, LOCAL_YEARLY)
  160. fileformat GetFormat() const {return format;}
  161. /// Returns true if data for requested coordinates are found, false if not.
  162. bool isloaded();
  163. /// Sets spacial resolution
  164. void SetSpacialResolution(double resolution) {spatial_resolution = resolution;}
  165. /// Returns spacial resolution
  166. double GetSpacialResolution() const {return spatial_resolution;}
  167. double GetOffset() const { return offset;}
  168. bool NormalisedData();
  169. // Functions for finding out if data columns contain sensible data for a specified gridlist
  170. /// Checks if data column has any values > 0 in any of the gridcells in the gridlist
  171. void CheckIfPresent(ListArray_id<Coord>& gridlist);
  172. // Returns true if data column has any values > 0 in any of the gridcells in the gridlist (after CheckIfPresent() call)
  173. bool item_has_data(char* name);
  174. // Returns true if data name is in header
  175. bool item_in_header(char* name);
  176. /// Used by TimeDataDmem class to set pointer to full data copy
  177. void register_memory_copy(TimeDataDmem* mem_copy) {memory_copy = mem_copy;}
  178. };
  179. /// Class for loading all data for a gridlist to memory.
  180. class TimeDataDmem {
  181. // PRIVATE VARIABLES
  182. /// Pointer to gridlist
  183. Coord *gridlist;
  184. /// Pointer to data array
  185. double **data;
  186. /// Number of data columns
  187. int nColumns;
  188. /// Number of data years
  189. int nYears;
  190. /// Number of data gridcells
  191. int nCells;
  192. // Spacial resolution of input data in degrees
  193. double spatial_resolution;
  194. /// Whether the input file structure includes a header line with column names and coordinates on each line of data
  195. bool ifheader;
  196. /// String array of data column names
  197. char *header_arr[MAXRECORDS];
  198. /// Index of current gridcell in data array
  199. int currentCell;
  200. /// First year of data
  201. int firstyear;
  202. /// Whether data for the requested coordinates have been found and loaded
  203. bool loaded;
  204. // PRIVATE METHODS
  205. /// Converts calender year to valid year position in data array.
  206. int CalenderYearToPosition(int calender_year) const;
  207. /// Sets coord at index position
  208. void SetCoord(int index, Coord c);
  209. /// Sets data at index position
  210. void SetData(int index, double* data);
  211. public:
  212. // PUBLIC METHODS
  213. /// Constructor
  214. TimeDataDmem();
  215. /// Deconstructor
  216. ~TimeDataDmem();
  217. /// Allocates memory.
  218. void Open(int nCells, int nColumns, int nYears);
  219. /// Releases dynamically allocated memory.
  220. void Close();
  221. /// Copies all data for gridlist to memory
  222. void CopyFromTimeDataD(TimeDataD& Data, ListArray_id<Coord>& gridlistX);
  223. /// Loads data for a certain coordinate. Returns false if coordinate not found.
  224. bool Load(Coord c);
  225. /// Returns a single data value for a certain year and data column
  226. double Get(int calender_year, int column) const; // Returns a single value.
  227. /// Returns a single data value for column with header string name. Returns -999 if name not found.
  228. double Get(int calender_year, const char* name) const;
  229. /// Returns the first year in the input data
  230. int GetFirstyear() {return firstyear;}
  231. /// Returns true if data for requested coordinates are found, false if not.
  232. bool isloaded() const { return loaded;}
  233. /// Sets spacial resolution
  234. void SetSpacialResolution(double resolution) {spatial_resolution = resolution;}
  235. };
  236. } // namespace InData
  237. #endif//INDATA_H