View Javadoc
1   package net.sumaris.importation.service.ices;
2   
3   /*-
4    * #%L
5    * SUMARiS:: Core Importation
6    * %%
7    * Copyright (C) 2018 - 2019 SUMARiS Consortium
8    * %%
9    * This program is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU General Public License as
11   * published by the Free Software Foundation, either version 3 of the
12   * License, or (at your option) any later version.
13   * 
14   * This program is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Public License for more details.
18   * 
19   * You should have received a copy of the GNU General Public
20   * License along with this program.  If not, see
21   * <http://www.gnu.org/licenses/gpl-3.0.html>.
22   * #L%
23   */
24  
25  import com.google.common.base.Joiner;
26  import com.google.common.collect.*;
27  import net.sumaris.core.config.SumarisConfiguration;
28  import net.sumaris.core.dao.technical.schema.*;
29  import net.sumaris.core.exception.SumarisTechnicalException;
30  import net.sumaris.core.model.technical.extraction.rdb.*;
31  import net.sumaris.core.service.referential.taxon.TaxonNameService;
32  import net.sumaris.core.util.Files;
33  import net.sumaris.core.vo.referential.TaxonNameVO;
34  import net.sumaris.importation.exception.FileValidationException;
35  import net.sumaris.importation.service.DataLoaderService;
36  import net.sumaris.importation.util.csv.CSVFileReader;
37  import org.apache.commons.collections4.CollectionUtils;
38  import org.apache.commons.lang3.ArrayUtils;
39  import org.apache.commons.lang3.StringUtils;
40  import org.slf4j.Logger;
41  import org.slf4j.LoggerFactory;
42  import org.springframework.beans.factory.annotation.Autowired;
43  import org.springframework.stereotype.Service;
44  
45  import java.io.File;
46  import java.io.IOException;
47  import java.util.*;
48  import java.util.function.BiFunction;
49  import java.util.stream.Collectors;
50  
51  @Service("icesDataLoaderService")
52  public class IcesDataLoaderServiceImpl implements IcesDataLoaderService {
53  
54  	protected static final Logger log = LoggerFactory.getLogger(IcesDataLoaderServiceImpl.class);
55  
56  	/**
57  	 * Allow to override the default column headers array, on a given table
58  	 * Should return the new headers array
59  	 */
60  	public interface HeaderAdapter extends BiFunction<DatabaseTableEnum, String[], String[]> {
61  
62  		@Override
63  		String[] apply(DatabaseTableEnum table, String[] headers);
64  	}
65  
66  	protected static final char DEFAULT_SEPARATOR = ',';
67  
68  	protected static final String MIXED_COLUMN_RECORD_TYPE = "record_type";
69  
70  	protected static List<DatabaseTableEnum> orderedTables = ImmutableList.of(
71  			DatabaseTableEnum.P01_RDB_TRIP,
72  			DatabaseTableEnum.P01_RDB_STATION,
73  			DatabaseTableEnum.P01_RDB_SPECIES_LIST,
74  			DatabaseTableEnum.P01_RDB_SPECIES_LENGTH,
75  			// TODO CA
76  			DatabaseTableEnum.P01_RDB_LANDING
77  			// TODO CE
78  	);
79  
80  	protected static Map<String, DatabaseTableEnum> tableByRecordTypeMap = ImmutableMap.<String, DatabaseTableEnum>builder()
81  			.put("TR", DatabaseTableEnum.P01_RDB_TRIP)
82  			.put("HH", DatabaseTableEnum.P01_RDB_STATION)
83  			.put("SL", DatabaseTableEnum.P01_RDB_SPECIES_LIST)
84  			.put("HL", DatabaseTableEnum.P01_RDB_SPECIES_LENGTH)
85  			// TODO CA
86  			.put("CL", DatabaseTableEnum.P01_RDB_LANDING)
87  			// TODO CE
88  			.build();
89  
90  	protected static Map<DatabaseTableEnum, String[]> headersByTableMap = ImmutableMap.<DatabaseTableEnum, String[]>builder()
91  			.put(ProductRdbTrip.TABLE, new String[]{
92  					MIXED_COLUMN_RECORD_TYPE,
93  					ProductRdbTrip.COLUMN_SAMPLING_TYPE,
94  					ProductRdbTrip.COLUMN_VESSEL_FLAG_COUNTRY,
95  					ProductRdbTrip.COLUMN_LANDING_COUNTRY,
96  					ProductRdbTrip.COLUMN_YEAR,
97  					ProductRdbTrip.COLUMN_PROJECT,
98  					ProductRdbTrip.COLUMN_TRIP_CODE,
99  					ProductRdbTrip.COLUMN_VESSEL_LENGTH,
100 					ProductRdbTrip.COLUMN_VESSEL_POWER,
101 					ProductRdbTrip.COLUMN_VESSEL_SIZE,
102 					ProductRdbTrip.COLUMN_VESSEL_TYPE,
103 					ProductRdbTrip.COLUMN_HARBOUR,
104 					ProductRdbTrip.COLUMN_NUMBER_OF_SETS,
105 					ProductRdbTrip.COLUMN_DAYS_AT_SEA,
106 					ProductRdbTrip.COLUMN_VESSEL_IDENTIFIER,
107 					ProductRdbTrip.COLUMN_SAMPLING_COUNTRY,
108 					ProductRdbTrip.COLUMN_SAMPLING_METHOD
109 			})
110 			.put(ProductRdbStation.TABLE, new String[]{
111 					MIXED_COLUMN_RECORD_TYPE,
112 					ProductRdbStation.COLUMN_SAMPLING_TYPE,
113 					ProductRdbStation.COLUMN_VESSEL_FLAG_COUNTRY,
114 					ProductRdbStation.COLUMN_LANDING_COUNTRY,
115 					ProductRdbStation.COLUMN_YEAR,
116 					ProductRdbStation.COLUMN_PROJECT,
117 					ProductRdbStation.COLUMN_TRIP_CODE,
118 					ProductRdbStation.COLUMN_STATION_NUMBER,
119 					ProductRdbStation.COLUMN_FISHING_VALIDITY,
120 					ProductRdbStation.COLUMN_AGGREGATION_LEVEL,
121 					ProductRdbStation.COLUMN_CATCH_REGISTRATION,
122 					ProductRdbStation.COLUMN_SPECIES_REGISTRATION,
123 					ProductRdbStation.COLUMN_DATE,
124 					ProductRdbStation.COLUMN_TIME,
125 					ProductRdbStation.COLUMN_FISHING_TIME,
126 					ProductRdbStation.COLUMN_POS_START_LAT,
127 					ProductRdbStation.COLUMN_POS_START_LON,
128 					ProductRdbStation.COLUMN_POS_END_LAT,
129 					ProductRdbStation.COLUMN_POS_END_LON,
130 					ProductRdbStation.COLUMN_AREA,
131 					ProductRdbStation.COLUMN_STATISTICAL_RECTANGLE,
132 					ProductRdbStation.COLUMN_SUB_POLYGON,
133 					ProductRdbStation.COLUMN_MAIN_FISHING_DEPTH,
134 					ProductRdbStation.COLUMN_MAIN_WATER_DEPTH,
135 					ProductRdbStation.COLUMN_NATIONAL_METIER,
136 					ProductRdbStation.COLUMN_EU_METIER_LEVEL5,
137 					ProductRdbStation.COLUMN_EU_METIER_LEVEL6,
138 					ProductRdbStation.COLUMN_GEAR_TYPE,
139 					ProductRdbStation.COLUMN_MESH_SIZE,
140 					ProductRdbStation.COLUMN_SELECTION_DEVICE,
141 					ProductRdbStation.COLUMN_MESH_SIZE_SELECTION_DEVICE
142 			})
143 			.put(ProductRdbSpeciesList.TABLE, new String[]{
144 					MIXED_COLUMN_RECORD_TYPE,
145 					ProductRdbSpeciesList.COLUMN_SAMPLING_TYPE,
146 					ProductRdbSpeciesList.COLUMN_VESSEL_FLAG_COUNTRY,
147 					ProductRdbSpeciesList.COLUMN_LANDING_COUNTRY,
148 					ProductRdbSpeciesList.COLUMN_YEAR,
149 					ProductRdbSpeciesList.COLUMN_PROJECT,
150 					ProductRdbSpeciesList.COLUMN_TRIP_CODE,
151 					ProductRdbSpeciesList.COLUMN_STATION_NUMBER,
152 					ProductRdbSpeciesList.COLUMN_SPECIES,
153 					ProductRdbSpeciesList.COLUMN_SEX,
154 					ProductRdbSpeciesList.COLUMN_CATCH_CATEGORY,
155 					ProductRdbSpeciesList.COLUMN_LANDING_CATEGORY,
156 					ProductRdbSpeciesList.COLUMN_COMMERCIAL_SIZE_CATEGORY_SCALE,
157 					ProductRdbSpeciesList.COLUMN_COMMERCIAL_SIZE_CATEGORY,
158 					ProductRdbSpeciesList.COLUMN_SUBSAMPLING_CATEGORY,
159 					ProductRdbSpeciesList.COLUMN_WEIGHT,
160 					ProductRdbSpeciesList.COLUMN_SUBSAMPLING_WEIGHT,
161 					ProductRdbSpeciesList.COLUMN_LENGTH_CODE
162 			})
163 			.put(ProductRdbSpeciesLength.TABLE, new String[]{
164 					MIXED_COLUMN_RECORD_TYPE,
165 					ProductRdbSpeciesLength.COLUMN_SAMPLING_TYPE,
166 					ProductRdbSpeciesLength.COLUMN_VESSEL_FLAG_COUNTRY,
167 					ProductRdbSpeciesLength.COLUMN_LANDING_COUNTRY,
168 					ProductRdbSpeciesLength.COLUMN_YEAR,
169 					ProductRdbSpeciesLength.COLUMN_PROJECT,
170 					ProductRdbSpeciesLength.COLUMN_TRIP_CODE,
171 					ProductRdbSpeciesLength.COLUMN_STATION_NUMBER,
172 					ProductRdbSpeciesLength.COLUMN_SPECIES,
173 					ProductRdbSpeciesLength.COLUMN_CATCH_CATEGORY,
174 					ProductRdbSpeciesLength.COLUMN_LANDING_CATEGORY,
175 					ProductRdbSpeciesLength.COLUMN_COMMERCIAL_SIZE_CATEGORY_SCALE,
176 					ProductRdbSpeciesLength.COLUMN_COMMERCIAL_SIZE_CATEGORY,
177 					ProductRdbSpeciesLength.COLUMN_SUBSAMPLING_CATEGORY,
178 					ProductRdbSpeciesLength.COLUMN_SEX,
179 					ProductRdbSpeciesLength.COLUMN_INDIVIDUAL_SEX,
180 					ProductRdbSpeciesLength.COLUMN_LENGTH_CLASS,
181 					ProductRdbSpeciesLength.COLUMN_NUMBER_AT_LENGTH
182 			})
183 			// TODO CA
184 			.put(ProductRdbLandingStatistics.TABLE, new String[]{
185 					MIXED_COLUMN_RECORD_TYPE,
186 					ProductRdbLandingStatistics.COLUMN_VESSEL_FLAG_COUNTRY,
187 					ProductRdbLandingStatistics.COLUMN_LANDING_COUNTRY,
188 					ProductRdbLandingStatistics.COLUMN_YEAR,
189 					ProductRdbLandingStatistics.COLUMN_QUARTER,
190 					ProductRdbLandingStatistics.COLUMN_MONTH,
191 					ProductRdbLandingStatistics.COLUMN_AREA,
192 					ProductRdbLandingStatistics.COLUMN_STATISTICAL_RECTANGLE,
193 					ProductRdbLandingStatistics.COLUMN_SUB_POLYGON,
194 					ProductRdbLandingStatistics.COLUMN_SPECIES,
195 					ProductRdbLandingStatistics.COLUMN_LANDING_CATEGORY,
196 					ProductRdbLandingStatistics.COLUMN_COMMERCIAL_SIZE_CATEGORY_SCALE,
197 					ProductRdbLandingStatistics.COLUMN_COMMERCIAL_SIZE_CATEGORY,
198 					ProductRdbLandingStatistics.COLUMN_NATIONAL_METIER,
199 					ProductRdbLandingStatistics.COLUMN_EU_METIER_LEVEL5,
200 					ProductRdbLandingStatistics.COLUMN_EU_METIER_LEVEL6,
201 					ProductRdbLandingStatistics.COLUMN_HARBOUR,
202 					ProductRdbLandingStatistics.COLUMN_VESSEL_LENGTH_CATEGORY,
203 					ProductRdbLandingStatistics.COLUMN_UNALLOCATED_CATCH_WEIGHT,
204 					ProductRdbLandingStatistics.COLUMN_AREA_MISREPORTED_CATCH_WEIGHT,
205 					ProductRdbLandingStatistics.COLUMN_OFFICIAL_LANDINGS_WEIGHT,
206 					ProductRdbLandingStatistics.COLUMN_LANDINGS_MULTIPLIER,
207 					ProductRdbLandingStatistics.COLUMN_OFFICIAL_LANDINGS_VALUE
208 			})
209 			// TODO CE
210 			.build();
211 
212 
213 	protected static final Map<String, String> headerReplacements = ImmutableMap.<String, String>builder()
214 			// FRA synonyms
215 			.put("landCtry", ProductRdbLandingStatistics.COLUMN_LANDING_COUNTRY)
216 			.put("vslFlgCtry", ProductRdbLandingStatistics.COLUMN_VESSEL_FLAG_COUNTRY)
217 			.put("year", ProductRdbLandingStatistics.COLUMN_YEAR)
218 			.put("quarter", ProductRdbLandingStatistics.COLUMN_QUARTER)
219 			.put("month", ProductRdbLandingStatistics.COLUMN_MONTH)
220 			.put("area", ProductRdbLandingStatistics.COLUMN_AREA)
221 			.put("rect", ProductRdbLandingStatistics.COLUMN_STATISTICAL_RECTANGLE)
222 			.put("subRect", ProductRdbLandingStatistics.COLUMN_SUB_POLYGON)
223 			.put("taxon", ProductRdbLandingStatistics.COLUMN_SPECIES)
224 			.put("landCat", ProductRdbLandingStatistics.COLUMN_LANDING_CATEGORY)
225 			.put("commCatScl", ProductRdbLandingStatistics.COLUMN_COMMERCIAL_SIZE_CATEGORY_SCALE)
226 			.put("commCat", ProductRdbLandingStatistics.COLUMN_COMMERCIAL_SIZE_CATEGORY)
227 			.put("foCatNat", ProductRdbLandingStatistics.COLUMN_NATIONAL_METIER)
228 			.put("foCatEu5", ProductRdbLandingStatistics.COLUMN_EU_METIER_LEVEL5)
229 			.put("foCatEu6", ProductRdbLandingStatistics.COLUMN_EU_METIER_LEVEL6)
230 			.put("harbour", ProductRdbLandingStatistics.COLUMN_HARBOUR)
231 			.put("vslLenCat", ProductRdbLandingStatistics.COLUMN_VESSEL_LENGTH_CATEGORY)
232 			.put("unallocCatchWt", ProductRdbLandingStatistics.COLUMN_UNALLOCATED_CATCH_WEIGHT)
233 			.put("misRepCatchWt", ProductRdbLandingStatistics.COLUMN_AREA_MISREPORTED_CATCH_WEIGHT)
234 			.put("landWt", ProductRdbLandingStatistics.COLUMN_OFFICIAL_LANDINGS_WEIGHT)
235 			.put("landMult", ProductRdbLandingStatistics.COLUMN_LANDINGS_MULTIPLIER)
236 			.put("landValue", ProductRdbLandingStatistics.COLUMN_OFFICIAL_LANDINGS_VALUE)
237 
238 			// BEL synonyms
239 			.put("FAC_National", ProductRdbLandingStatistics.COLUMN_NATIONAL_METIER)
240 			.put("FAC_EC_lvl5", ProductRdbLandingStatistics.COLUMN_EU_METIER_LEVEL5)
241 			.put("FAC_EC_lvl6", ProductRdbLandingStatistics.COLUMN_EU_METIER_LEVEL6)
242 
243 
244 			.build();
245 
246 	protected static final Map<String, String> linesReplacements = ImmutableMap.<String, String>builder()
247 			.put("\"NA\"", "")
248 			.put(",NA,NA,", ",,,")
249 			.put(",NA,", ",,")
250 
251 			// -- FRA Data
252 			//.put(",u10,", ",0-10,")
253 			.put(",o40,", ",>40,")
254 			.put(",([0-9]{1,2})-([0-9]{1,2}),", ",$1-<$2,")
255 
256 			// -- BEL Data
257 			.put(",([0-9]{1,2})-<([0-9]{1,2}),", ",$1-$2,")
258 
259 			// --- GBR data
260 			// Country code ISO3166-2:GB -> ISO3166-1 alpha-3
261 			.put("GB[_-]?(ENG|NIR|SCT|WLS)", "GBR")
262 			// Date format from DD/MM/YYYY -> YYYY-MM-DD
263 			.put(",([0-9]{2})/([0-9]{2})/([0-9]{4}),", ",$3-$2-$1,")
264 			// Country code ISO3166-1 alpha-2 -> ISO3166-1 alpha-3
265 			.put(",BE,BE,", ",BEL,BEL,")
266 			.put(",FR,FR,", ",FRA,FRA,")
267 			.put(",DK,DK,", ",DNK,DNK,")
268 			.put(",NL,NL,", ",NLD,NLD,")
269 			// Replace unknown metier
270 			.put("XXX_XXX", "UNK_MZZ")
271 
272 			// GBR + BEL
273 			// - If metier level5 is empty, fill using metier level 6
274 			.put(",,([A-Z]{2,3})_([A-Z]{2,3})_0_0_0,", ",$1_$2,$1_$2_0_0_0,")
275 			// - If gear is empty, fill using metier level 6
276 			.put(",([A-Z]{2,3})_([A-Z]{2,3})_0_0_0,,", ",$1_$2_0_0_0,$1,")
277 
278 			.build();
279 
280 	@Autowired
281 	protected SumarisConfiguration config;
282 
283 	@Autowired
284 	protected SumarisDatabaseMetadata databaseMetadata;
285 
286 	@Autowired
287 	protected DataLoaderService dataLoaderService;
288 
289 	@Autowired
290 	protected TaxonNameService taxonNameService;
291 
292 	@Override
293 	public void loadLanding(File inputFile, String country, boolean validate, boolean appendData) throws IOException, FileValidationException {
294 		loadTable(inputFile, DatabaseTableEnum.P01_RDB_LANDING, country, validate, appendData);
295 	}
296 
297 	@Override
298 	public void loadTrip(File inputFile, String country, boolean validate, boolean appendData) throws IOException, FileValidationException {
299 		loadTable(inputFile, DatabaseTableEnum.P01_RDB_TRIP, country, validate, appendData);
300 	}
301 
302 	@Override
303 	public void detectFormatAndLoad(File inputFile, String country, boolean validate, boolean appendData) throws IOException, FileValidationException {
304 		Files.checkExists(inputFile);
305 
306 		File tempFile = prepareFile(inputFile);
307 
308 		// Detect file format
309 		CSVFileReader reader = new CSVFileReader(tempFile, true);
310 		String[] headers = reader.getHeaders();
311 		String[] secondRow = reader.readNext();
312 		reader.close();
313 
314 		// Empty file
315 		if (headers == null && secondRow == null) {
316 			log.warn("File is empty. No data to load.");
317 			return;
318 		}
319 
320 		// If only one row (no headers), then mixed format
321 		if (headers != null && secondRow == null) {
322 			loadMixed(inputFile, country, validate, appendData);
323 			return;
324 		}
325 
326 		// More than one row: detected if mixed file
327 		for(String recordType: tableByRecordTypeMap.keySet()) {
328 			DatabaseTableEnum table = tableByRecordTypeMap.get(recordType);
329 			String[] expectedHeaders = headersByTableMap.get(table);
330 
331 			boolean isMixedFile =
332 							// First column value should by the record type
333 							recordType.equals(headers[0]) &&
334 							// Second column value should NOT be an header name
335 							!Arrays.asList(expectedHeaders).contains(headers[1].toLowerCase());
336 			if (isMixedFile) {
337 				log.info(String.format("[%s] Detected format: mixed record types", inputFile.getName()));
338 				loadMixed(inputFile, country, validate, appendData);
339 				return;
340 			}
341 		}
342 
343 		// Detected record type, from the second row
344 		for(String recordType: tableByRecordTypeMap.keySet()) {
345 			DatabaseTableEnum table = tableByRecordTypeMap.get(recordType);
346 			String[] expectedHeaders = headersByTableMap.get(table);
347 
348 			boolean isOneRecordTypeFile =
349 							// At least the second expected column header
350 							Arrays.asList(expectedHeaders).contains(headers[1].toLowerCase()) &&
351 							// Second line start with expected record type
352 							recordType.equals(secondRow[0]);
353 			if (isOneRecordTypeFile) {
354 				log.info(String.format("[%s] Detected format: unique record type {%s}", inputFile.getName(), table.name().toLowerCase()));
355 				loadTable(inputFile, table, country, validate, appendData);
356 				return;
357 			}
358 		}
359 
360 		throw new SumarisTechnicalException("Unable to detect file format");
361 	}
362 
363 	@Override
364 	public void loadMixed(File inputFile, String country, boolean validate, boolean appendData) throws IOException, FileValidationException {
365 		Files.checkExists(inputFile);
366 
367 		// If not append : then remove old data
368 		if (!appendData) {
369 			String[] filterCols = StringUtils.isNotBlank(country) ? new String[] { ProductRdbTrip.COLUMN_VESSEL_FLAG_COUNTRY } : null;
370 			String[] filterVals = StringUtils.isNotBlank(country) ? new String[] { country } : null;
371 			List<DatabaseTableEnum> deleteOrderedTables = orderedTables.stream().sorted(Comparator.reverseOrder()).collect(Collectors.toList());
372 			for (DatabaseTableEnum table: deleteOrderedTables) {
373 				dataLoaderService.remove(table, filterCols, filterVals);
374 			}
375 		}
376 
377 		Collection<File> tempFiles = null;
378 		try {
379 			// Split file in many, by table
380 			Map<DatabaseTableEnum, File> files = prepareMixedFile(inputFile, getHeadersAdapter(inputFile, country));
381 
382 			tempFiles = files.values();
383 
384 			for (DatabaseTableEnum table: orderedTables) {
385 				File file = files.get(table);
386 				if (file != null && file.exists()) {
387 					log.info("-----------------------------------------------");
388 					dataLoaderService.load(file, table, validate);
389 				}
390 			}
391 		}
392 		catch(Exception e) {
393 			log.error(e.getMessage(), e);
394 			throw e;
395 		}
396 		finally {
397 			if (CollectionUtils.isNotEmpty(tempFiles)) {
398 				//tempFiles.forEach(FileUtils::deleteQuietly);
399 			}
400 		}
401 
402 	}
403 
404 
405 	/* -- protected methods -- */
406 
407 	protected void loadTable(File inputFile, DatabaseTableEnum table, String country, boolean validate, boolean appendData) throws IOException, FileValidationException {
408 		Files.checkExists(inputFile);
409 
410 		// If not append : then remove old data
411 		if (!appendData) {
412 
413 			if (StringUtils.isNotBlank(country)) {
414 				dataLoaderService.remove(table, new String[] { ProductRdbTrip.COLUMN_VESSEL_FLAG_COUNTRY }, new String[] { country });
415 			} else {
416 				dataLoaderService.remove(table, null, null);
417 			}
418 		}
419 
420 		File tempFile = null;
421 		try {
422 			tempFile = prepareFileForTable(inputFile, table, DEFAULT_SEPARATOR);
423 
424 			// Do load
425 			dataLoaderService.load(tempFile, table, validate);
426 		}
427 		catch(Exception e) {
428 			log.error(e.getMessage(), e);
429 			throw e;
430 		}
431 		finally {
432 			Files.deleteQuietly(tempFile);
433 
434 			Files.deleteFiles(inputFile.getParentFile(), "^.*.tmp[0-9]+$");
435 		}
436 	}
437 
438 	protected File prepareFileForTable(File inputFile, DatabaseTableEnum table, char separator){
439 
440 		File tempFile = prepareFile(inputFile, separator);
441 
442 		try {
443 
444 			// If species list table
445 			if (table != null && (
446 					table == DatabaseTableEnum.P01_RDB_SPECIES_LIST
447 				|| table == DatabaseTableEnum.P01_RDB_SPECIES_LENGTH)
448 			) {
449 
450 				// Get taxon name, to create a replacement map
451 				Map<String, String> taxonNameReplacements = Maps.newHashMap();
452 				for (TaxonNameVO t: taxonNameService.getAll(true)) {
453 					if (StringUtils.isNotBlank(t.getLabel())) {
454 						String regexp = t.getName() + "[^,;\t\"]*" + separator;
455 						String replacement = t.getLabel() + separator;
456 						taxonNameReplacements.put(regexp, replacement);
457 					}
458 				}
459 
460 				// Replace in lines
461 				Files.replaceAll(tempFile, taxonNameReplacements, 1, -1/*all rows*/);
462 			}
463 
464 			return tempFile;
465 
466 		} catch(IOException e) {
467 			throw new SumarisTechnicalException("Could not preparing file: " + inputFile.getPath(), e);
468 		}
469 
470 
471 	}
472 
473 	protected File prepareFile(File inputFile) {
474 		return prepareFile(inputFile, DEFAULT_SEPARATOR);
475 	}
476 
477 	protected File prepareFile(File inputFile, char separator) {
478 
479 		try {
480 			File tempFile = Files.getNewTemporaryFile(inputFile);
481 
482 			// Replace in headers (exact match
483 			Map<String, String> exactHeaderReplacements = Maps.newHashMap();
484 			for (String header: headerReplacements.keySet()) {
485 				String regexp = "(^|" + separator + ")\"?" + header + "\"?(" + separator + "|$)";
486 				String replacement = separator + headerReplacements.get(header) + separator;
487 				exactHeaderReplacements.put(regexp, replacement);
488 			}
489 			Files.replaceAllInHeader(inputFile, tempFile, exactHeaderReplacements);
490 
491 			// Replace in lines
492 			Files.replaceAll(tempFile, linesReplacements, 1, -1/*all rows*/);
493 
494 			return tempFile;
495 		} catch(IOException e) {
496 			throw new SumarisTechnicalException("Could not preparing file: " + inputFile.getPath(), e);
497 		}
498 	}
499 
500 
501 	protected Map<DatabaseTableEnum, File> prepareMixedFile(File inputFile, HeaderAdapter headersAdapter) throws IOException {
502 
503 		CSVFileReader reader = new CSVFileReader(inputFile, true);
504 		char separator = reader.getSeparator();
505 		reader.close();
506 
507 		// Delete previous temp files
508 		Files.deleteTemporaryFiles(inputFile);
509 
510 		Map<DatabaseTableEnum, File> result = Maps.newHashMap();
511 		for(String recordType: tableByRecordTypeMap.keySet()) {
512 			DatabaseTableEnum table = tableByRecordTypeMap.get(recordType);
513 			File tempFile = Files.getNewTemporaryFile(inputFile);
514 			List<String> prefixes = ImmutableList.of(recordType+separator, "\""+recordType+"\""+separator);
515 			Files.filter(inputFile, tempFile, (line) ->
516 				prefixes.stream().filter(prefix -> line.startsWith(prefix)).findFirst().isPresent()
517 			);
518 
519 			if (!Files.isEmpty(tempFile)) {
520 				tempFile = insertHeader(tempFile, table, separator, headersAdapter);
521 
522 				tempFile = prepareFileForTable(tempFile, table, separator);
523 
524 				String recordBasename = String.format("%s-%s.%s%s", Files.getNameWithoutExtension(inputFile), recordType, Files.getExtension(inputFile), Files.TEMPORARY_FILE_DEFAULT_EXTENSION);
525 				File recordFile = new File(inputFile.getParentFile(), recordBasename);
526 				Files.copyFile(tempFile, recordFile);
527 
528 				Files.deleteTemporaryFiles(inputFile);
529 
530 				result.put(table, recordFile);
531 			}
532 		}
533 
534 		return result;
535 	}
536 
537 	public File insertHeader(File inputFile, DatabaseTableEnum table, char separator, HeaderAdapter headersAdapter) throws IOException {
538 
539 		String[] headers = headersByTableMap.get(table);
540 		if (ArrayUtils.isEmpty(headers)) {
541 			throw new SumarisTechnicalException(String.format("No default headers defined, for table {%s}", table.name()));
542 		}
543 
544 		// Adapt headers, if need (e.g. remove/add specific columns)
545 		if (headersAdapter != null) {
546 			headers = headersAdapter.apply(table, headers);
547 			if (ArrayUtils.isEmpty(headers)) {
548 				throw new SumarisTechnicalException(String.format("No headers returned by the adapter, for table {%s}", table.name()));
549 			}
550 		}
551 
552 		File tempFile = Files.getNewTemporaryFile(inputFile);
553 		String headerLine = Joiner.on(separator).join(headers);
554 		Files.prependLines(inputFile, tempFile, headerLine);
555 		return tempFile;
556 	}
557 
558 	/**
559 	 * Generate a new headers adapter, depending on the file origin. E.g. in GBR data, remove the fishing_activity column that is not present
560 	 * @param inputFile
561 	 * @param country
562 	 * @return
563 	 */
564 	public HeaderAdapter getHeadersAdapter(final File inputFile, final String country) {
565 
566 		// Special case for GBR
567 		if ("GBR".equals(country)) {
568 			return (table, headers) -> {
569 				if (table == ProductRdbStation.TABLE) {
570 					List<String> result = Lists.newArrayList(headers);
571 
572 					// Remove missing columns in the GRB file
573 					result.remove(ProductRdbStation.COLUMN_FISHING_VALIDITY);
574 
575 					return result.toArray(new String[result.size()]);
576 				}
577 				return headers;
578 			};
579 		}
580 
581 		return null;
582 	}
583 }
584 
585