View Javadoc
1   package net.sumaris.importation.util.csv;
2   
3   /*-
4    * #%L
5    * SUMARiS:: Core Importation
6    * %%
7    * Copyright (C) 2018 - 2019 SUMARiS Consortium
8    * %%
9    * This program is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU General Public License as
11   * published by the Free Software Foundation, either version 3 of the
12   * License, or (at your option) any later version.
13   * 
14   * This program is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU General Public License for more details.
18   * 
19   * You should have received a copy of the GNU General Public
20   * License along with this program.  If not, see
21   * <http://www.gnu.org/licenses/gpl-3.0.html>.
22   * #L%
23   */
24  
25  import au.com.bytecode.opencsv.CSVReader;
26  import com.google.common.base.Preconditions;
27  import net.sumaris.core.dao.technical.schema.SumarisHibernateColumnMetadata;
28  import net.sumaris.core.dao.technical.schema.SumarisTableMetadata;
29  import net.sumaris.core.util.type.SequenceIterator;
30  import net.sumaris.importation.service.vo.DataLoadError;
31  import static net.sumaris.importation.service.vo.DataLoadError.*;
32  import net.sumaris.importation.service.vo.DataLoadResult;
33  import org.apache.commons.io.input.BOMInputStream;
34  import org.apache.commons.lang3.StringUtils;
35  import org.slf4j.Logger;
36  import org.slf4j.LoggerFactory;
37  import org.nuiton.i18n.I18n;
38  import org.springframework.dao.DataIntegrityViolationException;
39  
40  import java.io.*;
41  import java.nio.charset.Charset;
42  import java.util.Arrays;
43  
44  public class CSVFileReader implements FileReader {
45  
46  	private static final Logger log = LoggerFactory.getLogger(CSVFileReader.class);
47  
48  
49  	public final static int MAX_LOG_ERRORS = 500;
50  
51  	public final static char[] CSV_SEPARATORS = { ';', ',', '\t' };
52  
53  	protected CSVReader delegate;
54  
55  	protected FileInputStream fis;
56  	protected BOMInputStream ubis;
57  	protected InputStreamReader isr;
58  	protected BufferedReader br;
59  
60  	protected DataLoadResult result;
61  
62  	protected String[] headers;
63  
64  	protected String[] rowBuffer;
65  
66  	protected SequenceIterator lineCounter;
67  
68  	protected boolean ignoreEmptyRow;
69  
70  	protected String filename;
71  
72  	protected char separator;
73  
74  	// TODO : not used yet : used to know if a row is empty
75  	protected String emptyCellsRow;
76  
77  	public CSVFileReader(File inputFile, boolean ignoreEmptyRow) throws IOException {
78  		init(inputFile, ignoreEmptyRow, true, null);
79  	}
80  
81  	public CSVFileReader(File inputFile, boolean ignoreEmptyRow, boolean hasHeaders) throws IOException {
82  		init(inputFile, ignoreEmptyRow, hasHeaders, null);
83  	}
84  
85  	public CSVFileReader(File inputFile, boolean ignoreEmptyRow, boolean hasHeaders, String encodingName)
86  			throws IOException {
87  		init(inputFile, ignoreEmptyRow, hasHeaders, encodingName);
88  	}
89  
90  	@Override
91  	public String getFileName() {
92  		return filename;
93  	}
94  
95  	public char getSeparator() {
96  		return separator;
97  	}
98  
99  	protected void init(File inputFile, boolean ignoreEmptyRow, boolean hasHeaders, String encodingName)
100 			throws IOException {
101 		Preconditions.checkNotNull(inputFile);
102 		if (inputFile.exists() == false) {
103 			throw new FileNotFoundException("File not exists: " + inputFile.getAbsolutePath());
104 		}
105 		if (log.isDebugEnabled()) {
106 			log.debug("Initialize CSV Reader for file: " + inputFile.getPath());
107 		}
108 
109 		this.ignoreEmptyRow = ignoreEmptyRow;
110 		this.filename = inputFile.getName();
111 		this.result = new DataLoadResult();
112 		lineCounter = new SequenceIterator();
113 
114 		this.rowBuffer = null;
115 		this.headers = null;
116 		for (char separator : CSV_SEPARATORS) {
117 
118 			fis = new FileInputStream(inputFile);
119 			ubis = new BOMInputStream(fis);
120 			if (encodingName != null) {
121 				isr = new InputStreamReader(ubis, Charset.forName(encodingName));
122 			} else {
123 				isr = new InputStreamReader(ubis);
124 			}
125 			br = new BufferedReader(isr);
126 			delegate = new CSVReader(br, separator, '\"', 0);
127 
128 			// Read column headers :
129 			rowBuffer = readNext();
130 			if (rowBuffer.length == 1) {
131 				close();
132 			} else {
133 				if (log.isDebugEnabled()) {
134 					log.debug("Will use detected separator: " + (separator == '\t' ? "TAB" : separator));
135 				}
136 				this.separator = separator;
137 				this.emptyCellsRow = StringUtils.join(new String[this.rowBuffer.length], separator);
138 				break;
139 			}
140 		}
141 
142 		if (hasHeaders) {
143 			if (rowBuffer == null) {
144 				addError(null,
145 						ErrorType.FATAL,
146 						"NO_HEADER",
147 						I18n.t("import.validation.error.NO_HEADER", null, null));
148 
149 				throw new DataIntegrityViolationException("No valid headers found in file: " + inputFile.getPath());
150 			} else {
151 				this.headers = rowBuffer;
152 				this.rowBuffer = null;
153 			}
154 		}
155 	}
156 
157 	public String[] readNext() throws IOException {
158 		String[] cols = null;
159 		if (rowBuffer != null && rowBuffer.length > 0) {
160 			cols = rowBuffer;
161 			rowBuffer = null;
162 			return cols;
163 		}
164 		if (!ignoreEmptyRow) {
165 			// Read the next line
166 			cols = delegate.readNext();
167 
168 			// Increment line number
169 			lineCounter.next();
170 		} else {
171 			do {
172 				// Read the next line
173 				cols = delegate.readNext();
174 
175 				// Increment line number
176 				lineCounter.next();
177 
178 				// If end of file, return null
179 				if (cols == null || cols.length == 0) {
180 					return null;
181 				}
182 			} while (cols.length == 1
183 					&& cols[0].trim().length() == 0);
184 		}
185 
186 		// Ensure cols length is equals (or greater) than headers length 
187 		// to avoid IndexOutOfRangeException
188 		if (headers != null && cols.length != headers.length) {
189 			return Arrays.copyOf(cols, headers.length);
190 		}
191 
192 		return cols;
193 	}
194 
195 	/**
196 	 * Add a error only once (will log the record only the first call).
197 	 * The errorCode is used as an unique key
198 	 * @param tableMetadata
199 	 * @param colMeta
200 	 * @param columnNumber
201 	 * @param errorType
202 	 * @param errorCode
203 	 * @param description
204 	 */
205 	protected void addErrorOnce(
206             SumarisTableMetadata tableMetadata, SumarisHibernateColumnMetadata colMeta,
207             int columnNumber,
208             ErrorType errorType,
209             String errorCode, String description) {
210 		addError(createError(tableMetadata, colMeta, columnNumber, errorType, errorCode, description), true);
211 	}
212 
213 	protected void addError(
214             SumarisTableMetadata tableMetadata, SumarisHibernateColumnMetadata colMeta,
215             int columnNumber,
216             ErrorType errorType,
217             String errorCode, String description) {
218 
219 		addError(createError(tableMetadata, colMeta, columnNumber, errorType, errorCode, description),
220 				false);
221 	}
222 
223 	protected void addError(DataLoadError error, boolean onlyOnce) {
224 
225 		if (onlyOnce) {
226 			result.addErrorOnce(error);
227 		}
228 		else {
229 			result.addError(error);
230 		}
231 
232 		if (result.errorCount() < MAX_LOG_ERRORS) {
233 
234 			// log
235 			switch (error.getErrorType()) {
236 				case WARNING:
237 					log.warn(error.getDescription());
238 					break;
239 				case ERROR:
240 					log.error(error.getDescription());
241 					break;
242 				case FATAL:
243 					log.error(error.getDescription());
244 					break;
245 			}
246 		}
247 	}
248 
249 	protected DataLoadError createError(
250             SumarisTableMetadata tableMetadata, SumarisHibernateColumnMetadata colMeta,
251             int columnNumber,
252             ErrorType errorType,
253             String errorCode, String description) {
254 
255 		DataLoadError error = DataLoadError.Builder.create(tableMetadata, colMeta, lineCounter.getCurrentValue(), description)
256 				.setColumnNumber(columnNumber != -1 ? columnNumber : null)
257 				.setErrorCode(errorCode)
258 				.setErrorType(errorType)
259 				.build();
260 
261 		return error;
262 	}
263 
264 	protected void addError(Integer columnIndex,
265 							ErrorType errorType,
266 							String errorCode,
267 							String description) {
268 		Preconditions.checkArgument(columnIndex == null || columnIndex.intValue() >= 0);
269 
270 		DataLoadError error = new DataLoadError();
271 
272 		error.setLineNumber(lineCounter.getCurrentValue());
273 		error.setColumnNumber(columnIndex);
274 		String fullDescription = null;
275 		if (columnIndex != null
276 				&& headers != null
277 				&& columnIndex.intValue() < headers.length) {
278 			error.setColumnName(headers[columnIndex]);
279 
280 			fullDescription = String.format("[%s:%s - %s] %s", filename, lineCounter.getCurrentValue(), headers[columnIndex], description);
281 		} else {
282 			fullDescription = String.format("[%s:%s] %s", filename, lineCounter.getCurrentValue(), description);
283 		}
284 		error.setDescription(fullDescription);
285 		error.setErrorCode(errorCode);
286 		error.setErrorType(errorType);
287 
288 		addError(error, false);
289 	}
290 
291 	@Override
292 	public void close() throws IOException {
293 		delegate.close();
294 		br.close();
295 		isr.close();
296 		ubis.close();
297 		fis.close();
298 
299 		lineCounter = new SequenceIterator();
300 		rowBuffer = null;
301 		emptyCellsRow = null;
302 	}
303 
304 	public DataLoadResult getResult() {
305 		return result;
306 	}
307 
308 	public String[] getHeaders() {
309 		return headers;
310 	}
311 
312 	public boolean isIgnoreEmptyRow() {
313 		return ignoreEmptyRow;
314 	}
315 
316 	public void setIgnoreEmptyRow(boolean ignoreEmptyRow) {
317 		this.ignoreEmptyRow = ignoreEmptyRow;
318 	}
319 
320 	public int getCurrentLine() {
321 		return lineCounter.getCurrentValue();
322 	}
323 
324 
325 }