1 package net.sumaris.importation.util.csv;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 import au.com.bytecode.opencsv.CSVReader;
26 import com.google.common.base.Preconditions;
27 import net.sumaris.core.dao.technical.schema.SumarisHibernateColumnMetadata;
28 import net.sumaris.core.dao.technical.schema.SumarisTableMetadata;
29 import net.sumaris.core.util.type.SequenceIterator;
30 import net.sumaris.importation.service.vo.DataLoadError;
31 import static net.sumaris.importation.service.vo.DataLoadError.*;
32 import net.sumaris.importation.service.vo.DataLoadResult;
33 import org.apache.commons.io.input.BOMInputStream;
34 import org.apache.commons.lang3.StringUtils;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
37 import org.nuiton.i18n.I18n;
38 import org.springframework.dao.DataIntegrityViolationException;
39
40 import java.io.*;
41 import java.nio.charset.Charset;
42 import java.util.Arrays;
43
44 public class CSVFileReader implements FileReader {
45
46 private static final Logger log = LoggerFactory.getLogger(CSVFileReader.class);
47
48
49 public final static int MAX_LOG_ERRORS = 500;
50
51 public final static char[] CSV_SEPARATORS = { ';', ',', '\t' };
52
53 protected CSVReader delegate;
54
55 protected FileInputStream fis;
56 protected BOMInputStream ubis;
57 protected InputStreamReader isr;
58 protected BufferedReader br;
59
60 protected DataLoadResult result;
61
62 protected String[] headers;
63
64 protected String[] rowBuffer;
65
66 protected SequenceIterator lineCounter;
67
68 protected boolean ignoreEmptyRow;
69
70 protected String filename;
71
72 protected char separator;
73
74
75 protected String emptyCellsRow;
76
77 public CSVFileReader(File inputFile, boolean ignoreEmptyRow) throws IOException {
78 init(inputFile, ignoreEmptyRow, true, null);
79 }
80
81 public CSVFileReader(File inputFile, boolean ignoreEmptyRow, boolean hasHeaders) throws IOException {
82 init(inputFile, ignoreEmptyRow, hasHeaders, null);
83 }
84
85 public CSVFileReader(File inputFile, boolean ignoreEmptyRow, boolean hasHeaders, String encodingName)
86 throws IOException {
87 init(inputFile, ignoreEmptyRow, hasHeaders, encodingName);
88 }
89
90 @Override
91 public String getFileName() {
92 return filename;
93 }
94
95 public char getSeparator() {
96 return separator;
97 }
98
99 protected void init(File inputFile, boolean ignoreEmptyRow, boolean hasHeaders, String encodingName)
100 throws IOException {
101 Preconditions.checkNotNull(inputFile);
102 if (inputFile.exists() == false) {
103 throw new FileNotFoundException("File not exists: " + inputFile.getAbsolutePath());
104 }
105 if (log.isDebugEnabled()) {
106 log.debug("Initialize CSV Reader for file: " + inputFile.getPath());
107 }
108
109 this.ignoreEmptyRow = ignoreEmptyRow;
110 this.filename = inputFile.getName();
111 this.result = new DataLoadResult();
112 lineCounter = new SequenceIterator();
113
114 this.rowBuffer = null;
115 this.headers = null;
116 for (char separator : CSV_SEPARATORS) {
117
118 fis = new FileInputStream(inputFile);
119 ubis = new BOMInputStream(fis);
120 if (encodingName != null) {
121 isr = new InputStreamReader(ubis, Charset.forName(encodingName));
122 } else {
123 isr = new InputStreamReader(ubis);
124 }
125 br = new BufferedReader(isr);
126 delegate = new CSVReader(br, separator, '\"', 0);
127
128
129 rowBuffer = readNext();
130 if (rowBuffer.length == 1) {
131 close();
132 } else {
133 if (log.isDebugEnabled()) {
134 log.debug("Will use detected separator: " + (separator == '\t' ? "TAB" : separator));
135 }
136 this.separator = separator;
137 this.emptyCellsRow = StringUtils.join(new String[this.rowBuffer.length], separator);
138 break;
139 }
140 }
141
142 if (hasHeaders) {
143 if (rowBuffer == null) {
144 addError(null,
145 ErrorType.FATAL,
146 "NO_HEADER",
147 I18n.t("import.validation.error.NO_HEADER", null, null));
148
149 throw new DataIntegrityViolationException("No valid headers found in file: " + inputFile.getPath());
150 } else {
151 this.headers = rowBuffer;
152 this.rowBuffer = null;
153 }
154 }
155 }
156
157 public String[] readNext() throws IOException {
158 String[] cols = null;
159 if (rowBuffer != null && rowBuffer.length > 0) {
160 cols = rowBuffer;
161 rowBuffer = null;
162 return cols;
163 }
164 if (!ignoreEmptyRow) {
165
166 cols = delegate.readNext();
167
168
169 lineCounter.next();
170 } else {
171 do {
172
173 cols = delegate.readNext();
174
175
176 lineCounter.next();
177
178
179 if (cols == null || cols.length == 0) {
180 return null;
181 }
182 } while (cols.length == 1
183 && cols[0].trim().length() == 0);
184 }
185
186
187
188 if (headers != null && cols.length != headers.length) {
189 return Arrays.copyOf(cols, headers.length);
190 }
191
192 return cols;
193 }
194
195
196
197
198
199
200
201
202
203
204
205 protected void addErrorOnce(
206 SumarisTableMetadata tableMetadata, SumarisHibernateColumnMetadata colMeta,
207 int columnNumber,
208 ErrorType errorType,
209 String errorCode, String description) {
210 addError(createError(tableMetadata, colMeta, columnNumber, errorType, errorCode, description), true);
211 }
212
213 protected void addError(
214 SumarisTableMetadata tableMetadata, SumarisHibernateColumnMetadata colMeta,
215 int columnNumber,
216 ErrorType errorType,
217 String errorCode, String description) {
218
219 addError(createError(tableMetadata, colMeta, columnNumber, errorType, errorCode, description),
220 false);
221 }
222
223 protected void addError(DataLoadError error, boolean onlyOnce) {
224
225 if (onlyOnce) {
226 result.addErrorOnce(error);
227 }
228 else {
229 result.addError(error);
230 }
231
232 if (result.errorCount() < MAX_LOG_ERRORS) {
233
234
235 switch (error.getErrorType()) {
236 case WARNING:
237 log.warn(error.getDescription());
238 break;
239 case ERROR:
240 log.error(error.getDescription());
241 break;
242 case FATAL:
243 log.error(error.getDescription());
244 break;
245 }
246 }
247 }
248
249 protected DataLoadError createError(
250 SumarisTableMetadata tableMetadata, SumarisHibernateColumnMetadata colMeta,
251 int columnNumber,
252 ErrorType errorType,
253 String errorCode, String description) {
254
255 DataLoadError error = DataLoadError.Builder.create(tableMetadata, colMeta, lineCounter.getCurrentValue(), description)
256 .setColumnNumber(columnNumber != -1 ? columnNumber : null)
257 .setErrorCode(errorCode)
258 .setErrorType(errorType)
259 .build();
260
261 return error;
262 }
263
264 protected void addError(Integer columnIndex,
265 ErrorType errorType,
266 String errorCode,
267 String description) {
268 Preconditions.checkArgument(columnIndex == null || columnIndex.intValue() >= 0);
269
270 DataLoadError error = new DataLoadError();
271
272 error.setLineNumber(lineCounter.getCurrentValue());
273 error.setColumnNumber(columnIndex);
274 String fullDescription = null;
275 if (columnIndex != null
276 && headers != null
277 && columnIndex.intValue() < headers.length) {
278 error.setColumnName(headers[columnIndex]);
279
280 fullDescription = String.format("[%s:%s - %s] %s", filename, lineCounter.getCurrentValue(), headers[columnIndex], description);
281 } else {
282 fullDescription = String.format("[%s:%s] %s", filename, lineCounter.getCurrentValue(), description);
283 }
284 error.setDescription(fullDescription);
285 error.setErrorCode(errorCode);
286 error.setErrorType(errorType);
287
288 addError(error, false);
289 }
290
291 @Override
292 public void close() throws IOException {
293 delegate.close();
294 br.close();
295 isr.close();
296 ubis.close();
297 fis.close();
298
299 lineCounter = new SequenceIterator();
300 rowBuffer = null;
301 emptyCellsRow = null;
302 }
303
304 public DataLoadResult getResult() {
305 return result;
306 }
307
308 public String[] getHeaders() {
309 return headers;
310 }
311
312 public boolean isIgnoreEmptyRow() {
313 return ignoreEmptyRow;
314 }
315
316 public void setIgnoreEmptyRow(boolean ignoreEmptyRow) {
317 this.ignoreEmptyRow = ignoreEmptyRow;
318 }
319
320 public int getCurrentLine() {
321 return lineCounter.getCurrentValue();
322 }
323
324
325 }