View Javadoc

1   /**
2   * Copyright (C) 2007 EDIT
3   * European Distributed Institute of Taxonomy 
4   * http://www.e-taxonomy.eu
5   * 
6   * The contents of this file are subject to the Mozilla Public License Version 1.1
7   * See LICENSE.TXT at the top of this package for the full license terms.
8   */
9   
10  package eu.etaxonomy.cdm.io.berlinModel.in;
11  
12  import java.io.IOException;
13  import java.net.MalformedURLException;
14  import java.net.URISyntaxException;
15  import java.net.URL;
16  import java.sql.ResultSet;
17  import java.sql.SQLException;
18  import java.util.Collection;
19  import java.util.HashMap;
20  import java.util.HashSet;
21  import java.util.Map;
22  import java.util.Set;
23  
24  import org.apache.log4j.Logger;
25  import org.springframework.stereotype.Component;
26  
27  import eu.etaxonomy.cdm.common.CdmUtils;
28  import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
29  import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
30  import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
31  import eu.etaxonomy.cdm.io.common.IOValidator;
32  import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33  import eu.etaxonomy.cdm.io.common.Source;
34  import eu.etaxonomy.cdm.model.common.Annotation;
35  import eu.etaxonomy.cdm.model.common.CdmBase;
36  import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
37  import eu.etaxonomy.cdm.model.common.Language;
38  import eu.etaxonomy.cdm.model.common.Marker;
39  import eu.etaxonomy.cdm.model.common.MarkerType;
40  import eu.etaxonomy.cdm.model.common.TermVocabulary;
41  import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
42  import eu.etaxonomy.cdm.model.description.Feature;
43  import eu.etaxonomy.cdm.model.description.TaxonDescription;
44  import eu.etaxonomy.cdm.model.description.TextData;
45  import eu.etaxonomy.cdm.model.media.ImageFile;
46  import eu.etaxonomy.cdm.model.media.Media;
47  import eu.etaxonomy.cdm.model.media.MediaRepresentation;
48  import eu.etaxonomy.cdm.model.reference.ReferenceBase;
49  import eu.etaxonomy.cdm.model.taxon.Taxon;
50  import eu.etaxonomy.cdm.model.taxon.TaxonBase;
51  import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52  
53  /**
54   * @author a.mueller
55   * @created 20.03.2008
56   * @version 1.0
57   */
58  @Component
59  public class BerlinModelFactsImport  extends BerlinModelImportBase {
60  	private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
61  
62  	public static final String NAMESPACE = "Fact";
63  	
64  	public static final String SEQUENCE_PREFIX = "ORDER: ";
65  	
66  	private int modCount = 10000;
67  	private static final String pluralString = "facts";
68  	private static final String dbTableName = "Fact";
69  
70  	//FIXME don't use as class variable
71  	private Map<Integer, Feature> featureMap;
72  	
73  	public BerlinModelFactsImport(){
74  		super();
75  	}
76  
77  
78  	private TermVocabulary<Feature> getFeatureVocabulary(){
79  		try {
80  			//TODO work around until service method works
81  			TermVocabulary<Feature> featureVocabulary =  BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
82  			//TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
83  			return featureVocabulary;
84  		} catch (UnknownCdmTypeException e) {
85  			logger.error("Feature vocabulary not available. New vocabulary created");
86  			return new TermVocabulary<Feature>() ;
87  		}
88  	}
89  	
90  	private Map<Integer, Feature>  invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
91  		
92  		Map<Integer, Feature>  result = bmiConfig.getFeatureMap();
93  		Source source = bmiConfig.getSource();
94  		
95  		try {
96  			//get data from database
97  			String strQuery = 
98  					" SELECT FactCategory.* " + 
99  					" FROM FactCategory "+
100                     " WHERE (1=1)";
101 			ResultSet rs = source.getResultSet(strQuery) ;
102 
103 			
104 			TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
105 			int i = 0;
106 			//for each reference
107 			while (rs.next()){
108 				
109 				if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
110 				
111 				int factCategoryId = rs.getInt("factCategoryId");
112 				String factCategory = rs.getString("factCategory");
113 				
114 					
115 				Feature feature;
116 				try {
117 					feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
118 				} catch (UnknownCdmTypeException e) {
119 					logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
120 					feature = Feature.NewInstance(factCategory, factCategory, null);
121 					featureVocabulary.addTerm(feature);
122 					feature.setSupportsTextData(true);
123 					//TODO
124 //					MaxFactNumber	int	Checked
125 //					ExtensionTableName	varchar(100)	Checked
126 //					Description	nvarchar(1000)	Checked
127 //					locExtensionFormName	nvarchar(80)	Checked
128 //					RankRestrictionFk	int	Checked
129 				}
130 								
131 				result.put(factCategoryId, feature);
132 			}
133 			Collection<Feature> col = result.values();
134 			getTermService().save((Collection)col);
135 			return result;
136 		} catch (SQLException e) {
137 			logger.error("SQLException:" +  e);
138 			return null;
139 		}
140 
141 	}
142 
143 	/* (non-Javadoc)
144 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
145 	 */
146 	@Override
147 	protected boolean doInvoke(BerlinModelImportState state) {
148 		featureMap = invokeFactCategories(state.getConfig());
149 		return super.doInvoke(state);
150 	}
151 		
152 
153 	/* (non-Javadoc)
154 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
155 	 */
156 	@Override
157 	protected String getRecordQuery(BerlinModelImportConfigurator config) {
158 			String strQuery = 
159 					" SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " + 
160 					" FROM Fact " +
161                       	" INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
162                       	" LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
163               	" WHERE (FactId IN (" + ID_LIST_TOKEN + "))" + 
164                         " ORDER By Sequence";
165 		return strQuery;
166 	}
167 	
168 
169 	/* (non-Javadoc)
170 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
171 	 */
172 	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
173 		boolean success = true ;
174 		BerlinModelImportConfigurator config = state.getConfig();
175 		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
176 		Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
177 		Map<String, ReferenceBase> biblioRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
178 		Map<String, ReferenceBase> nomRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
179 
180 		ResultSet rs = partitioner.getResultSet();
181 		
182 			ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
183 			
184 		try{
185 			int i = 0;
186 			//for each fact
187 			while (rs.next()){
188 				try{
189 					if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
190 					
191 					int factId = rs.getInt("factId");
192 					Object taxonIdObj = rs.getObject("taxonId");
193 					int taxonId = rs.getInt("taxonId");
194 					Object factRefFkObj = rs.getObject("factRefFk");
195 					Object categoryFkObj = rs.getObject("factCategoryFk");
196 					Integer categoryFk = rs.getInt("factCategoryFk");
197 					String details = rs.getString("Details");
198 					String fact = CdmUtils.Nz(rs.getString("Fact"));
199 					String notes = CdmUtils.Nz(rs.getString("notes"));
200 					Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
201 					Boolean publishFlag = rs.getBoolean("publishFlag");
202 					
203 					TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
204 					Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
205 					
206 					if (taxonBase == null){
207 						logger.warn("Taxon for Fact " + factId + " does not exist in store");
208 						success = false;
209 					}else{
210 						Taxon taxon;
211 						if ( taxonBase instanceof Taxon ) {
212 							taxon = (Taxon) taxonBase;
213 						}else{
214 							logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
215 							success = false;
216 							continue;
217 						}
218 						
219 						TaxonDescription taxonDescription = null;
220 						Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
221 						
222 						boolean isImage = false;
223 						Media media = null;
224 						//for diptera images
225 						if (categoryFk == 51){  //TODO check also FactCategory string
226 							isImage = true;
227 							media = Media.NewInstance();
228 							taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
229 							if (taxonDescription == null){
230 								continue;
231 							}
232 						}
233 						//all others (no image)
234 						else{ 
235 							for (TaxonDescription desc: descriptionSet){
236 								if (! desc.isImageGallery()){
237 									taxonDescription = desc;
238 								}
239 							}
240 							if (taxonDescription == null){
241 								taxonDescription = TaxonDescription.NewInstance();
242 								taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache(), true);
243 								taxon.addDescription(taxonDescription);
244 							}
245 						}
246 					
247 						//textData
248 						TextData textData = null;
249 						boolean newTextData = true;
250 	
251 						// For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData 
252 						// description element append the fact text to the existing TextData
253 						if(categoryFk == 31) {
254 							Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
255 							for (DescriptionElementBase descriptionElement : descriptionElements) {
256 								String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
257 								if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
258 									textData = (TextData)descriptionElement;
259 									String factTextStr = textData.getText(Language.DEFAULT());
260 									// FIXME: Removing newlines doesn't work
261 									if (factTextStr.contains("\\r\\n")) {
262 										factTextStr = factTextStr.replaceAll("\\r\\n","");
263 									}
264 									StringBuilder factText = new StringBuilder(factTextStr);
265 									factText.append(fact);
266 									fact = factText.toString();
267 									newTextData = false;
268 									break;
269 								}
270 							}
271 						}
272 						
273 						if(newTextData == true)	{ 
274 							textData = TextData.NewInstance(); 
275 						}
276 						
277 						//for diptera database
278 						if (categoryFk == 99 && notes.contains("<OriginalName>")){
279 //							notes = notes.replaceAll("<OriginalName>", "");
280 //							notes = notes.replaceAll("</OriginalName>", "");
281 							fact = notes + ": " +  fact ;
282 						}
283 						//TODO textData.putText(fact, bmiConfig.getFactLanguage());  //doesn't work because  bmiConfig.getFactLanguage() is not not a persistent Language Object
284 						//throws  in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
285 						if (isImage){
286 							textData.addMedia(media);
287 							textData.setType(Feature.IMAGE());
288 						}else{
289 							textData.putText(fact, Language.DEFAULT());
290 							textData.setType(feature);
291 						}
292 						
293 						//reference
294 						ReferenceBase citation = null;
295 						String factRefFk = String.valueOf(factRefFkObj);
296 						if (factRefFkObj != null){
297 							citation = getReferenceOnlyFromMaps(
298 									biblioRefMap, nomRefMap, factRefFk);	
299 							}
300 						if (citation == null && (factRefFkObj != null)){
301 								logger.warn("Citation not found in referenceMap: " + factRefFk);
302 							success = false;
303 							}
304 						if (citation != null || CdmUtils.isNotEmpty(details)){
305 							DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
306 							originalSource.setCitation(citation);
307 							originalSource.setCitationMicroReference(details);
308 							textData.addSource(originalSource);
309 						}
310 						taxonDescription.addElement(textData);
311 						//doubtfulFlag
312 						if (doubtfulFlag){
313 							textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
314 						}
315 						//publisheFlag
316 						textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
317 						//Sequence
318 						Integer sequence = rs.getInt("Sequence");
319 						if (sequence != null && sequence != 999){
320 							String strSequence = String.valueOf(sequence);
321 							strSequence = SEQUENCE_PREFIX + strSequence;
322 							//TODO make it an Extension when possible
323 							//Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
324 							Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
325 							textData.addAnnotation(annotation);
326 						}
327 						
328 						//						if (categoryFkObj == FACT_DESCRIPTION){
329 	//						//;
330 	//					}else if (categoryFkObj == FACT_OBSERVATION){
331 	//						//;
332 	//					}else if (categoryFkObj == FACT_DISTRIBUTION_EM){
333 	//						//
334 	//					}else {
335 	//						//TODO
336 	//						//logger.warn("FactCategory " + categoryFk + " not yet implemented");
337 	//					}
338 						
339 						//notes
340 						doCreatedUpdatedNotes(state, textData, rs);
341 						
342 						//TODO
343 						//Designation References -> unclear how to map to CDM
344 						//factId -> OriginalSource for descriptionElements not yet implemented
345 						
346 						//sequence -> textData is not an identifiable entity therefore extensions are not possible
347 						//fact category better
348 						
349 						taxaToSave.add(taxon);
350 					}
351 				} catch (Exception re){
352 					logger.error("An exception occurred during the facts import");
353 					re.printStackTrace();
354 					success = false;
355 				}
356 				//put
357 			}
358 			logger.info("Facts handled: " + (i-1));
359 			logger.info("Taxa to save: " + taxaToSave.size());
360 			getTaxonService().save(taxaToSave);	
361 		}catch(SQLException e){
362 			throw new RuntimeException(e);
363 		}
364 		return success;
365 	}
366 
367 	/* (non-Javadoc)
368 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
369 	 */
370 	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
371 		String nameSpace;
372 		Class cdmClass;
373 		Set<String> idSet;
374 		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
375 			
376 		try{
377 			Set<String> taxonIdSet = new HashSet<String>();
378 			Set<String> referenceIdSet = new HashSet<String>();
379 			Set<String> refDetailIdSet = new HashSet<String>();
380 			while (rs.next()){
381 				handleForeignKey(rs, taxonIdSet, "taxonId");
382 				handleForeignKey(rs, referenceIdSet, "FactRefFk");
383 				handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
384 				handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
385 				handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
386 		}
387 			
388 			//taxon map
389 			nameSpace = BerlinModelTaxonImport.NAMESPACE;
390 			cdmClass = TaxonBase.class;
391 			idSet = taxonIdSet;
392 			Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
393 			result.put(nameSpace, taxonMap);
394 
395 
396 			//nom reference map
397 			nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
398 			cdmClass = ReferenceBase.class;
399 			idSet = referenceIdSet;
400 			Map<String, ReferenceBase> nomReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
401 			result.put(nameSpace, nomReferenceMap);
402 
403 			//biblio reference map
404 			nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
405 			cdmClass = ReferenceBase.class;
406 			idSet = referenceIdSet;
407 			Map<String, ReferenceBase> biblioReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
408 			result.put(nameSpace, biblioReferenceMap);
409 			
410 			//nom refDetail map
411 			nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
412 			cdmClass = ReferenceBase.class;
413 			idSet = refDetailIdSet;
414 			Map<String, ReferenceBase> nomRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
415 			result.put(nameSpace, nomRefDetailMap);
416 			
417 			//biblio refDetail map
418 			nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
419 			cdmClass = ReferenceBase.class;
420 			idSet = refDetailIdSet;
421 			Map<String, ReferenceBase> biblioRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
422 			result.put(nameSpace, biblioRefDetailMap);
423 	
424 		} catch (SQLException e) {
425 			throw new RuntimeException(e);
426 	}
427 		return result;
428 	}
429 	
430 	
431 	/**
432 	 * @param state 
433 	 * @param media 
434 	 * @param media 
435 	 * @param descriptionSet 
436 	 * 
437 	 */
438 	private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
439 		TaxonDescription taxonDescription = null;
440 		ReferenceBase sourceRef = state.getConfig().getSourceReference();
441 		String uri = fact;
442 		Integer size = null; 
443 		ImageMetaData imageMetaData = ImageMetaData.newInstance();
444 		URL url;
445 		try {
446 			url = new URL(fact.trim());
447 		} catch (MalformedURLException e) {
448 			logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
449 			return null;
450 		}
451 		try {
452 			imageMetaData.readMetaData(url.toURI(), 0);
453 		}
454 		catch(URISyntaxException e){
455 			logger.error("URISyntaxException reading image metadata." , e);
456 		} catch (IOException e) {
457 			logger.error("IOError reading image metadata." , e);
458 		}
459 		MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
460 		media.addRepresentation(mediaRepresentation);
461 		ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
462 		mediaRepresentation.addRepresentationPart(image);
463 		
464 		taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
465 		
466 		return taxonDescription;
467 	}
468 
469 	private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
470 		if (taxonIdObj != null){
471 			return taxonMap.get(String.valueOf(taxonId));
472 		}else{
473 			return null;
474 		}
475 		
476 	}
477 	
478 	private Feature getFeature(Map<Integer, Feature>  featureMap, Object categoryFkObj, Integer categoryFk){
479 		if (categoryFkObj != null){
480 			return featureMap.get(categoryFk); 
481 		}else{
482 			return null;
483 		}
484 		
485 	}
486 	
487 
488 	/* (non-Javadoc)
489 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
490 	 */
491 	@Override
492 	protected boolean doCheck(BerlinModelImportState state){
493 		IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
494 		return validator.validate(state);
495 	}
496 				
497 	/* (non-Javadoc)
498 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
499 	 */
500 	@Override
501 	protected String getTableName() {
502 		return dbTableName;
503 			}
504 	
505 	/* (non-Javadoc)
506 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
507 	 */
508 	@Override
509 	public String getPluralString() {
510 		return pluralString;
511 		}
512 	
513 	/* (non-Javadoc)
514 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
515 	 */
516 	protected boolean isIgnore(BerlinModelImportState state){
517 		return ! state.getConfig().isDoFacts();
518 	}
519 
520 
521 }