View Javadoc

1   /**
2   * Copyright (C) 2007 EDIT
3   * European Distributed Institute of Taxonomy 
4   * http://www.e-taxonomy.eu
5   * 
6   * The contents of this file are subject to the Mozilla Public License Version 1.1
7   * See LICENSE.TXT at the top of this package for the full license terms.
8   */
9   
10  package eu.etaxonomy.cdm.io.specimen.excel.in;
11  
12  import java.net.URL;
13  import java.util.ArrayList;
14  import java.util.HashMap;
15  import java.util.List;
16  import java.util.Map;
17  
18  import org.apache.log4j.Logger;
19  import org.apache.poi.hssf.usermodel.HSSFWorkbook;
20  import org.springframework.stereotype.Component;
21  import org.springframework.transaction.TransactionStatus;
22  
23  import eu.etaxonomy.cdm.common.ExcelUtils;
24  import eu.etaxonomy.cdm.common.mediaMetaData.MediaMetaData;
25  import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
26  import eu.etaxonomy.cdm.database.DbSchemaValidation;
27  import eu.etaxonomy.cdm.io.common.ICdmIO;
28  import eu.etaxonomy.cdm.io.common.IImportConfigurator;
29  import eu.etaxonomy.cdm.io.common.MapWrapper;
30  import eu.etaxonomy.cdm.io.specimen.SpecimenIoBase;
31  import eu.etaxonomy.cdm.io.specimen.UnitsGatheringArea;
32  import eu.etaxonomy.cdm.io.specimen.UnitsGatheringEvent;
33  import eu.etaxonomy.cdm.model.agent.Institution;
34  import eu.etaxonomy.cdm.model.common.CdmBase;
35  import eu.etaxonomy.cdm.model.location.NamedArea;
36  import eu.etaxonomy.cdm.model.media.ImageFile;
37  import eu.etaxonomy.cdm.model.media.Media;
38  import eu.etaxonomy.cdm.model.media.MediaRepresentation;
39  import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40  import eu.etaxonomy.cdm.model.name.NonViralName;
41  import eu.etaxonomy.cdm.model.occurrence.Collection;
42  import eu.etaxonomy.cdm.model.occurrence.DerivationEvent;
43  import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
44  import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
45  import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
46  import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
47  import eu.etaxonomy.cdm.model.occurrence.LivingBeing;
48  import eu.etaxonomy.cdm.model.occurrence.Observation;
49  import eu.etaxonomy.cdm.model.occurrence.Specimen;
50  import eu.etaxonomy.cdm.model.reference.ReferenceBase;
51  import eu.etaxonomy.cdm.model.taxon.Taxon;
52  import eu.etaxonomy.cdm.model.taxon.TaxonBase;
53  import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
54  
55  /**
56   * @author p.kelbert
57   * @created 29.10.2008
58   * @version 1.0
59   */
60  @Component
61  public class SpecimenExcelImport  extends SpecimenIoBase<SpecimenExcelImportConfigurator, SpecimenExcelImportState>  implements ICdmIO<SpecimenExcelImportState> {
62  
63  	private static final Logger logger = Logger.getLogger(SpecimenExcelImport.class);
64  
65  	protected String fullScientificNameString;
66  	protected String nomenclatureCode;
67  	protected String institutionCode;
68  	protected String collectionCode;
69  	protected String unitID;
70  	protected String recordBasis;
71  	protected String accessionNumber;
72  	protected String collectorsNumber;
73  	protected String fieldNumber;
74  	protected Double longitude;
75  	protected Double latitude;
76  	protected String locality;
77  	protected String languageIso = null;
78  	protected String country;
79  	protected String isocountry;
80  	protected int depth;
81  	protected int altitude;
82  	protected ArrayList<String> gatheringAgentList;
83  	protected ArrayList<String> identificationList;
84  	protected ArrayList<String> namedAreaList;
85  	protected ArrayList<String> multimediaObjects;
86  
87  	protected HSSFWorkbook hssfworkbook = null;
88  
89  
90  	public SpecimenExcelImport() {
91  		super();
92  	}
93  
94  
95  	/*
96  	 * Store the unit's properties into variables
97  	 * @param unit: the hashmap containing the splitted Excel line (Key=column name, value=value)
98  	 */
99  	private void setUnitPropertiesExcel(HashMap<String,String> unit){
100 		String author = unit.get("author");
101 		author=author.replaceAll("None","");
102 		String taxonName = unit.get("taxonName");
103 		taxonName = taxonName.replaceAll("None", "");
104 
105 		try {
106 			this.institutionCode = unit.get("institution").replaceAll("None", null);
107 		} catch (Exception e) {this.institutionCode = "";}
108 
109 		try {this.collectionCode = unit.get("collection").replaceAll("None", null);
110 		} catch (Exception e) {this.collectionCode = "";}
111 
112 		try {this.unitID = unit.get("unitID").replaceAll("None", null);
113 		} catch (Exception e) {this.unitID = "";}
114 
115 		try {this.recordBasis = unit.get("recordBasis").replaceAll("None", null);
116 		} catch (Exception e) {this.recordBasis = "";}
117 
118 		try {this.accessionNumber = null;
119 		} catch (Exception e) {this.accessionNumber = "";}
120 
121 		try {this.locality = unit.get("locality").replaceAll("None", null);
122 		} catch (Exception e) {this.locality = "";}
123 
124 		try {this.longitude = Double.valueOf(unit.get("longitude"));
125 		} catch (Exception e) {this.longitude = 0.0;}
126 
127 		try {this.latitude = Double.valueOf(unit.get("latitude"));
128 		} catch (Exception e) {this.latitude = 0.0;}
129 
130 		try {this.country = unit.get("country").replaceAll("None", null);
131 		} catch (Exception e) {this.country = "";}
132 
133 		try {this.isocountry = unit.get("isoCountry").replaceAll("None", null);
134 		} catch (Exception e) {this.isocountry = "";}
135 
136 		try {this.fieldNumber = unit.get("field number").replaceAll("None", null);
137 		} catch (Exception e) {this.fieldNumber = "";}
138 
139 		try {this.collectorsNumber = unit.get("collector number").replaceAll("None", null);
140 		} catch (Exception e) {this.collectorsNumber = "";}
141 
142 		try {
143 			String url =unit.get("url");		
144 			url=url.replaceAll("None", null);
145 			this.multimediaObjects.add(url);
146 		} catch (Exception e) {this.multimediaObjects = new ArrayList<String>();}
147 
148 		try {
149 			String coll =unit.get("collector");		
150 			coll=coll.replaceAll("None", null);
151 			this.gatheringAgentList.add(coll);
152 		} catch (Exception e) {this.gatheringAgentList = new ArrayList<String>();}
153 
154 		try {this.identificationList.add(taxonName+" "+author);
155 		} catch (Exception e) {this.identificationList = new ArrayList<String>();}
156 
157 	}
158 
159 	private Institution getInstitution(String institutionCode, SpecimenExcelImportConfigurator config){
160 		Institution institution;
161 		List<Institution> institutions;
162 		try{
163 			institutions= getAgentService().searchInstitutionByCode(this.institutionCode);
164 		}catch(Exception e){
165 			institutions=new ArrayList<Institution>();
166 		}
167 		if (institutions.size() ==0 || !config.getReUseExistingMetadata()){
168 			System.out.println("Institution (agent) unknown or not allowed to reuse existing metadata");
169 			//create institution
170 			institution = Institution.NewInstance();
171 			institution.setCode(this.institutionCode);				
172 		}
173 		else{
174 			System.out.println("Institution (agent) already in the db");
175 			institution = institutions.get(0);
176 		}
177 		return institution;
178 	}
179 
180 	/*
181 	 * Look if the Collection does already exists
182 	 * @param collectionCode: a string
183 	 * @param institution: the current Institution
184 	 * @param app
185 	 * @return the Collection (existing or new)
186 	 */
187 	private Collection getCollection(String collectionCode, Institution institution, SpecimenExcelImportConfigurator config){
188 		Collection collection = Collection.NewInstance();
189 		List<Collection> collections;
190 		try{
191 			collections = getCollectionService().searchByCode(this.collectionCode);
192 		}catch(Exception e){
193 			collections=new ArrayList<Collection>();
194 		}
195 		if (collections.size() ==0 || !config.getReUseExistingMetadata()){
196 			System.out.println("Collection not found or do not reuse existing metadata  "+this.collectionCode);
197 			//create new collection
198 			collection.setCode(this.collectionCode);
199 			collection.setCodeStandard("GBIF");
200 			collection.setInstitute(institution);
201 		}
202 		else{
203 			boolean collectionFound=false;
204 			for (int i=0; i<collections.size(); i++){
205 				collection = collections.get(i);
206 				try {
207 					if (collection.getInstitute().getCode().equalsIgnoreCase(institution.getCode())){ 
208 						//found a collection with the same code and the same institution
209 						collectionFound=true;
210 					}
211 				} catch (NullPointerException e) {}
212 			}
213 			if (!collectionFound){ 
214 				collection.setCode(this.collectionCode);
215 				collection.setCodeStandard("GBIF");
216 				collection.setInstitute(institution);
217 			}
218 
219 		}
220 		return collection;
221 	}
222 
223 	/*
224 	 * 
225 	 * @param app
226 	 * @param derivedThing
227 	 * @param sec
228 	 */
229 	private void setTaxonNameBase(SpecimenExcelImportConfigurator config, DerivedUnitBase derivedThing, ReferenceBase sec){
230 		NonViralName<?> taxonName = null;
231 		String fullScientificNameString;
232 		Taxon taxon = null;
233 		DeterminationEvent determinationEvent = null;
234 		List<TaxonBase> names = null;
235 
236 		String scientificName="";
237 		boolean preferredFlag=false;
238 
239 		for (int i = 0; i < this.identificationList.size(); i++) {
240 			fullScientificNameString = this.identificationList.get(i);
241 			fullScientificNameString = fullScientificNameString.replaceAll(" et ", " & ");
242 			if (fullScientificNameString.indexOf("_preferred_") != -1){
243 				scientificName = fullScientificNameString.split("_preferred_")[0];
244 				String pTmp = fullScientificNameString.split("_preferred_")[1].split("_code_")[0];
245 				if (pTmp == "1" || pTmp.toLowerCase().indexOf("true") != -1)
246 					preferredFlag=true;
247 				else
248 					preferredFlag=false;
249 			}
250 			else scientificName = fullScientificNameString;
251 
252 			if (fullScientificNameString.indexOf("_code_") != -1)	
253 				this.nomenclatureCode = fullScientificNameString.split("_code_")[1];
254 
255 			if (config.getDoAutomaticParsing()){	
256 				taxonName = this.parseScientificName(scientificName);	
257 			} else {
258 				taxonName.setTitleCache(scientificName, true);
259 			}
260 
261 			if (config.getDoReUseTaxon()){
262 				try{
263 					names = getTaxonService().searchTaxaByName(scientificName, sec);
264 					taxon = (Taxon)names.get(0);
265 				}
266 				catch(Exception e){taxon=null;}
267 			}
268 			if (!config.getDoReUseTaxon() || taxon == null){
269 				getNameService().save(taxonName);
270 				taxon = Taxon.NewInstance(taxonName, sec); //sec set null
271 			}
272 
273 			determinationEvent = DeterminationEvent.NewInstance();
274 			determinationEvent.setTaxon(taxon);
275 			determinationEvent.setPreferredFlag(preferredFlag);
276 //			no reference in the GBIF INDEX
277 //			for (int l=0;l<this.referenceList.size();l++){
278 //			ReferenceBase reference = new Generic();
279 //			reference.setTitleCache(this.referenceList.get(l));
280 //			determinationEvent.addReference(reference);
281 //			}
282 			derivedThing.addDetermination(determinationEvent);
283 		}
284 
285 	}
286 
287 	private NonViralName<?> parseScientificName(String scientificName){
288 		System.out.println("parseScientificName");
289 		NonViralNameParserImpl nvnpi = NonViralNameParserImpl.NewInstance();
290 		NonViralName<?>taxonName = null;
291 		boolean problem=false;
292 
293 		System.out.println("nomenclature: "+this.nomenclatureCode);
294 
295 		if(this.nomenclatureCode == null){
296 			taxonName = NonViralName.NewInstance(null);
297 			taxonName.setTitleCache(scientificName, true);
298 			return taxonName;
299 		}
300 
301 		if (this.nomenclatureCode.toString().equals("Zoological")){
302 			taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICZN,null);
303 			if (taxonName.hasProblem())
304 				problem=true;
305 		}
306 		if (this.nomenclatureCode.toString().equals("Botanical")){
307 			taxonName  = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICBN,null);
308 			if (taxonName.hasProblem())
309 				problem=true;;}
310 		if (this.nomenclatureCode.toString().equals("Bacterial")){
311 			taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICNB, null);
312 			if (taxonName.hasProblem())
313 				problem=true;
314 		}
315 		if (this.nomenclatureCode.toString().equals("Cultivar")){
316 			taxonName = nvnpi.parseFullName(scientificName,NomenclaturalCode.ICNCP, null);
317 			if (taxonName.hasProblem())
318 				problem=true;;
319 		}
320 //		if (this.nomenclatureCode.toString().equals("Viral")){
321 //		ViralName taxonName = (ViralName)nvnpi.parseFullName(scientificName,NomenclaturalCode.ICVCN(), null);
322 //		if (taxonName.hasProblem())
323 //		System.out.println("pb ICVCN");
324 //		}
325 		//TODO: parsing of ViralNames?
326 		if(problem){
327 			taxonName = NonViralName.NewInstance(null);
328 			taxonName.setTitleCache(scientificName, true);
329 		}
330 		return taxonName;
331 
332 	}
333 
334 
335 	/*
336 	 * Store the unit with its Gathering informations in the CDM
337 	 */
338 	public boolean start(SpecimenExcelImportConfigurator config){
339 		boolean result = true;
340 //		CdmApplicationController app2 = null;
341 		TransactionStatus tx = null;
342 
343 //		app = config.getCdmAppController();
344 //		try {
345 //		app = CdmApplicationController.NewInstance(config.getDestination(), config.getDbSchemaValidation());
346 //		} catch (DataSourceNotFoundException e1) {
347 //		e1.printStackTrace();
348 //		System.out.println("DataSourceNotFoundException "+e1);
349 //		} catch (TermNotFoundException e1) {
350 //		e1.printStackTrace();
351 //		System.out.println("TermNotFoundException " +e1);
352 //		}
353 		
354 		tx = startTransaction();
355 		try {
356 			ReferenceBase sec = config.getTaxonReference();
357 
358 			/**
359 			 * SPECIMEN OR OBSERVATION OR LIVING
360 			 */
361 			DerivedUnitBase derivedThing = null;
362 			//create specimen
363 			boolean rbFound=false;
364 			if (this.recordBasis != null){
365 				if (this.recordBasis.toLowerCase().startsWith("s")) {//specimen
366 					derivedThing = Specimen.NewInstance();
367 					rbFound = true;
368 				}
369 				else if (this.recordBasis.toLowerCase().startsWith("o")) {//observation
370 					derivedThing = Observation.NewInstance();	
371 					rbFound = true;
372 				}
373 				else if (this.recordBasis.toLowerCase().startsWith("l")) {//living -> fossil, herbarium sheet....???
374 					derivedThing = LivingBeing.NewInstance();
375 					rbFound = true;
376 				}
377 				if (! rbFound){
378 					logger.info("The basis of record does not seem to be known: "+this.recordBasis);
379 					derivedThing = DerivedUnit.NewInstance();
380 				}
381 			}
382 			else{
383 				logger.info("The basis of record is null");
384 				derivedThing = DerivedUnit.NewInstance();
385 			}
386 
387 			this.setTaxonNameBase(config, derivedThing, sec);
388 
389 
390 			//set catalogue number (unitID)
391 			derivedThing.setCatalogNumber(this.unitID);
392 			derivedThing.setAccessionNumber(this.accessionNumber);
393 			derivedThing.setCollectorsNumber(this.collectorsNumber);
394 
395 
396 			/**
397 			 * INSTITUTION & COLLECTION
398 			 */
399 			//manage institution
400 			Institution institution = this.getInstitution(this.institutionCode,config);
401 			//manage collection
402 			Collection collection = this.getCollection(this.collectionCode, institution, config); 
403 			//link specimen & collection
404 			derivedThing.setCollection(collection);
405 
406 			/**
407 			 * GATHERING EVENT
408 			 */
409 
410 			UnitsGatheringEvent unitsGatheringEvent = new UnitsGatheringEvent(getTermService(), this.locality, this.languageIso, this.longitude, 
411 					this.latitude, this.gatheringAgentList);
412 			UnitsGatheringArea unitsGatheringArea = new UnitsGatheringArea(this.isocountry, this.country, getOccurrenceService());
413 			NamedArea areaCountry = unitsGatheringArea.getArea();
414 			unitsGatheringEvent.addArea(areaCountry);
415 			//Only for ABCD XML data
416 //			unitsGatheringArea = new UnitsGatheringArea(this.namedAreaList);
417 //			ArrayList<NamedArea> nas = unitsGatheringArea.getAreas();
418 //			for (int i=0; i<nas.size();i++)
419 //				unitsGatheringEvent.addArea(nas.get(i));
420 
421 
422 			//create field/observation
423 			FieldObservation fieldObservation = FieldObservation.NewInstance();
424 			//add fieldNumber
425 			fieldObservation.setFieldNumber(this.fieldNumber);
426 			//join gatheringEvent to fieldObservation
427 			fieldObservation.setGatheringEvent(unitsGatheringEvent.getGatheringEvent());
428 			//add Multimedia URLs
429 			if(this.multimediaObjects.size()>0){
430 				MediaRepresentation representation;
431 				Media media;
432 				MediaMetaData mmd ;
433 				ImageMetaData imd ;
434 				URL url ;
435 				ImageFile imf;
436 				for (int i=0;i<this.multimediaObjects.size();i++){
437 					if(this.multimediaObjects.get(i) != null){
438 						//mmd = new MediaMetaData();
439 						imd = ImageMetaData.newInstance();
440 						url = new URL(this.multimediaObjects.get(i));
441 						//imd = MediaMetaData.readImageMetaData(url, imd);
442 						imd.readMetaData(url.toURI(), 0);
443 						if (imd != null){
444 							System.out.println("image not null");
445 							representation = MediaRepresentation.NewInstance();
446 							imf = ImageFile.NewInstance(this.multimediaObjects.get(i), null, imd);
447 							representation.addRepresentationPart(imf);
448 							media = Media.NewInstance();
449 							media.addRepresentation(representation);
450 							fieldObservation.addMedia(media);
451 						}
452 					}
453 				}
454 			}
455 //			//link fieldObservation and specimen
456 			DerivationEvent derivationEvent = DerivationEvent.NewInstance();
457 			derivationEvent.addOriginal(fieldObservation);
458 			derivedThing.addDerivationEvent(derivationEvent);
459 
460 			/**
461 			 * SAVE AND STORE DATA
462 			 */			
463 
464 			getTermService().save(areaCountry);//save it sooner
465 			//ONLY FOR ABCD XML DATA
466 //			for (int i=0; i<nas.size();i++)
467 //				app.getTermService().saveTerm(nas.get(i));//save it sooner (foreach area)
468 			getTermService().saveLanguageData(unitsGatheringEvent.getLocality());//save it sooner
469 			getOccurrenceService().save(derivedThing);
470 
471 			logger.info("saved new specimen ...");
472 
473 
474 		} catch (Exception e) {
475 			logger.warn("Error when reading record!!");
476 			e.printStackTrace();
477 			result = false;
478 		}
479 		commitTransaction(tx);
480 		System.out.println("commit done");
481 		//app.close();
482 		return result;
483 	}
484 
485 
486 //	/* (non-Javadoc)
487 //	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
488 //	 */
489 //	@Override
490 //	protected boolean doInvoke(IImportConfigurator config, 
491 //			Map<String, MapWrapper<? extends CdmBase>> stores){ 
492 //		SpecimenImportState state = ((SpecimenImportConfigurator)config).getState();
493 //		state.setConfig((SpecimenImportConfigurator)config);
494 //		return doInvoke(state);
495 //	}
496 	
497 //	public boolean doInvoke(SpecimenImportState state){
498 //		invoke(state.getConfig());
499 //		return false;
500 //	}
501 
502 
503 	/* (non-Javadoc)
504 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IoStateBase)
505 	 */
506 	@Override
507 	protected boolean isIgnore(SpecimenExcelImportState state) {
508 		return false;
509 	}
510 
511 
512 	/* (non-Javadoc)
513 	 * @see eu.etaxonomy.cdm.io.specimen.SpecimenIoBase#doInvoke(eu.etaxonomy.cdm.io.specimen.abcd206.SpecimenImportState)
514 	 */
515 	@Override
516 	protected boolean doInvoke(SpecimenExcelImportState state) {
517 		System.out.println("INVOKE Specimen Import From Excel File (Synthesys Cache format");
518 		SpecimenExcelImport test = new SpecimenExcelImport();
519 		String sourceName = state.getConfig().getSourceNameString();
520 		ArrayList<HashMap<String,String>> unitsList = null;
521 		try{
522 			System.out.println("euhhhhhhhhhh");
523 			unitsList = ExcelUtils.parseXLS(sourceName);
524 		}
525 		catch(Exception e){System.out.println("moui..."+e);}
526 		System.out.println("unitsList"+unitsList);
527 		if (unitsList != null){
528 			HashMap<String,String> unit=null;
529 			for (int i=0; i<unitsList.size();i++){
530 				unit = unitsList.get(i);
531 				test.setUnitPropertiesExcel(unit);//and then invoke
532 				test.start(state.getConfig());
533 				state.getConfig().setDbSchemaValidation(DbSchemaValidation.UPDATE);
534 			}
535 		}
536 
537 		return false;
538 	}
539 
540 
541 	/* (non-Javadoc)
542 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
543 	 */
544 	@Override
545 	protected boolean doCheck(SpecimenExcelImportState state) {
546 		logger.warn("Validation not yet implemented for " + this.getClass().getSimpleName());
547 		return true;
548 	}
549 
550 
551 }