View Javadoc

1   /**
2   * Copyright (C) 2008 EDIT
3   * European Distributed Institute of Taxonomy 
4   * http://www.e-taxonomy.eu
5   * 
6   * The contents of this file are subject to the Mozilla Public License Version 1.1
7   * See LICENSE.TXT at the top of this package for the full license terms.
8   */
9   package eu.etaxonomy.cdm.io.excel.distribution;
10  
11  import java.io.FileNotFoundException;
12  import java.util.ArrayList;
13  import java.util.HashMap;
14  import java.util.List;
15  import java.util.Map;
16  import java.util.Set;
17  
18  import org.apache.log4j.Logger;
19  import org.springframework.stereotype.Component;
20  import org.springframework.transaction.TransactionStatus;
21  
22  import eu.etaxonomy.cdm.api.service.config.IIdentifiableEntityServiceConfigurator;
23  import eu.etaxonomy.cdm.api.service.config.impl.IdentifiableServiceConfiguratorImpl;
24  import eu.etaxonomy.cdm.common.CdmUtils;
25  import eu.etaxonomy.cdm.common.ExcelUtils;
26  import eu.etaxonomy.cdm.io.common.CdmIoBase;
27  import eu.etaxonomy.cdm.io.common.ICdmIO;
28  import eu.etaxonomy.cdm.io.common.IImportConfigurator;
29  import eu.etaxonomy.cdm.io.common.MapWrapper;
30  import eu.etaxonomy.cdm.io.excel.common.ExcelImportState;
31  import eu.etaxonomy.cdm.model.common.CdmBase;
32  import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
33  import eu.etaxonomy.cdm.model.description.Distribution;
34  import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
35  import eu.etaxonomy.cdm.model.description.PresenceTerm;
36  import eu.etaxonomy.cdm.model.description.TaxonDescription;
37  import eu.etaxonomy.cdm.model.location.NamedArea;
38  import eu.etaxonomy.cdm.model.location.TdwgArea;
39  import eu.etaxonomy.cdm.model.name.TaxonNameBase;
40  import eu.etaxonomy.cdm.model.taxon.Taxon;
41  import eu.etaxonomy.cdm.persistence.query.MatchMode;
42  
43  /**
44   * @author a.babadshanjan
45   * @created 10.11.2008
46   * @version 1.0
47   */
48  @Component
49  public class DistributionImport extends CdmIoBase<ExcelImportState> implements ICdmIO<ExcelImportState> {
50  
51      /* used */
52      private static final String EDIT_NAME_COLUMN = "EDIT";
53      private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";
54      private static final String STATUS_COLUMN = "Status";
55      /* not used */
56  //    private static final String LITERATURE_NUMBER_COLUMN = "Lit.";
57  //    private static final String LITERATURE_COLUMN = "Literature";
58  //    private static final String VERNACULAR_NAME_COLUMN = "Vernacular";
59  //    private static final String HABITAT_COLUMN = "Habitat";
60  //    private static final String CONTROL_COLUMN = "Control";
61  //    private static final String TRANSLATED_COLUMN = "Translated";
62  //    private static final String ISO_DISTRIBUTION_COLUMN = "ISO";
63  //    private static final String NOTES_COLUMN = "Notes";
64  //    private static final String PAGE_NUMBER_COLUMN = "Page";
65  //    private static final String INFO_COLUMN = "Info";
66      
67  	private static final Logger logger = Logger.getLogger(DistributionImport.class);
68  	
69  	// Stores already processed descriptions
70  	Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();
71  
72  	@Override
73  	protected boolean doInvoke(ExcelImportState state) {
74  		
75  		if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }
76      	
77  		// read and save all rows of the excel worksheet
78  		ArrayList<HashMap<String, String>> recordList;
79      	try{
80      		recordList = ExcelUtils.parseXLS(state.getConfig().getSourceNameString());
81  		} catch (FileNotFoundException e1) {
82  			logger.error("File not found: " + (String)state.getConfig().getSource());
83  			return false;
84  		}
85      	if (recordList != null) {
86      		HashMap<String,String> record = null;
87      		TransactionStatus txStatus = startTransaction();
88  
89      		for (int i = 0; i < recordList.size(); i++) {
90      			record = recordList.get(i);
91      			analyzeRecord(record);
92      		}
93      		commitTransaction(txStatus);
94      	}
95      	
96  		try {
97  			if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }
98  				
99  		} catch (Exception e) {
100     		logger.error("Error closing the application context");
101     		e.printStackTrace();
102 		}
103     	
104     	return true;
105 	}
106 			
107 
108 	/** 
109 	 *  Reads the data of one Excel sheet row
110 	 */
111     private void analyzeRecord(HashMap<String,String> record) {
112     	/*
113     	 * Relevant columns:
114     	 * Name (EDIT)
115     	 * Distribution TDWG
116     	 * Status (only entries if not native) 
117     	 * Literature number
118     	 * Literature
119     	*/
120     	
121         String editName = "";
122         ArrayList<String> distributionList = new ArrayList<String>();
123         String status = "";
124         String literatureNumber = "";
125         String literature = "";
126         
127     	Set<String> keys = record.keySet();
128     	
129     	for (String key: keys) {
130     		
131     		String value = (String) record.get(key);
132     		if (!value.equals("")) {
133     			if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }
134     		}
135     		
136     		if (key.contains(EDIT_NAME_COLUMN)) {
137     			editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
138     			
139 			} else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {
140 				distributionList =  CdmUtils.buildList(value);
141 				
142 			} else if(key.contains(STATUS_COLUMN)) {
143 				status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
144 				
145 //			} else if(key.contains(LITERATURE_NUMBER_COLUMN)) {
146 //				literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
147 //				
148 //			} else if(key.contains(LITERATURE_COLUMN)) {
149 //				literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
150 //				
151 			} else {
152 				//logger.warn("Column " + key + " ignored");
153 			}
154     	}
155     	
156     	// Store the data of this record in the DB
157     	if (!editName.equals("")) {
158     		saveRecord(editName, distributionList, status, literatureNumber, literature);
159     	}
160     }
161     
162     
163 	/** 
164 	 *  Stores the data of one Excel sheet row in the database
165 	 */
166     private void saveRecord(String taxonName, ArrayList<String> distributionList,
167     		String status, String literatureNumber, String literature) {
168 
169     	IdentifiableServiceConfiguratorImpl config = 
170     		IdentifiableServiceConfiguratorImpl.NewInstance();
171     	config.setTitleSearchString(taxonName);
172     	config.setMatchMode(MatchMode.BEGINNING);
173     	
174 		try {
175     		// get the matching names from the DB
176     		//List<TaxonNameBase> taxonNameBases = getNameService().findByTitle(config);
177     		List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);
178     		if (taxonNameBases.isEmpty()) {
179     			logger.error("Taxon name '" + taxonName + "' not found in DB");
180     		} else {
181     			if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }
182     		}
183 
184     		// get the taxa for the matching names
185     		for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {
186 
187     			Set<Taxon> taxa = dbTaxonName.getTaxa();
188     			if (taxa.isEmpty()) {
189     				logger.warn("No taxon found for name '" + taxonName + "'");
190     			} else if (taxa.size() > 1) {
191     				logger.warn("More than one taxa found for name '" + taxonName + "'");
192     			}
193 
194     			for(Taxon taxon: taxa) {
195 
196     				TaxonDescription myDescription = null;
197 
198     				// If we have created a description for this taxon earlier, take this one.
199     				// Otherwise, create a new description.
200     				// We don't update any existing descriptions in the database at this point.
201     				if (myDescriptions.containsKey(taxon)) {
202     					myDescription = myDescriptions.get(taxon);
203     				} else {
204     					myDescription = TaxonDescription.NewInstance(taxon);
205     					taxon.addDescription(myDescription);
206     					myDescriptions.put(taxon, myDescription);
207     				}
208 
209     				// Status
210     				PresenceAbsenceTermBase<?> presenceAbsenceStatus = PresenceTerm.NewInstance();
211     				if (status.equals("")) {
212     					presenceAbsenceStatus = PresenceTerm.NATIVE();
213     				} else {
214     					presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);
215     				}
216     				// TODO: Handle absence case. 
217     				// This case has not yet occurred in the excel input file, though.
218 					
219     				/* Set to true if taxon needs to be saved if at least one new distribution exists */
220     				boolean save = false;
221     				
222     				// TDWG areas
223     				for (String distribution: distributionList) {
224 
225                         /* Set to true if this distribution is a new one*/
226         				boolean ignore = false;
227         				
228     					if(!distribution.equals("")) {
229     						NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(distribution);
230         					TaxonDescription taxonDescription = myDescriptions.get(taxon);
231         					if (namedArea != null) {    
232     		    				// Check against existing distributions and ignore the ones that occur multiple times
233             					Set<DescriptionElementBase> myDescriptionElements = taxonDescription.getElements();
234     	    					for(DescriptionElementBase descriptionElement : myDescriptionElements) {
235     	    						if (descriptionElement instanceof Distribution) {
236     	    							if (namedArea == ((Distribution)descriptionElement).getArea()) {
237     	    								ignore = true;
238     	    								if (logger.isDebugEnabled()) { 
239     	    									logger.debug("Distribution ignored: " + distribution); 
240     	    								}
241     	    		    					break;
242      	    							}
243     	    						}
244     	    					}
245     	    					// Create new distribution if not yet exist
246     	    					if (ignore == false) {
247     	    						save = true;
248     	    						Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);
249     	    						myDescription.addElement(newDistribution);
250     	    						if (logger.isDebugEnabled()) { 
251     	    							logger.debug("Distribution created: " + newDistribution.toString());
252     	    						}
253     	    					}
254     						}
255     					}
256     				}
257     				if (save == true) {
258     					getTaxonService().save(taxon);
259     					if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }
260     				}
261     			}
262     		} 
263     	} catch (Exception e) {
264     		logger.error("Error");
265     		e.printStackTrace();
266     	}
267     }
268     
269     
270 	@Override
271 	protected boolean doCheck(ExcelImportState state) {
272 		boolean result = true;
273 		logger.warn("No check implemented for distribution data import");
274 		return result;
275 	}
276 	
277 
278 	@Override
279 	protected boolean isIgnore(ExcelImportState state) {
280 		return false;
281 	}
282 
283 }