View Javadoc

1   /**
2   * Copyright (C) 2007 EDIT
3   * European Distributed Institute of Taxonomy 
4   * http://www.e-taxonomy.eu
5   * 
6   * The contents of this file are subject to the Mozilla Public License Version 1.1
7   * See LICENSE.TXT at the top of this package for the full license terms.
8   */
9   
10  package eu.etaxonomy.cdm.io.berlinModel.in;
11  
12  import java.sql.ResultSet;
13  import java.sql.SQLException;
14  import java.util.ArrayList;
15  import java.util.HashMap;
16  import java.util.HashSet;
17  import java.util.List;
18  import java.util.Map;
19  import java.util.Set;
20  
21  import org.apache.log4j.Logger;
22  import org.springframework.stereotype.Component;
23  
24  import eu.etaxonomy.cdm.common.CdmUtils;
25  import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
26  import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
27  import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
28  import eu.etaxonomy.cdm.io.common.IOValidator;
29  import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
30  import eu.etaxonomy.cdm.model.common.Annotation;
31  import eu.etaxonomy.cdm.model.common.AnnotationType;
32  import eu.etaxonomy.cdm.model.common.CdmBase;
33  import eu.etaxonomy.cdm.model.common.Marker;
34  import eu.etaxonomy.cdm.model.common.MarkerType;
35  import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
36  import eu.etaxonomy.cdm.model.description.Distribution;
37  import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
38  import eu.etaxonomy.cdm.model.description.TaxonDescription;
39  import eu.etaxonomy.cdm.model.location.NamedArea;
40  import eu.etaxonomy.cdm.model.location.TdwgArea;
41  import eu.etaxonomy.cdm.model.reference.ReferenceBase;
42  import eu.etaxonomy.cdm.model.taxon.Taxon;
43  import eu.etaxonomy.cdm.model.taxon.TaxonBase;
44  import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
45  
46  
47  /**
48   * @author a.mueller
49   * @created 20.03.2008
50   * @version 1.0
51   */
52  @Component
53  public class BerlinModelOccurrenceImport  extends BerlinModelImportBase {
54  	private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);
55  
56  	public static final String NAMESPACE = "Occurrence";
57  	
58  	
59  	
60  	private static int modCount = 5000;
61  	private static final String pluralString = "occurrences";
62  	private static final String dbTableName = "emOccurrence";  //??
63  
64  
65  	public BerlinModelOccurrenceImport(){
66  		super();
67  	}
68  	
69  	/* (non-Javadoc)
70  	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
71  	 */
72  	@Override
73  	protected String getIdQuery() {
74  		return " SELECT occurrenceId FROM " + getTableName();
75  	}
76  
77  	/* (non-Javadoc)
78  	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
79  	 */
80  	@Override
81  	protected String getRecordQuery(BerlinModelImportConfigurator config) {
82  			String strQuery =   //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution 
83              " SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " +
84              		" emOccurrence.Cultivated, emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " +  
85                  	" emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " + 
86                  " FROM emOccurrence INNER JOIN " +  
87                  	" emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " + 
88                  	" PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " + 
89                  	" emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " +  
90                  	" emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " +  
91              " WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN + ")  )" +  
92                  " ORDER BY PTaxon.RIdentifier";
93  		return strQuery;
94  	}
95  
96  	/* (non-Javadoc)
97  	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
98  	 */
99  	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
100 		boolean success = true;
101 		Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
102 		
103 		Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
104 			
105 		ResultSet rs = partitioner.getResultSet();
106 
107 		try {
108 			//map to store the mapping of duplicate berlin model occurrences to their real distributions
109 			//duplicated may occurr due to area mappings from BM areas to TDWG areas
110 			Map<Integer, String> duplicateMap = new HashMap<Integer, String>();
111 			int oldTaxonId = -1;
112 			TaxonDescription oldDescription = null;
113 			int i = 0;
114 			int countDescriptions = 0;
115 			int countDistributions = 0;
116 			int countDuplicates = 0;
117 			//for each reference
118             while (rs.next()){
119                 
120                 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
121                 
122                 int occurrenceId = rs.getInt("OccurrenceId");
123                 int newTaxonId = rs.getInt("taxonId");
124                 String tdwgCodeString = rs.getString("TDWGCode");
125                 Integer emStatusId = (Integer)rs.getObject("emOccurSumCatId");
126                 
127                 try {
128                     //status
129                      PresenceAbsenceTermBase<?> status = null;
130                      String alternativeStatusString = null;
131                      if (emStatusId != null){
132                     	status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
133                      }else{
134                     	 String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")};
135                     	 alternativeStatusString = CdmUtils.concat(",", stringArray);
136                      }
137                      
138                      //Create area list
139                      List<NamedArea> tdwgAreas = new ArrayList<NamedArea>();
140                      if (tdwgCodeString != null){
141                            String[] tdwgCodes = tdwgCodeString.split(";");
142                            for (String tdwgCode : tdwgCodes){
143                                  NamedArea tdwgArea = TdwgArea.getAreaByTdwgAbbreviation(tdwgCode.trim());
144                                  if (tdwgArea != null){
145                                        tdwgAreas.add(tdwgArea);
146                                  }
147                            }
148                      }
149                      ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
150                      //create description(elements)
151                      TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
152                      if (tdwgAreas.size() == 0){
153                     	 logger.warn("No areas defined for occurrence " + occurrenceId);
154                      }
155                      for (NamedArea tdwgArea : tdwgAreas){
156                            Distribution distribution = Distribution.NewInstance(tdwgArea, status);
157                            if (status == null){
158                         	   AnnotationType annotationType = AnnotationType.EDITORIAL();
159                         	   Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null);
160                         	   distribution.addAnnotation(annotation);
161                         	   distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false));
162                            }
163 //                         distribution.setCitation(sourceRef);
164                            if (taxonDescription != null) { 
165                         	   Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
166                                if (duplicate == null){
167 	                        	   taxonDescription.addElement(distribution); 
168 	                               distribution.addSource(String.valueOf(occurrenceId), NAMESPACE, state.getConfig().getSourceReference(), null);
169 	                        	   countDistributions++; 
170 	                               if (taxonDescription != oldDescription){ 
171 	                            	   taxaToSave.add(taxonDescription.getTaxon()); 
172 	                                   oldDescription = taxonDescription; 
173 	                                   countDescriptions++; 
174 	                               	} 
175                                }else{                          	  
176                             	   countDuplicates++;
177                             	   duplicate.addSource(String.valueOf(occurrenceId), NAMESPACE, state.getConfig().getSourceReference(), null);
178                             	   logger.info("Distribution is duplicate");	                           }
179 	                       	} else { 
180 	                       		logger.warn("Distribution " + tdwgArea.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
181 	                       		success = false;
182 	                       	}
183                      }
184                      
185                 } catch (UnknownCdmTypeException e) {
186                      logger.error("Unknown presenceAbsence status id: " + emStatusId); 
187                 	e.printStackTrace();
188                      success = false;
189                 }
190                 
191             }
192            
193             logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
194 			logger.info("Duplicate occurrences: "  + (countDuplicates));
195 
196 			logger.info("Taxa to save: " + taxaToSave.size());
197 			getTaxonService().save(taxaToSave);	
198 			
199 			return success;
200 		} catch (SQLException e) {
201 			logger.error("SQLException:" +  e);
202 			return false;
203 		}
204 	}
205 
206 	/* (non-Javadoc)
207 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
208 	 */
209 	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
210 		String nameSpace;
211 		Class cdmClass;
212 		Set<String> idSet;
213 		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
214 		
215 		try{
216 			Set<String> taxonIdSet = new HashSet<String>();
217 			while (rs.next()){
218 				handleForeignKey(rs, taxonIdSet, "taxonId");
219 			}
220 			
221 			//taxon map
222 			nameSpace = BerlinModelTaxonImport.NAMESPACE;
223 			cdmClass = TaxonBase.class;
224 			idSet = taxonIdSet;
225 			Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
226 			result.put(nameSpace, objectMap);
227 
228 		} catch (SQLException e) {
229 			throw new RuntimeException(e);
230 		}
231 		return result;
232 	}
233 
234 
235 
236 	/**
237      * Tests if a distribution with the same tdwgArea and the same status already exists in the description. 
238      * If so the old distribution is returned 
239      * @param description
240      * @param tdwgArea
241      * @return false, if dupplicate exists. True otherwise.
242      */
243     private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
244     	for (DescriptionElementBase descElBase : description.getElements()){
245     		if (descElBase.isInstanceOf(Distribution.class)){
246     			Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
247     			NamedArea oldArea = oldDistr.getArea();
248     			if (oldArea != null && oldArea.equals(distribution.getArea())){
249     				PresenceAbsenceTermBase<?> oldStatus = oldDistr.getStatus();
250     				if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
251     					duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
252     					return oldDistr;
253     				}
254     			}
255     		}
256     	}
257     	return null;
258     }
259 	
260 	/**
261 	 * Use same TaxonDescription if two records belong to the same taxon 
262 	 * @param newTaxonId
263 	 * @param oldTaxonId
264 	 * @param oldDescription
265 	 * @param taxonMap
266 	 * @return
267 	 */
268 	private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase> taxonMap, int occurrenceId, ReferenceBase<?> sourceSec){
269 		TaxonDescription result = null;
270 		if (oldDescription == null || newTaxonId != oldTaxonId){
271 			TaxonBase taxonBase = taxonMap.get(String.valueOf(newTaxonId));
272 			//TODO for testing
273 			//TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null);
274 			Taxon taxon;
275 			if ( taxonBase instanceof Taxon ) {
276 				taxon = (Taxon) taxonBase;
277 			} else if (taxonBase != null) {
278 				logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
279 				return null;
280 			} else {
281 				logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
282 				return null;
283 			}		
284 			Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
285 			if (descriptionSet.size() > 0) {
286 				result = descriptionSet.iterator().next(); 
287 			}else{
288 				result = TaxonDescription.NewInstance();
289 				result.setTitleCache(sourceSec.getTitleCache(), true);
290 				taxon.addDescription(result);
291 			}
292 		}else{
293 			result = oldDescription;
294 		}
295 		return result;
296 	}
297 	
298 
299 	/* (non-Javadoc)
300 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
301 	 */
302 	@Override
303 	protected boolean doCheck(BerlinModelImportState state){
304 		IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
305 		return validator.validate(state);
306 	}
307 	
308 	/* (non-Javadoc)
309 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
310 	 */
311 	@Override
312 	protected String getTableName() {
313 		return dbTableName;
314 	}
315 	
316 	/* (non-Javadoc)
317 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
318 	 */
319 	@Override
320 	public String getPluralString() {
321 		return pluralString;
322 	}
323 
324 	/* (non-Javadoc)
325 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
326 	 */
327 	protected boolean isIgnore(BerlinModelImportState state){
328 		return ! state.getConfig().isDoOccurrence();
329 	}
330 	
331 }