View Javadoc

1   /**
2   * Copyright (C) 2007 EDIT
3   * European Distributed Institute of Taxonomy 
4   * http://www.e-taxonomy.eu
5   * 
6   * The contents of this file are subject to the Mozilla Public License Version 1.1
7   * See LICENSE.TXT at the top of this package for the full license terms.
8   */
9   
10  package eu.etaxonomy.cdm.io.erms;
11  
12  import java.sql.ResultSet;
13  import java.sql.SQLException;
14  import java.util.HashMap;
15  import java.util.HashSet;
16  import java.util.Map;
17  import java.util.Set;
18  import java.util.UUID;
19  
20  import org.apache.log4j.Logger;
21  import org.springframework.stereotype.Component;
22  
23  import eu.etaxonomy.cdm.common.CdmUtils;
24  import eu.etaxonomy.cdm.io.common.IOValidator;
25  import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;
26  import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionMapper;
27  import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
28  import eu.etaxonomy.cdm.io.common.mapping.DbImportObjectCreationMapper;
29  import eu.etaxonomy.cdm.io.common.mapping.DbImportStringMapper;
30  import eu.etaxonomy.cdm.io.common.mapping.DbNotYetImplementedMapper;
31  import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
32  import eu.etaxonomy.cdm.io.erms.validation.ErmsTaxonImportValidator;
33  import eu.etaxonomy.cdm.model.common.CdmBase;
34  import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
35  import eu.etaxonomy.cdm.model.name.NonViralName;
36  import eu.etaxonomy.cdm.model.name.Rank;
37  import eu.etaxonomy.cdm.model.reference.ReferenceBase;
38  import eu.etaxonomy.cdm.model.taxon.Synonym;
39  import eu.etaxonomy.cdm.model.taxon.Taxon;
40  import eu.etaxonomy.cdm.model.taxon.TaxonBase;
41  
42  
43  /**
44   * @author a.mueller
45   * @created 20.02.2010
46   * @version 1.0
47   */
48  @Component
49  public class ErmsTaxonImport  extends ErmsImportBase<TaxonBase> implements IMappingImport<TaxonBase, ErmsImportState>{
50  	private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);
51  	
52  	public static final UUID TNS_EXT_UUID = UUID.fromString("41cb0450-ac84-4d73-905e-9c7773c23b05");
53  	
54  	private DbImportMapping mapping;
55  	
56  	//second path is not used anymore, there is now an ErmsTaxonRelationImport class instead
57  	private boolean isSecondPath = false;
58  	
59  	private int modCount = 10000;
60  	private static final String pluralString = "taxa";
61  	private static final String dbTableName = "tu";
62  	private static final Class cdmTargetClass = TaxonBase.class;
63  
64  	public ErmsTaxonImport(){
65  		super(pluralString, dbTableName, cdmTargetClass);
66  	}
67  	
68  	
69  
70  //	/* (non-Javadoc)
71  //	 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#getIdQuery()
72  //	 */
73  //	@Override
74  //	protected String getIdQuery() {
75  //		String strQuery = " SELECT id FROM tu WHERE id < 300000 " ;
76  //		return strQuery;
77  //	}
78  
79  
80  	/* (non-Javadoc)
81  	 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#getMapping()
82  	 */
83  	protected DbImportMapping getMapping() {
84  		if (mapping == null){
85  			mapping = new DbImportMapping();
86  			
87  			mapping.addMapper(DbImportObjectCreationMapper.NewInstance(this, "id", TAXON_NAMESPACE)); //id + tu_status
88  			UUID tsnUuid = ErmsTransformer.uuidTsn;
89  			mapping.addMapper(DbImportExtensionMapper.NewInstance("tsn", tsnUuid, "TSN", "TSN", "TSN"));
90  //			mapping.addMapper(DbImportStringMapper.NewInstance("tu_name", "(NonViralName)name.nameCache"));
91  			
92  			UUID displayNameUuid = ErmsTransformer.uuidDisplayName;
93  			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_displayname", displayNameUuid, "display name", "display name", "display name"));
94  			UUID fuzzyNameUuid = ErmsTransformer.uuidFuzzyName;
95  			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_fuzzyname", fuzzyNameUuid, "fuzzy name", "fuzzy name", "fuzzy name"));
96  			mapping.addMapper(DbImportStringMapper.NewInstance("tu_authority", "(NonViralName)name.authorshipCache"));
97  			
98  			UUID fossilStatusUuid = ErmsTransformer.uuidFossilStatus;
99  			mapping.addMapper(DbImportExtensionMapper.NewInstance("fossil_name", fossilStatusUuid, "fossil status", "fossil status", "fos. stat."));
100 //			mapping.addMapper(DbImportExtensionTypeCreationMapper.NewInstance("fossil_name", EXTENSION_TYPE_NAMESPACE, "fossil_name", "fossil_name", "fossil_name"));
101 			
102 			UUID credibilityUuid = ErmsTransformer.uuidCredibility;
103 			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_credibility", credibilityUuid, "credibility", "credibility", "credibility")); //Werte: null, unknown, marked for deletion
104 			
105 			UUID completenessUuid = ErmsTransformer.uuidCompleteness;
106 			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_completeness", completenessUuid, "completeness", "completeness", "completeness")); //null, unknown, tmpflag, tmp2, tmp3, complete
107 			
108 			UUID unacceptUuid = ErmsTransformer.uuidUnacceptReason;
109 			mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_unacceptreason", unacceptUuid, "unaccept reason", "unaccept reason", "reason"));
110 			
111 			UUID qualityUuid = ErmsTransformer.uuidQualityStatus;
112 			mapping.addMapper(DbImportExtensionMapper.NewInstance("qualitystatus_name", qualityUuid, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
113 			
114 //			UUID hiddenUuid = ErmsTransformer.uuidHidden;
115 //			mapping.addMapper(DbImportMarkerCreationMapper.Mapper.NewInstance("qualitystatus_name", qualityUuid, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
116 			
117 			
118 			//ignore
119 			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_marine", "marine flag not implemented in PESI"));
120 			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_brackish", "brackish flag not implemented in PESI"));
121 			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fresh", "freshwater flag not implemented in PESI"));
122 			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_terrestrial", "terrestrial flag not implemented in PESI"));
123 			mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));
124 			mapping.addMapper(DbIgnoreMapper.NewInstance("cache_citation", "citation cache not needed in PESI"));
125 			mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_sp", "included in rank/object creation")); 
126 			
127 			
128 			//not yet implemented or ignore
129 			mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_hidden", "Needs DbImportMarkerMapper implemented"));
130 			
131 //			//second path / implemented in ErmsTaxonRelationImport
132 //			DbImportMapping secondPathMapping = new DbImportMapping();
133 //			secondPathMapping.addMapper(DbImportTaxIncludedInMapper.NewInstance("id", "tu_parent", TAXON_NAMESPACE, null)); //there is only one tree
134 //			secondPathMapping.addMapper(DbImportSynonymMapper.NewInstance("id", "tu_acctaxon", TAXON_NAMESPACE, null)); 			
135 //			secondPathMapping.addMapper(DbImportNameTypeDesignationMapper.NewInstance("id", "tu_typetaxon", NAME_NAMESPACE, "tu_typedesignationstatus"));
136 //			secondPathMapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_acctaxon"));
137 //			mapping.setSecondPathMapping(secondPathMapping);
138 			
139 		}
140 		return mapping;
141 	}
142 
143 	/* (non-Javadoc)
144 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
145 	 */
146 	@Override
147 	protected String getRecordQuery(ErmsImportConfigurator config) {
148 		String strSelect = " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, " 
149 			+ " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " + 
150 			" status.status_id as status_id,  fossil.fossil_name, qualitystatus.qualitystatus_name";
151 		String strFrom = " FROM tu  LEFT OUTER JOIN  tu AS parent1 ON parent1.id = tu.tu_parent " + 
152 				" LEFT OUTER JOIN   tu AS parent2  ON parent2.id = parent1.tu_parent " + 
153 				" LEFT OUTER JOIN tu AS parent3 ON parent2.tu_parent = parent3.id " + 
154 				" LEFT OUTER JOIN status ON tu.tu_status = status.status_id " + 
155 				" LEFT OUTER JOIN fossil ON tu.tu_fossil = fossil.fossil_id " +
156 				" LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id ";
157 		String strWhere = " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
158 		String strRecordQuery = strSelect + strFrom + strWhere;
159 		return strRecordQuery;
160 	}
161 	
162 
163 //	/**
164 //	 * @param config
165 //	 * @return
166 //	 */
167 //	private String getSecondPathRecordQuery(ErmsImportConfigurator config) {
168 //		//TODO get automatic by second path mappers
169 //		String selectAttributes = "id, tu_parent, tu_typetaxon, tu_typetaxon, tu_typedesignation, tu_acctaxon, tu_status"; 
170 //		String strRecordQuery = 
171 //			" SELECT  " + selectAttributes + 
172 //			" FROM tu " +
173 //			" WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
174 //		return strRecordQuery;
175 //	}
176 
177 
178 //	private String getSecondPathIdQuery(){
179 //		return getIdQuery();
180 //	}
181 	
182 	/* (non-Javadoc)
183 	 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#doInvoke(eu.etaxonomy.cdm.io.erms.ErmsImportState)
184 	 */
185 	@Override
186 	protected boolean doInvoke(ErmsImportState state) {
187 		//first path
188 		boolean success = super.doInvoke(state);
189 		
190 //		//second path
191 //		isSecondPath = true;
192 //		ErmsImportConfigurator config = state.getConfig();
193 //		Source source = config.getSource();
194 //			
195 //		String strIdQuery = getSecondPathIdQuery();
196 //		String strRecordQuery = getSecondPathRecordQuery(config);
197 //
198 //		int recordsPerTransaction = config.getRecordsPerTransaction();
199 //		try{
200 //			ResultSetPartitioner partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
201 //			while (partitioner.nextPartition()){
202 //				partitioner.doPartition(this, state);
203 //			}
204 //		} catch (SQLException e) {
205 //			logger.error("SQLException:" +  e);
206 //			return false;
207 //		}
208 //		
209 //		isSecondPath = false;
210 //
211 //		logger.info("end make " + getPluralString() + " ... " + getSuccessString(success));
212 		return success;
213 
214 	}
215 
216 
217 
218 	/* (non-Javadoc)
219 	 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
220 	 */
221 	public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
222 		String nameSpace;
223 		Class cdmClass;
224 		Set<String> idSet;
225 		Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
226 		
227 		try{
228 				Set<String> nameIdSet = new HashSet<String>();
229 				Set<String> referenceIdSet = new HashSet<String>();
230 				while (rs.next()){
231 	//				handleForeignKey(rs, nameIdSet, "PTNameFk");
232 	//				handleForeignKey(rs, referenceIdSet, "PTRefFk");
233 				}
234 
235 			//reference map
236 //			nameSpace = "Reference";
237 //			cdmClass = ReferenceBase.class;
238 //			Map<String, Person> referenceMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(Person.class, teamIdSet, nameSpace);
239 //			result.put(ReferenceBase.class, referenceMap);
240 
241 		} catch (SQLException e) {
242 			throw new RuntimeException(e);
243 		}
244 		return result;
245 	}
246 	
247 
248 	/* (non-Javadoc)
249 	 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet)
250 	 */
251 	public TaxonBase createObject(ResultSet rs, ErmsImportState state) throws SQLException {
252 		int statusId = rs.getInt("status_id");
253 		String tuName = rs.getString("tu_name");
254 		String displayName = rs.getString("tu_displayname");
255 		
256 		String parent1Name = rs.getString("parent1name");
257 		Integer parent1Rank = rs.getInt("parent1rank");
258 		
259 		String parent2Name = rs.getString("parent2name");
260 		Integer parent2Rank = rs.getInt("parent2rank");
261 		
262 		String parent3Name = rs.getString("parent3name");
263 		Integer parent3Rank = rs.getInt("parent3rank");
264 		
265 		
266 		NonViralName taxonName = getTaxonName(rs, state);
267 		//set epithets
268 		if (taxonName.isGenus() || taxonName.isSupraGeneric()){
269 			taxonName.setGenusOrUninomial(tuName);
270 		}else if (taxonName.isInfraGeneric()){
271 			taxonName.setInfraGenericEpithet(tuName);
272 			taxonName.setGenusOrUninomial(parent1Name);
273 		}else if (taxonName.isSpecies()){
274 			taxonName.setSpecificEpithet(tuName);
275 			getGenusAndInfraGenus(parent1Name, parent2Name, parent1Rank, taxonName);
276 		}else if (taxonName.isInfraSpecific()){
277 			if (parent1Rank < 220){
278 				handleException(parent1Rank, taxonName, displayName);
279 			}
280 			taxonName.setInfraSpecificEpithet(tuName);
281 			taxonName.setSpecificEpithet(parent1Name);
282 			getGenusAndInfraGenus(parent2Name, parent3Name, parent2Rank, taxonName);
283 		}else if (taxonName.getRank()== null){
284 			logger.warn("rank super domain still needs to be implemented. Used domain instead.");
285 			if ("Biota".equalsIgnoreCase(tuName)){
286 				Rank rank = Rank.DOMAIN();  //should be Superdomain
287 				taxonName.setRank(rank);
288 				taxonName.setGenusOrUninomial(tuName);
289 			}else{
290 				String warning = "TaxonName has no rank. Use namecache.";
291 				logger.warn(warning);
292 				taxonName.setNameCache(tuName);
293 			}
294 			
295 		}
296 		//e.g. Leucon [Platyhelminthes] ornatus
297 		if (containsBrackets(displayName)){
298 			taxonName.setNameCache(displayName);
299 			logger.warn("Set name cache: " +  displayName);
300 		}
301 		
302 		//add original source for taxon name (taxon original source is added in mapper
303 		ReferenceBase citation = state.getConfig().getSourceReference();
304 		addOriginalSource(rs, taxonName, "id", NAME_NAMESPACE, citation);
305 		
306 //		taxonName.setNameCache("Test");
307 		
308 		ErmsImportConfigurator config = state.getConfig();
309 		ReferenceBase sec = config.getSourceReference();
310 		if (statusId == 1){
311 			return Taxon.NewInstance(taxonName, sec);
312 		}else{
313 			return Synonym.NewInstance(taxonName, sec);
314 		}
315 	}
316 
317 
318 
319 	/**
320 	 * @param parent1Rank
321 	 * @param displayName 
322 	 * @param taxonName 
323 	 */
324 	private void handleException(Integer parent1Rank, NonViralName taxonName, String displayName) {
325 		logger.warn("Parent of infra specific taxon is higher than species. Used nameCache: " + displayName) ;
326 		taxonName.setNameCache(displayName);
327 	}
328 
329 
330 
331 	/**
332 	 * @param displayName
333 	 * @return
334 	 */
335 	private boolean containsBrackets(String displayName) {
336 		int index = displayName.indexOf("[");
337 		return (index > -1);
338 	}
339 
340 
341 
342 	/**
343 	 * @param parent1Name
344 	 * @param parent2Name
345 	 * @param parent1Rank
346 	 * @param taxonName
347 	 */
348 	private void getGenusAndInfraGenus(String parentName, String grandParentName, Integer parent1Rank, NonViralName taxonName) {
349 		if (parent1Rank <220 && parent1Rank > 180){
350 			//parent is infrageneric
351 			taxonName.setInfraGenericEpithet(parentName);
352 			taxonName.setGenusOrUninomial(grandParentName);
353 		}else{
354 			taxonName.setGenusOrUninomial(parentName);
355 		}
356 	}
357 
358 	/**
359 	 * @param rs
360 	 * @return
361 	 * @throws SQLException 
362 	 */
363 	private NonViralName getTaxonName(ResultSet rs, ErmsImportState state) throws SQLException {
364 		NonViralName result;
365 		Integer kingdomId = parseKingdomId(rs);
366 		Integer intRank = rs.getInt("tu_rank");
367 		
368 		NomenclaturalCode nc = ErmsTransformer.kingdomId2NomCode(kingdomId);
369 		Rank rank = null;
370 		if (kingdomId != null){
371 			rank = state.getRank(intRank, kingdomId);
372 		}else{
373 			logger.warn("KingdomId is null");
374 		}
375 		if (rank == null){
376 			logger.warn("Rank is null. KingdomId: " + kingdomId + ", rankId: " +  intRank);
377 		}
378 		if (nc != null){
379 			result = (NonViralName)nc.getNewTaxonNameInstance(rank);
380 		}else{
381 			result = NonViralName.NewInstance(rank);
382 		}
383 		
384 		return result;
385 	}
386 
387 	/**
388 	 * Returns the kingdom id by extracting it from the second character in the <code>tu_sp</code> 
389 	 * attribute. If the attribute can not be parsed to a valid id <code>null</code>
390 	 * is returned. If the attribute is <code>null</code> the id of the record is returned.
391 	 * @param rs
392 	 * @return
393 	 * @throws SQLException
394 	 */
395 	private int parseKingdomId(ResultSet rs) throws SQLException {
396 		Integer result = null;
397 		String treeString = rs.getString("tu_sp");
398 		if (treeString != null){
399 			if (CdmUtils.isNotEmpty(treeString) && treeString.length() > 1){
400 				String strKingdom = treeString.substring(1,2);
401 				
402 				if (! treeString.substring(0, 1).equals("#") && ! treeString.substring(2, 3).equals("#") ){
403 					logger.warn("Tree string " + treeString + " has no recognized format");
404 				}else{
405 					try {
406 						result = Integer.valueOf(strKingdom);
407 					} catch (NumberFormatException e) {
408 						logger.warn("Kingdom string " + strKingdom + "could not be recognized as a valid number");
409 					}
410 				}
411 			}
412 		}else{
413 			Integer tu_id = rs.getInt("id");
414 			result = tu_id;
415 		}
416 		return result;
417 	}
418 
419 	
420 	/* (non-Javadoc)
421 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
422 	 */
423 	@Override
424 	protected boolean doCheck(ErmsImportState state){
425 		IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();
426 		return validator.validate(state);
427 	}
428 	
429 	
430 	/* (non-Javadoc)
431 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
432 	 */
433 	protected boolean isIgnore(ErmsImportState state){
434 		return ! state.getConfig().isDoTaxa();
435 	}
436 
437 
438 
439 }