1
2
3
4
5
6
7
8
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Set;
20
21 import org.apache.log4j.Logger;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.common.CdmUtils;
25 import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
26 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
27 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
28 import eu.etaxonomy.cdm.io.common.IOValidator;
29 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
30 import eu.etaxonomy.cdm.model.common.Annotation;
31 import eu.etaxonomy.cdm.model.common.AnnotationType;
32 import eu.etaxonomy.cdm.model.common.CdmBase;
33 import eu.etaxonomy.cdm.model.common.Marker;
34 import eu.etaxonomy.cdm.model.common.MarkerType;
35 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
36 import eu.etaxonomy.cdm.model.description.Distribution;
37 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
38 import eu.etaxonomy.cdm.model.description.TaxonDescription;
39 import eu.etaxonomy.cdm.model.location.NamedArea;
40 import eu.etaxonomy.cdm.model.location.TdwgArea;
41 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
42 import eu.etaxonomy.cdm.model.taxon.Taxon;
43 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
44 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
45
46
47
48
49
50
51
52 @Component
53 public class BerlinModelOccurrenceImport extends BerlinModelImportBase {
54 private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);
55
56 public static final String NAMESPACE = "Occurrence";
57
58
59
60 private static int modCount = 5000;
61 private static final String pluralString = "occurrences";
62 private static final String dbTableName = "emOccurrence";
63
64
65 public BerlinModelOccurrenceImport(){
66 super();
67 }
68
69
70
71
72 @Override
73 protected String getIdQuery() {
74 return " SELECT occurrenceId FROM " + getTableName();
75 }
76
77
78
79
80 @Override
81 protected String getRecordQuery(BerlinModelImportConfigurator config) {
82 String strQuery =
83 " SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " +
84 " emOccurrence.Cultivated, emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " +
85 " emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " +
86 " FROM emOccurrence INNER JOIN " +
87 " emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " +
88 " PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " +
89 " emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " +
90 " emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " +
91 " WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN + ") )" +
92 " ORDER BY PTaxon.RIdentifier";
93 return strQuery;
94 }
95
96
97
98
99 public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
100 boolean success = true;
101 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
102
103 Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
104
105 ResultSet rs = partitioner.getResultSet();
106
107 try {
108
109
110 Map<Integer, String> duplicateMap = new HashMap<Integer, String>();
111 int oldTaxonId = -1;
112 TaxonDescription oldDescription = null;
113 int i = 0;
114 int countDescriptions = 0;
115 int countDistributions = 0;
116 int countDuplicates = 0;
117
118 while (rs.next()){
119
120 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
121
122 int occurrenceId = rs.getInt("OccurrenceId");
123 int newTaxonId = rs.getInt("taxonId");
124 String tdwgCodeString = rs.getString("TDWGCode");
125 Integer emStatusId = (Integer)rs.getObject("emOccurSumCatId");
126
127 try {
128
129 PresenceAbsenceTermBase<?> status = null;
130 String alternativeStatusString = null;
131 if (emStatusId != null){
132 status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
133 }else{
134 String[] stringArray = new String[]{rs.getString("Native"), rs.getString("Introduced"), rs.getString("Cultivated")};
135 alternativeStatusString = CdmUtils.concat(",", stringArray);
136 }
137
138
139 List<NamedArea> tdwgAreas = new ArrayList<NamedArea>();
140 if (tdwgCodeString != null){
141 String[] tdwgCodes = tdwgCodeString.split(";");
142 for (String tdwgCode : tdwgCodes){
143 NamedArea tdwgArea = TdwgArea.getAreaByTdwgAbbreviation(tdwgCode.trim());
144 if (tdwgArea != null){
145 tdwgAreas.add(tdwgArea);
146 }
147 }
148 }
149 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
150
151 TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
152 if (tdwgAreas.size() == 0){
153 logger.warn("No areas defined for occurrence " + occurrenceId);
154 }
155 for (NamedArea tdwgArea : tdwgAreas){
156 Distribution distribution = Distribution.NewInstance(tdwgArea, status);
157 if (status == null){
158 AnnotationType annotationType = AnnotationType.EDITORIAL();
159 Annotation annotation = Annotation.NewInstance(alternativeStatusString, annotationType, null);
160 distribution.addAnnotation(annotation);
161 distribution.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), false));
162 }
163
164 if (taxonDescription != null) {
165 Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
166 if (duplicate == null){
167 taxonDescription.addElement(distribution);
168 distribution.addSource(String.valueOf(occurrenceId), NAMESPACE, state.getConfig().getSourceReference(), null);
169 countDistributions++;
170 if (taxonDescription != oldDescription){
171 taxaToSave.add(taxonDescription.getTaxon());
172 oldDescription = taxonDescription;
173 countDescriptions++;
174 }
175 }else{
176 countDuplicates++;
177 duplicate.addSource(String.valueOf(occurrenceId), NAMESPACE, state.getConfig().getSourceReference(), null);
178 logger.info("Distribution is duplicate"); }
179 } else {
180 logger.warn("Distribution " + tdwgArea.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
181 success = false;
182 }
183 }
184
185 } catch (UnknownCdmTypeException e) {
186 logger.error("Unknown presenceAbsence status id: " + emStatusId);
187 e.printStackTrace();
188 success = false;
189 }
190
191 }
192
193 logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
194 logger.info("Duplicate occurrences: " + (countDuplicates));
195
196 logger.info("Taxa to save: " + taxaToSave.size());
197 getTaxonService().save(taxaToSave);
198
199 return success;
200 } catch (SQLException e) {
201 logger.error("SQLException:" + e);
202 return false;
203 }
204 }
205
206
207
208
209 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
210 String nameSpace;
211 Class cdmClass;
212 Set<String> idSet;
213 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
214
215 try{
216 Set<String> taxonIdSet = new HashSet<String>();
217 while (rs.next()){
218 handleForeignKey(rs, taxonIdSet, "taxonId");
219 }
220
221
222 nameSpace = BerlinModelTaxonImport.NAMESPACE;
223 cdmClass = TaxonBase.class;
224 idSet = taxonIdSet;
225 Map<String, TaxonBase> objectMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
226 result.put(nameSpace, objectMap);
227
228 } catch (SQLException e) {
229 throw new RuntimeException(e);
230 }
231 return result;
232 }
233
234
235
236
237
238
239
240
241
242
243 private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
244 for (DescriptionElementBase descElBase : description.getElements()){
245 if (descElBase.isInstanceOf(Distribution.class)){
246 Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
247 NamedArea oldArea = oldDistr.getArea();
248 if (oldArea != null && oldArea.equals(distribution.getArea())){
249 PresenceAbsenceTermBase<?> oldStatus = oldDistr.getStatus();
250 if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
251 duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
252 return oldDistr;
253 }
254 }
255 }
256 }
257 return null;
258 }
259
260
261
262
263
264
265
266
267
268 private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase> taxonMap, int occurrenceId, ReferenceBase<?> sourceSec){
269 TaxonDescription result = null;
270 if (oldDescription == null || newTaxonId != oldTaxonId){
271 TaxonBase taxonBase = taxonMap.get(String.valueOf(newTaxonId));
272
273
274 Taxon taxon;
275 if ( taxonBase instanceof Taxon ) {
276 taxon = (Taxon) taxonBase;
277 } else if (taxonBase != null) {
278 logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
279 return null;
280 } else {
281 logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
282 return null;
283 }
284 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
285 if (descriptionSet.size() > 0) {
286 result = descriptionSet.iterator().next();
287 }else{
288 result = TaxonDescription.NewInstance();
289 result.setTitleCache(sourceSec.getTitleCache(), true);
290 taxon.addDescription(result);
291 }
292 }else{
293 result = oldDescription;
294 }
295 return result;
296 }
297
298
299
300
301
302 @Override
303 protected boolean doCheck(BerlinModelImportState state){
304 IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
305 return validator.validate(state);
306 }
307
308
309
310
311 @Override
312 protected String getTableName() {
313 return dbTableName;
314 }
315
316
317
318
319 @Override
320 public String getPluralString() {
321 return pluralString;
322 }
323
324
325
326
327 protected boolean isIgnore(BerlinModelImportState state){
328 return ! state.getConfig().isDoOccurrence();
329 }
330
331 }