1
2
3
4
5
6
7
8
9
10 package eu.etaxonomy.cdm.io.eflora;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang.CharUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.log4j.Logger;
25 import org.jdom.Attribute;
26 import org.jdom.Element;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.common.ResultWrapper;
32 import eu.etaxonomy.cdm.common.XmlHelp;
33 import eu.etaxonomy.cdm.io.common.ICdmIO;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36 import eu.etaxonomy.cdm.model.agent.Person;
37 import eu.etaxonomy.cdm.model.agent.Team;
38 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40 import eu.etaxonomy.cdm.model.common.Annotation;
41 import eu.etaxonomy.cdm.model.common.AnnotationType;
42 import eu.etaxonomy.cdm.model.common.CdmBase;
43 import eu.etaxonomy.cdm.model.common.Credit;
44 import eu.etaxonomy.cdm.model.common.ExtensionType;
45 import eu.etaxonomy.cdm.model.common.ISourceable;
46 import eu.etaxonomy.cdm.model.common.Language;
47 import eu.etaxonomy.cdm.model.common.Marker;
48 import eu.etaxonomy.cdm.model.common.MarkerType;
49 import eu.etaxonomy.cdm.model.common.Representation;
50 import eu.etaxonomy.cdm.model.common.TimePeriod;
51 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
52 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53 import eu.etaxonomy.cdm.model.description.Feature;
54 import eu.etaxonomy.cdm.model.description.FeatureNode;
55 import eu.etaxonomy.cdm.model.description.PolytomousKey;
56 import eu.etaxonomy.cdm.model.description.TaxonDescription;
57 import eu.etaxonomy.cdm.model.description.TextData;
58 import eu.etaxonomy.cdm.model.name.BotanicalName;
59 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
60 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
61 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
62 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
63 import eu.etaxonomy.cdm.model.name.NonViralName;
64 import eu.etaxonomy.cdm.model.name.Rank;
65 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
66 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
67 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
68 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
69 import eu.etaxonomy.cdm.model.occurrence.Specimen;
70 import eu.etaxonomy.cdm.model.reference.IBook;
71 import eu.etaxonomy.cdm.model.reference.IJournal;
72 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
73 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
74 import eu.etaxonomy.cdm.model.reference.ReferenceType;
75 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
76 import eu.etaxonomy.cdm.model.taxon.Taxon;
77 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
78 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
79 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
80 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
81 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
82
83
84
85
86
87
88 @Component
89 public class EfloraTaxonImport extends EfloraImportBase implements ICdmIO<EfloraImportState> {
/** log4j logger shared by all instances of this import class. */
private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);

/** Interval (in handled taxon elements) at which doInvoke writes a progress message. */
private static int modCount = 30000;
/** Name parser; used in this class to parse the full name string of a name type. */
private NonViralNameParserImpl parser = new NonViralNameParserImpl();
94
95 public EfloraTaxonImport(){
96 super();
97 }
98
99
100 @Override
101 public boolean doCheck(EfloraImportState state){
102 boolean result = true;
103 return result;
104 }
105
106
/** Store of key leads still waiting for their target; created on first use in doInvoke and handed to the import state. */
private UnmatchedLeads unmatchedLeads;
108
109 @Override
110 public boolean doInvoke(EfloraImportState state){
111 logger.info("start make Taxa ...");
112
113
114 state.putTree(null, null);
115
116 if (unmatchedLeads == null){
117 unmatchedLeads = UnmatchedLeads.NewInstance();
118 }
119 state.setUnmatchedLeads(unmatchedLeads);
120
121 TransactionStatus tx = startTransaction();
122 unmatchedLeads.saveToSession(getFeatureTreeService());
123
124
125
126 ReferenceBase sourceReference = state.getConfig().getSourceReference();
127 getReferenceService().saveOrUpdate(sourceReference);
128
129 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
130 ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
131
132 Element elbody= getBodyElement(state.getConfig());
133 List<Element> elTaxonList = elbody.getChildren();
134
135 int i = 0;
136
137 Set<String> unhandledTitleClassess = new HashSet<String>();
138 Set<String> unhandledNomeclatureChildren = new HashSet<String>();
139 Set<String> unhandledDescriptionChildren = new HashSet<String>();
140
141 Taxon lastTaxon = getLastTaxon(state);
142
143
144 for (Element elTaxon : elTaxonList){
145 try {
146 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
147 if (! elTaxon.getName().equalsIgnoreCase("taxon")){
148 logger.warn("body has element other than 'taxon'");
149 }
150
151 BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
152 Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
153
154 handleTaxonAttributes(elTaxon, taxon, state);
155
156
157 List<Element> children = elTaxon.getChildren();
158 handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren, unhandledDescriptionChildren, taxon, children);
159 handleTaxonRelation(state, taxon, lastTaxon);
160 lastTaxon = taxon;
161 taxaToSave.add(taxon);
162 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
163
164 } catch (Exception e) {
165 logger.warn("Exception occurred in Sapindacea taxon import: " + e);
166 e.printStackTrace();
167 }
168
169 }
170
171 System.out.println(state.getUnmatchedLeads().toString());
172 logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
173
174 logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
175 logger.info("Children for description are: " + unhandledDescriptionChildren);
176 logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
177 logger.info("Children for nom are: " + unhandledNomChildren);
178
179
180
181 logger.info(i + " taxa handled. Saving ...");
182 getTaxonService().saveOrUpdate(taxaToSave);
183 getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
184 state.getFeatureNodesToSave().clear();
185 commitTransaction(tx);
186
187 logger.info("end makeTaxa ...");
188 logger.info("start makeKey ...");
189
190 logger.info("end makeKey ...");
191
192 return success.getValue();
193 }
194
195
196 private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
197 List<Attribute> attrList = elTaxon.getAttributes();
198 for (Attribute attr : attrList){
199 String attrName = attr.getName();
200 String attrValue = attr.getValue();
201 if ("class".equals(attrName)){
202 if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES") ){
203 taxon.setDoubtful(true);
204 }else{
205 MarkerType markerType = getMarkerType(state, attrValue);
206 if (markerType == null){
207 logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
208 }else{
209 taxon.addMarker(Marker.NewInstance(markerType, true));
210 }
211 }
212 }else if ("num".equals(attrName)){
213 logger.warn("num not yet supported");
214 }else{
215 logger.warn("Attribute " + attrName + " not yet supported for element taxon");
216 }
217 }
218
219 }
220
221
222 private Taxon getLastTaxon(EfloraImportState state) {
223 if (state.getConfig().getLastTaxonUuid() == null){
224 return null;
225 }else{
226 return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
227 }
228 }
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410 private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
411
412
413 Feature feature = handleKeychoiceChar(state, elKeychoice);
414
415
416 List<FeatureNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, feature);
417
418
419 handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
420
421
422 verifyNoAttribute(elKeychoice);
423 }
424
425
426
427
428
429
430
431
432 private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<FeatureNode> childNodes) {
433 Attribute numAttr = elKeychoice.getAttribute("num");
434 String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
435 UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
436 Set<FeatureNode> matchingNodes = openKeys.getNodes(okk);
437 for (FeatureNode matchingNode : matchingNodes){
438 for (FeatureNode childNode : childNodes){
439 matchingNode.addChild(childNode);
440 }
441 openKeys.removeNode(okk, matchingNode);
442 }
443 if (matchingNodes.isEmpty()){
444 for (FeatureNode childNode : childNodes){
445 key.getRoot().addChild(childNode);
446 }
447 }
448
449 elKeychoice.removeAttribute("num");
450 }
451
452
453
454
455
456
457
458
459
460
461 private List<FeatureNode> handleKeychoiceLeads( EfloraImportState state, PolytomousKey key, Element elKeychoice, Taxon taxon, Feature feature) {
462 List<FeatureNode> childNodes = new ArrayList<FeatureNode>();
463 List<Element> leads = elKeychoice.getChildren("lead");
464 for(Element elLead : leads){
465 FeatureNode childNode = handleLead(state, key, elLead, taxon, feature);
466 childNodes.add(childNode);
467 }
468 return childNodes;
469 }
470
471
472
473
474
475
476
477 private Feature handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
478 Feature feature = null;
479 Attribute charAttr = elKeychoice.getAttribute("char");
480 if (charAttr != null){
481 String charStr = charAttr.getValue();
482 feature = getFeature(charStr, state);
483 elKeychoice.removeAttribute("char");
484 }
485 return feature;
486 }
487
488
489 private FeatureNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, Feature feature) {
490 FeatureNode node = FeatureNode.NewInstance();
491 node.setFeature(feature);
492
493
494 String text = handleLeadText(elLead, node);
495
496
497 handleLeadNum(elLead, text);
498
499
500 handleLeadGoto(state, key, elLead, taxon, node);
501
502
503 verifyNoAttribute(elLead);
504
505 return node;
506 }
507
508
509
510
511
512
513
514 private String handleLeadText(Element elLead, FeatureNode node) {
515 String text = elLead.getAttributeValue("text").trim();
516 if (StringUtils.isBlank(text)){
517 logger.warn("Empty text in lead");
518 }
519 elLead.removeAttribute("text");
520 node.addQuestion(Representation.NewInstance(text, null, null, Language.DEFAULT()));
521 return text;
522 }
523
524
525
526
527
528
529
530
531
532 private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, FeatureNode node) {
533 Attribute gotoAttr = elLead.getAttribute("goto");
534 if (gotoAttr != null){
535 String strGoto = gotoAttr.getValue().trim();
536
537 UnmatchedLeadsKey gotoKey = null;
538 if (isInternalNode(strGoto)){
539 gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
540 }else{
541 String taxonKey = makeTaxonKey(strGoto, taxon);
542 gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
543 }
544
545 UnmatchedLeads openKeys = state.getUnmatchedLeads();
546 openKeys.addKey(gotoKey, node);
547 if (gotoKey.isInnerLead()){
548 Set<FeatureNode> existingNodes = openKeys.getNodes(gotoKey);
549 for (FeatureNode existingNode : existingNodes){
550 node.addChild(existingNode);
551 }
552 }
553
554 elLead.removeAttribute("goto");
555 }else{
556 logger.warn("lead has no goto attribute");
557 }
558 }
559
560
561
562
563
564
565 private void handleLeadNum(Element elLead, String text) {
566 Attribute numAttr = elLead.getAttribute("num");
567 if (numAttr != null){
568
569 String num = numAttr.getValue();
570 elLead.removeAttribute("num");
571 }else{
572 logger.info("Keychoice has no num attribute: " + text);
573 }
574 }
575
576
577 private String makeTaxonKey(String strGoto, Taxon taxon) {
578 String result = "";
579 if (strGoto == null){
580 return "";
581 }
582 String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
583 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", "");
584 strGoto = strGoto.replaceAll("\\s+", " ");
585
586 strGoto = strGoto.trim();
587 String[] split = strGoto.split("\\s");
588 for (int i = 0; i<split.length; i++){
589 String single = split[i];
590 if (isGenusAbbrev(single, strGenusName)){
591 split[i] = strGenusName;
592 }
593
594
595
596
597 result = (result + " " + split[i]).trim();
598 }
599 return result;
600 }
601
602
603 private boolean isInfraSpecificMarker(String single) {
604 try {
605 if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
606 return true;
607 }
608 } catch (UnknownCdmTypeException e) {
609 return false;
610 }
611 return false;
612 }
613
614
615 private boolean isGenusAbbrev(String single, String strGenusName) {
616 if (! single.matches("[A-Z]\\.?")) {
617 return false;
618 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
619 return false;
620 }else{
621 return single.charAt(0) == strGenusName.charAt(0);
622 }
623 }
624
625
626 private boolean isInternalNode(String strGoto) {
627 return CdmUtils.isNumeric(strGoto);
628 }
629
630
631 private void makeKeyNotes(Element keyElement, PolytomousKey key) {
632 Element elNotes = keyElement.getChild("notes");
633 if (elNotes != null){
634 keyElement.removeContent(elNotes);
635 String notes = elNotes.getTextNormalize();
636 if (StringUtils.isNotBlank(notes)){
637 key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
638 }
639 }
640 }
641
642
643 private String makeKeyTitle(Element keyElement) {
644 String title = "- no title - ";
645 Attribute titleAttr = keyElement.getAttribute("title");
646 keyElement.removeAttribute(titleAttr);
647 if (titleAttr == null){
648 Element elTitle = keyElement.getChild("keytitle");
649 keyElement.removeContent(elTitle);
650 if (elTitle != null){
651 title = elTitle.getTextNormalize();
652 }
653 }else{
654 title = titleAttr.getValue();
655 }
656 return title;
657 }
658
659
660
661
662
663
664
665 private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
666 Feature chromosomeFeature = getFeature("chromosomes", state);
667 verifyNoAttribute(element);
668 verifyNoChildren(element);
669 String value = element.getTextNormalize();
670 value = replaceStart(value, "Chromosomes");
671 String chromosomesPart = getChromosomesPart(value);
672 String references = value.replace(chromosomesPart, "").trim();
673 chromosomesPart = chromosomesPart.replace(":", "").trim();
674 return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);
675 }
676
677
678
679
680
681
682
683 private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
684 String[] splits = refAll.split(splitter);
685 for (String strRef: splits){
686 ReferenceBase ref = ReferenceFactory.newGeneric();
687 ref.setTitleCache(strRef, true);
688 String refDetail = parseReferenceYearAndDetail(ref);
689 sourcable.addSource(null, null, ref, refDetail);
690 }
691
692
693
694
695
696
697
698
699
700
701
702 }
703
704
705
706
707
708
709 private String getChromosomesPart(String str) {
710 Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
711 Matcher matcher = pattern.matcher(str);
712 if (matcher.find()){
713 return matcher.group(0);
714 }else{
715 logger.warn("Chromosomes could not be parsed: " + str);
716 }
717 return str;
718 }
719
720
721
722
723
724
725
726 private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
727 TextData result = null;
728 verifyNoChildren(element, true);
729
730 List<Attribute> attributes = element.getAttributes();
731 for (Attribute attribute : attributes){
732 if (! attribute.getName().equalsIgnoreCase("class")){
733 logger.warn("Char has unhandled attribute " + attribute.getName());
734 }else{
735 String classValue = attribute.getValue();
736 result = handleDescriptiveElement(state, element, taxon, classValue);
737 }
738 }
739
740 if (attributes.isEmpty()){
741 result = handleDescriptiveElement(state, element, taxon, "Note");
742 }
743
744
745
746 return result;
747 }
748
749
750
751
752
753
754
755
756
757
758 private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
759 TextData result = null;
760 Feature feature = getFeature(classValue, state);
761 if (feature == null){
762 logger.warn("Unhandled feature: " + classValue);
763 }else{
764 String value = element.getValue();
765 value = replaceStart(value, "Notes");
766 value = replaceStart(value, "Note");
767 result = addDescriptionElement(state, taxon, value, feature, null);
768 }
769 return result;
770 }
771
772
773 private void removeBr(Element element) {
774 element.removeChildren("Br");
775 element.removeChildren("br");
776 element.removeChildren("BR");
777 }
778
779
780
781
782
783
784
785 private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
786 verifyNoAttribute(element);
787 verifyNoChildren(element, true);
788 String value = element.getTextNormalize();
789 value = replaceStart(value, "Uses");
790 Feature feature = Feature.USES();
791 return addDescriptionElement(state, taxon, value, feature, null);
792
793 }
794
795
796
797
798
799
800
801
802 private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
803 verifyNoAttribute(element);
804 verifyNoChildren(element, true);
805 String value = element.getTextNormalize();
806 value = replaceStart(value, "Distribution");
807 Feature feature = Feature.DISTRIBUTION();
808
809 return addDescriptionElement(state, taxon, value, feature, null);
810 }
811
812
813
814
815
816
817
818
819 private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
820 verifyNoAttribute(elEcology);
821 verifyNoChildren(elEcology, true);
822 String value = elEcology.getTextNormalize();
823 Feature feature = Feature.ECOLOGY();
824 if (value.startsWith("Habitat & Ecology")){
825 feature = getFeature("Habitat & Ecology", state);
826 value = replaceStart(value, "Habitat & Ecology");
827 }else if (value.startsWith("Habitat")){
828 value = replaceStart(value, "Habitat");
829 feature = getFeature("Habitat", state);
830 }
831 return addDescriptionElement(state, taxon, value, feature, null);
832 }
833
834
835
836
837
838
839
840 private String replaceStart(String value, String replacementString) {
841 if (value.startsWith(replacementString) ){
842 value = value.substring(replacementString.length()).trim();
843 }
844 while (value.startsWith("-") || value.startsWith("–") ){
845 value = value.substring("-".length()).trim();
846 }
847 return value;
848 }
849
850
851
852
853
854
855 protected String removeTrailing(String value, String replacementString) {
856 if (value == null){
857 return null;
858 }
859 if (value.endsWith(replacementString) ){
860 value = value.substring(0, value.length() - replacementString.length()).trim();
861 }
862 return value;
863 }
864
865
866
867
868
869
870
871 private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
872 verifyNoAttribute(elNomenclature);
873
874 List<Element> elements = elNomenclature.getChildren();
875 for (Element element : elements){
876 if (element.getName().equals("homotypes")){
877 handleHomotypes(state, element, taxon);
878 }else if (element.getName().equals("notes")){
879 handleNomenclatureNotes(state, element, taxon);
880 }else{
881 unhandledChildren.add(element.getName());
882 }
883 }
884
885 }
886
887
888
889 private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
890 verifyNoAttribute(elNotes);
891 verifyNoChildren(elNotes);
892 String notesText = elNotes.getTextNormalize();
893 Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
894 taxon.addAnnotation(annotation);
895 }
896
897
898
/** Names of children of homotypes elements that are not handled; static, so shared by all instances; reported in the log by doInvoke. */
private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
900
901
902
903
904
905 private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
906 verifyNoAttribute(elHomotypes);
907
908 List<Element> elements = elHomotypes.getChildren();
909 HomotypicalGroup homotypicalGroup = null;
910 for (Element element : elements){
911 if (element.getName().equals("nom")){
912 homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
913 }else{
914 unhandledHomotypeChildren.add(element.getName());
915 }
916 }
917
918 }
919
/** Names of children of nom elements that are not handled; static, so shared by all instances; reported in the log by doInvoke. */
private static Set<String> unhandledNomChildren = new HashSet<String>();
921
922
923
924
925
926
927 private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
928 List<Attribute> attributes = elNom.getAttributes();
929
930 boolean taxonBaseClassType = false;
931 for (Attribute attribute : attributes){
932 if (! attribute.getName().equalsIgnoreCase("class")){
933 logger.warn("Nom has unhandled attribute " + attribute.getName());
934 }else{
935 String classValue = attribute.getValue();
936 if (classValue.equalsIgnoreCase("acceptedname")){
937 homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
938 taxonBaseClassType = true;
939 }else if (classValue.equalsIgnoreCase("synonym")){
940 homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
941 taxonBaseClassType = true;
942 }else if (classValue.equalsIgnoreCase("typeref")){
943 handleTypeRef(state, elNom, taxon, homotypicalGroup);
944 }else{
945 logger.warn("Unhandled class value for nom: " + classValue);
946 }
947
948 }
949 }
950
951 List<Element> elements = elNom.getChildren();
952 for (Element element : elements){
953 if (element.getName().equals("name") || element.getName().equals("homonym") ){
954 if (taxonBaseClassType == false){
955 logger.warn("Name or homonym tag not allowed in non taxon nom tag");
956 }
957 }else{
958 unhandledNomChildren.add(element.getName());
959 }
960 }
961
962 return homotypicalGroup;
963
964 }
965
966
967
968
969
970
971
972 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
973 verifyNoChildren(elNom);
974 String typeRef = elNom.getTextNormalize();
975 typeRef = removeStartingTypeRefMinus(typeRef);
976
977 String[] split = typeRef.split(":");
978 if (split.length < 2){
979 logger.warn("typeRef has no ':' : " + typeRef);
980 }else if (split.length > 2){
981 logger.warn("typeRef has more than 1 ':' : " + typeRef);
982 }else{
983 StringBuffer typeType = new StringBuffer(split[0]);
984 String typeText = split[1].trim();
985 TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
986
987
988 if (typeDesignation instanceof NameTypeDesignation){
989 makeNameTypeDesignations(typeType, typeText, typeDesignation);
990 }
991
992 else if (typeDesignation instanceof SpecimenTypeDesignation){
993 makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
994 }else{
995 logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
996 }
997 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
998 name.addTypeDesignation(typeDesignation, true);
999 }
1000 }
1001 }
1002
1003
1004
1005
1006
1007
1008 protected String removeStartingTypeRefMinus(String typeRef) {
1009 typeRef = replaceStart(typeRef, "-");
1010 typeRef = replaceStart(typeRef, "—");
1011 typeRef = replaceStart(typeRef, "\u002d");
1012 typeRef = replaceStart(typeRef, "\u2013");
1013 typeRef = replaceStart(typeRef, "--");
1014 return typeRef;
1015 }
1016
1017
1018
1019
1020
1021
1022 private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1023 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1024
1025 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1026 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1027 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1028 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1029 }else{
1030 logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1031 }
1032
1033 typeText = cleanNameType(typeText);
1034
1035 BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());
1036 ((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1037
1038 }
1039
1040
1041 private String cleanNameType(String typeText) {
1042 String result;
1043 String[] split = typeText.split("\\[.*\\].?");
1044 result = split[0];
1045 return result;
1046 }
1047
1048
1049
1050
1051
1052
1053
1054 protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1055 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1056
1057 }else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1058 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1059 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1060 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1061 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1062 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1063 }else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1064 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1065 }else{
1066 logger.warn("Unhandled type string: " + typeType);
1067 }
1068 Specimen specimen = Specimen.NewInstance();
1069 if (typeText.length() > 255){
1070 specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1071 }else{
1072 specimen.setTitleCache(typeText, true);
1073 }
1074 specimen.addDefinition(typeText, Language.ENGLISH());
1075 ((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1076 }
1077
1078 private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1079 TypeDesignationBase result;
1080 ReferenceBase ref = parseTypeDesignationReference(typeType);
1081 if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1082 if (typeType.indexOf(" species")>-1 ){
1083 result = NameTypeDesignation.NewInstance();
1084 int start = typeType.indexOf(" species");
1085 typeType.replace(start, start + " species".length(), "");
1086 }else {
1087 result = NameTypeDesignation.NewInstance();
1088 int start = typeType.indexOf(" genus");
1089 typeType.replace(start, start + " genus".length(), "");
1090 }
1091 }else{
1092 result = SpecimenTypeDesignation.NewInstance();
1093 }
1094 result.setCitation(ref);
1095 return result;
1096 }
1097
1098
1099 private ReferenceBase parseTypeDesignationReference(StringBuffer typeType) {
1100 ReferenceBase result = null;
1101 String reBracketReference = "\\(.*\\)";
1102 Pattern patBracketReference = Pattern.compile(reBracketReference);
1103 Matcher matcher = patBracketReference.matcher(typeType);
1104 if (matcher.find()){
1105 String refString = matcher.group();
1106 int start = typeType.indexOf(refString);
1107 typeType.replace(start, start + refString.length(), "");
1108 refString = refString.replace("(", "").replace(")", "").trim();
1109 ReferenceBase ref = ReferenceFactory.newGeneric();
1110 ref.setTitleCache(refString, true);
1111 result = ref;
1112 }
1113 return result;
1114 }
1115
1116
1117
1118
1119
1120
1121
1122
1123 private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1124 NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1125 String num = null;
1126
1127 boolean hasGenusInfo = false;
1128 TeamOrPersonBase lastTeam = null;
1129
1130
1131 List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1132 if (elGenus.size() > 0){
1133 hasGenusInfo = true;
1134 }else{
1135 logger.debug ("No Synonym Genus");
1136 }
1137
1138 List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1139 Rank infraRank = null;
1140 infraRank = handleInfRank(name, elInfraRank, infraRank);
1141
1142
1143 List<Element> elements = elNom.getChildren();
1144 elements.removeAll(elInfraRank);
1145
1146 for (Element element : elements){
1147 if (element.getName().equals("name")){
1148 String classValue = element.getAttributeValue("class");
1149 String value = element.getValue().trim();
1150 if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1151 name.setGenusOrUninomial(value);
1152 }else if (classValue.equalsIgnoreCase("family") ){
1153 name.setGenusOrUninomial(value);
1154 name.setRank(Rank.FAMILY());
1155 }else if (classValue.equalsIgnoreCase("subgenus")){
1156
1157 name.setNameCache(value.replace(":", "").trim());
1158 name.setRank(Rank.SUBGENUS());
1159 }else if (classValue.equalsIgnoreCase("epithet") ){
1160 if (hasGenusInfo == true){
1161 name.setSpecificEpithet(value);
1162 }else{
1163 handleInfraspecificEpithet(element, classValue, name);
1164 }
1165 }else if (classValue.equalsIgnoreCase("author")){
1166 handleNameAuthors(element, name);
1167 }else if (classValue.equalsIgnoreCase("paraut")){
1168 handleBasionymAuthor(state, element, name, false);
1169 }else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1170 handleInfrAuthor(state, element, name, true);
1171 }else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1172 handleBasionymAuthor(state, element, name, true);
1173 }else if (classValue.equalsIgnoreCase("infrepi")){
1174 handleInfrEpi(name, infraRank, value);
1175 }else if (classValue.equalsIgnoreCase("pub")){
1176 lastTeam = handleNomenclaturalReference(name, value);
1177 }else if (classValue.equalsIgnoreCase("usage")){
1178 lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1179 }else if (classValue.equalsIgnoreCase("note")){
1180 handleNameNote(name, value);
1181 }else if (classValue.equalsIgnoreCase("num")){
1182 if (num != null){
1183 logger.warn("Duplicate num: " + value);
1184 }else{
1185 num = value;
1186 }
1187 if (isSynonym == true){
1188 logger.warn("Synonym should not have a num");
1189 }
1190 }else if (classValue.equalsIgnoreCase("typification")){
1191 logger.warn("Typification should not be a nom class");
1192 }else{
1193 logger.warn("Unhandled name class: " + classValue);
1194 }
1195 }else if(element.getName().equals("homonym")){
1196 handleHomonym(state, element, name);
1197 }else{
1198
1199 unhandledNomChildren.add(element.getName());
1200 }
1201 }
1202
1203
1204 if (! isSynonym){
1205 String taxonString = name.getNameCache();
1206
1207 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1208 Set<FeatureNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1209
1210 if (num != null){
1211 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1212 handleMatchingNodes(state, taxon, noNumLeadsKey);
1213 }
1214 if (matchingNodes.isEmpty() && num != null){
1215 logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1216 }
1217 }
1218
1219
1220 if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1221 String strElNom = elNom.getTextNormalize();
1222 if ("?".equals(strElNom)){
1223 handleQuestionMark(name, taxon);
1224 }
1225
1226
1227 logger.warn("Nom tag has text: " + strElNom);
1228 }
1229
1230 return name.getHomotypicalGroup();
1231 }
1232
1233
1234 private void handleQuestionMark(NonViralName name, Taxon taxon) {
1235 int count = name.getTaxonBases().size();
1236 if (count != 1){
1237 logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1238 }else{
1239 TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1240 taxonBase.setDoubtful(true);
1241 }
1242 }
1243
1244
1245
1246 private void handleHomonym(EfloraImportState state, Element elHomonym, NonViralName upperName) {
1247 verifyNoAttribute(elHomonym);
1248
1249
1250 BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1251 homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1252 homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1253 homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1254 homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1255
1256 for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1257 String classValue = elName.getAttributeValue("class");
1258 String value = elName.getValue().trim();
1259 if (classValue.equalsIgnoreCase("genus") ){
1260 homonymName.setGenusOrUninomial(value);
1261 }else if (classValue.equalsIgnoreCase("epithet") ){
1262 homonymName.setSpecificEpithet(value);
1263 }else if (classValue.equalsIgnoreCase("author")){
1264 handleNameAuthors(elName, homonymName);
1265 }else if (classValue.equalsIgnoreCase("paraut")){
1266 handleBasionymAuthor(state, elName, homonymName, true);
1267 }else if (classValue.equalsIgnoreCase("pub")){
1268 handleNomenclaturalReference(homonymName, value);
1269 }else if (classValue.equalsIgnoreCase("note")){
1270 handleNameNote(homonymName, value);
1271 }else{
1272 logger.warn("Unhandled class value: " + classValue);
1273 }
1274 }
1275
1276
1277
1278
1279 boolean homonymIsLater = false;
1280 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1281 if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1282 TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1283 TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1284 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1285 }else{
1286 if (upperName.getNomenclaturalReference() == null){
1287 logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1288 }
1289 if (homonymName.getNomenclaturalReference() == null){
1290 logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1291 }
1292 }
1293 if (homonymIsLater){
1294 homonymName.addRelationshipToName(upperName, relType, null);
1295 }else{
1296 upperName.addRelationshipToName(homonymName, relType, null);
1297 }
1298
1299 }
1300
1301
1302
1303
1304
1305
1306
1307
1308 private Set<FeatureNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1309 Set<FeatureNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1310 for (FeatureNode matchingNode : matchingNodes){
1311 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1312 matchingNode.setTaxon(taxon);
1313 state.getFeatureNodesToSave().add(matchingNode);
1314 }
1315 return matchingNodes;
1316 }
1317
1318
1319 private void handleNameNote(NonViralName name, String value) {
1320 logger.warn("Name note: " + value + ". Available in portal?");
1321 Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1322 name.addAnnotation(annotation);
1323 }
1324
1325
1326
1327
1328
1329
1330
1331 protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1332 ReferenceBase ref = ReferenceFactory.newGeneric();
1333 referenceTitle = removeStartingSymbols(referenceTitle, ref);
1334
1335 ref.setTitleCache(referenceTitle, true);
1336 String microReference = parseReferenceYearAndDetail(ref);
1337 TeamOrPersonBase team = getReferenceAuthor(ref);
1338 parseReferenceType(ref);
1339 if (team == null){
1340 team = lastTeam;
1341 }
1342 ref.setAuthorTeam(team);
1343
1344 TaxonDescription description = getDescription(taxon);
1345 TextData textData = TextData.NewInstance(Feature.CITATION());
1346 textData.addSource(null, null, ref, microReference, name, null);
1347 description.addElement(textData);
1348 return team;
1349 }
1350
1351
1352
1353
1354
1355
1356
1357 private String removeStartingSymbols(String referenceTitle, ReferenceBase ref) {
1358 if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1359 referenceTitle = referenceTitle.substring(1).trim();
1360 ref.setTitleCache(referenceTitle);
1361 }
1362 return referenceTitle;
1363 }
1364
1365
1366 private void parseReferenceType(ReferenceBase ref) {
1367 String title = ref.getTitle();
1368 if (title == null){
1369 return;
1370 }
1371 title = title.trim();
1372
1373 if (! title.startsWith("in ")){
1374 ref.setType(ReferenceType.Book);
1375 return;
1376 }
1377
1378 title = title.substring(3);
1379
1380
1381 if (title.indexOf(",") == -1){
1382 ref.setType(ReferenceType.Article);
1383 IJournal journal = ReferenceFactory.newJournal();
1384 journal.setTitle(title);
1385 ref.setTitle(null);
1386 ref.setInJournal(journal);
1387
1388 }else{
1389
1390 ref.setType(ReferenceType.BookSection);
1391 String[] split = (title).split(",\\s*[A-Z]");
1392 if (split.length <= 1){
1393 logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1394 }
1395 IBook book = ReferenceFactory.newBook();
1396 Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1397 try {
1398 title = title.substring(split[0].length() + 1).trim();
1399 } catch (Exception e) {
1400 logger.error("ERROR occurred when trying to split title: " + title + "; split[0]: + " + split[0]);
1401 }
1402 book.setTitle(title);
1403 book.setAuthorTeam(bookTeam);
1404 book.setDatePublished(ref.getDatePublished());
1405 ref.setTitle(null);
1406 ref.setInBook(book);
1407 }
1408 }
1409
1410
1411 protected Team getReferenceAuthor (ReferenceBase ref) {
1412 boolean isCache = false;
1413 String referenceTitle = ref.getTitle();
1414 if (referenceTitle == null){
1415 isCache = true;
1416 referenceTitle = ref.getTitleCache();
1417 }
1418
1419 String[] split = (" " + referenceTitle).split(" in ");
1420 if (split.length > 1){
1421 if (StringUtils.isNotBlank(split[0])){
1422
1423 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1424 if (! isCache){
1425 ref.setTitle("in " + split[1]);
1426 }
1427 return team;
1428 }else{
1429
1430 return null;
1431 }
1432 }
1433
1434 split = referenceTitle.split(",");
1435 if (split.length < 2){
1436
1437 return null;
1438 }
1439
1440
1441 split = (referenceTitle).split(",\\s*[A-Z]");
1442 if (split.length > 1){
1443 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1444 if (! isCache){
1445 ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1446 }
1447 return team;
1448 }else{
1449 logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1450 return null;
1451 }
1452 }
1453
1454
1455
1456
1457
1458
1459
1460
1461 protected String parseHomonym(String detail, NonViralName name) {
1462 String result;
1463 if (detail == null){
1464 return detail;
1465 }
1466
1467
1468
1469 String reNon = "(\\s|,)non\\s";
1470 Pattern patReference = Pattern.compile(reNon);
1471 Matcher matcher = patReference.matcher(detail);
1472 if (matcher.find()){
1473 int start = matcher.start();
1474 int end = matcher.end();
1475
1476 if (detail != null){
1477 logger.warn("Unhandled non part: " + detail.substring(start));
1478 return detail;
1479 }
1480
1481 result = detail.substring(0, start);
1482
1483
1484 String homonymString = detail.substring(end);
1485
1486
1487 BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1488 homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1489 homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1490 homonymName.setSpecificEpithet(name.getSpecificEpithet());
1491 homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1492 ReferenceBase homonymNomRef = ReferenceFactory.newGeneric();
1493 homonymNomRef.setTitleCache(homonymString);
1494 String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1495 homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1496 String authorTitle = homonymNomRef.getTitleCache();
1497 Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1498 homonymNomRef.setAuthorTeam(team);
1499 homonymNomRef.setTitle("");
1500 homonymNomRef.setProtectedTitleCache(false);
1501
1502
1503 boolean homonymIsLater = false;
1504 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1505 TimePeriod homonymYear = homonymNomRef.getDatePublished();
1506 if (name.getNomenclaturalReference() != null){
1507 TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1508 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1509 }else{
1510 logger.warn("Classification name has no nomenclatural reference");
1511 }
1512 if (homonymIsLater){
1513 homonymName.addRelationshipToName(name, relType, null);
1514 }else{
1515 name.addRelationshipToName(homonymName, relType, null);
1516 }
1517
1518 }else{
1519 return detail;
1520 }
1521 return result;
1522 }
1523
1524
1525
1526
1527
1528
1529
1530 protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1531 ReferenceBase nomRef = ReferenceFactory.newGeneric();
1532 nomRef.setTitleCache(value, true);
1533 parseNomStatus(nomRef, name);
1534 String microReference = parseReferenceYearAndDetail(nomRef);
1535 name.setNomenclaturalReference(nomRef);
1536 microReference = parseHomonym(microReference, name);
1537 name.setNomenclaturalMicroReference(microReference);
1538 TeamOrPersonBase team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1539 if (team == null){
1540 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1541 }else{
1542 nomRef.setAuthorTeam(team);
1543 }
1544 return team;
1545 }
1546
1547 private void handleInfrAuthor(EfloraImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1548 String strAuthor = elAuthor.getValue().trim();
1549 if (strAuthor.endsWith(",")){
1550 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1551 }
1552 TeamOrPersonBase[] team = getTeam(strAuthor);
1553 if (name.getCombinationAuthorTeam() != null && overwrite == false){
1554 logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1555 }else{
1556 name.setCombinationAuthorTeam(team[0]);
1557 name.setExCombinationAuthorTeam(team[1]);
1558 }
1559
1560
1561 }
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572 private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1573 if (elInfraRank.size() == 1){
1574 String strRank = elInfraRank.get(0).getTextNormalize();
1575 try {
1576 infraRank = Rank.getRankByNameOrAbbreviation(strRank);
1577 } catch (UnknownCdmTypeException e) {
1578 try{
1579 infraRank = Rank.getRankByNameOrAbbreviation(strRank + ".");
1580 } catch (UnknownCdmTypeException e2) {
1581 logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1582 }
1583 }
1584 }else if (elInfraRank.size() > 1){
1585 logger.warn ("There is more than 1 infrank");
1586 }
1587 if (infraRank != null){
1588 name.setRank(infraRank);
1589 }
1590 return infraRank;
1591 }
1592
1593
1594 private void handleInfrEpi(NonViralName name, Rank infraRank, String value) {
1595 if (infraRank != null && infraRank.isInfraSpecific()){
1596 name.setInfraSpecificEpithet(value);
1597 if (CdmUtils.isCapital(value)){
1598 logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1599 }
1600 }else if (infraRank != null && infraRank.isInfraGeneric()){
1601 name.setInfraGenericEpithet(value);
1602 if (! CdmUtils.isCapital(value)){
1603 logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1604 }
1605 }else{
1606 logger.warn("Infrepi could not be handled: " + value);
1607 }
1608 }
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619 private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1620 NonViralName name;
1621 if (isSynonym){
1622 name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1623 SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1624 if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1625 synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1626 }
1627 taxon.addSynonymName(name, synonymType);
1628 }else{
1629 name = (NonViralName)taxon.getName();
1630 }
1631 return name;
1632 }
1633
1634
1635
1636
1637
1638
1639 private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName name) {
1640 String value = element.getTextNormalize();
1641 if (value.indexOf("subsp.") != -1){
1642
1643 String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1644 name.setInfraSpecificEpithet(infrEpi);
1645 name.setRank(Rank.SUBSPECIES());
1646 }else if (value.indexOf("var.") != -1){
1647
1648 String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1649 name.setInfraSpecificEpithet(infrEpi);
1650 name.setRank(Rank.VARIETY());
1651 }else{
1652 logger.warn("Unhandled infraspecific type: " + value);
1653 }
1654 }
1655
1656
1657
1658
1659
1660
1661
1662 private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {
1663 String strAuthor = elBasionymAuthor.getValue().trim();
1664 Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1665 if (reBasionymAuthor.matcher(strAuthor).matches()){
1666 strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1667 }else{
1668 logger.warn("Brackets are missing for original combination author " + strAuthor);
1669 }
1670 TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1671 if (name.getBasionymAuthorTeam() != null && overwrite == false){
1672 logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1673 }else{
1674 name.setBasionymAuthorTeam(basionymTeam[0]);
1675 name.setExBasionymAuthorTeam(basionymTeam[1]);
1676
1677 }
1678 }
1679
/**
 * Maps an author string to the uuid of the team or person created for it.
 * Filled by {@code getTeam(String)} and read by {@code getUuidTeam(String)}.
 */
private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1681
1682
1683
1684
1685
1686 private void handleNameAuthors(Element elAuthor, NonViralName name) {
1687 if (name.getCombinationAuthorTeam() != null){
1688 logger.warn("Name already has a combination author. Name: " + name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1689 }
1690 String strAuthor = elAuthor.getValue().trim();
1691 if (strAuthor.endsWith(",")){
1692 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1693 }
1694 if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1695 logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1696 }
1697 TeamOrPersonBase[] team = getTeam(strAuthor);
1698 name.setCombinationAuthorTeam(team[0]);
1699 name.setExCombinationAuthorTeam(team[1]);
1700 }
1701
1702
1703
1704
1705
1706
1707 private TeamOrPersonBase[] getTeam(String strAuthor) {
1708 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1709 String[] split = strAuthor.split(" ex ");
1710 String strBaseAuthor = null;
1711 String strExAuthor = null;
1712
1713 if (split.length == 2){
1714 strBaseAuthor = split[1];
1715 strExAuthor = split[0];
1716 }else if (split.length == 1){
1717 strBaseAuthor = split[0];
1718 }else{
1719 logger.warn("Could not parse (ex) author: " + strAuthor);
1720 }
1721 result[0] = getUuidTeam(strBaseAuthor);
1722 if (result[0] == null){
1723 result[0] = parseSingleTeam(strBaseAuthor);
1724 teamMap.put(strBaseAuthor, result[0].getUuid());
1725 }
1726 if (strExAuthor != null){
1727 result[1] = getUuidTeam(strExAuthor);
1728 if (result[1] == null){
1729 result[1] = Team.NewInstance();
1730 result[1].setTitleCache(strExAuthor, true);
1731 teamMap.put(strExAuthor, result[1].getUuid());
1732 }
1733
1734 }
1735 return result;
1736 }
1737
1738
1739 protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1740 TeamOrPersonBase result;
1741 String[] split = strBaseAuthor.split("&");
1742 if (split.length > 1){
1743 result = Team.NewInstance();
1744 for (String personString : split){
1745 Person person = makePerson(personString);
1746 ((Team)result).addTeamMember(person);
1747 }
1748 }else{
1749 result = makePerson(strBaseAuthor.trim());
1750 }
1751 return result;
1752 }
1753
1754
1755
1756
1757
1758
1759 private Person makePerson(String personString) {
1760 personString = personString.trim();
1761 Person person = Person.NewTitledInstance(personString);
1762 person.setNomenclaturalTitle(personString);
1763 return person;
1764 }
1765
1766
1767
1768
1769
1770
1771 private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1772 UUID uuidTeam = teamMap.get(strBaseAuthor);
1773 return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1774 }
1775
1776
1777 private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1778 verifyNoAttribute(elDescription);
1779
1780 List<Element> elements = elDescription.getChildren();
1781 for (Element element : elements){
1782 if (element.getName().equalsIgnoreCase("char")){
1783 handleChar(state, element, taxon);
1784 }else{
1785 logger.warn("Unhandled description child: " + element.getName());
1786 }
1787 }
1788
1789 }
1790
1791
1792
1793
1794
1795
1796
1797 private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1798 List<Attribute> attributes = element.getAttributes();
1799 for (Attribute attribute : attributes){
1800 if (! attribute.getName().equalsIgnoreCase("class")){
1801 logger.warn("Char has unhandled attribute " + attribute.getName());
1802 }else{
1803 String classValue = attribute.getValue();
1804 Feature feature = getFeature(classValue, state);
1805 if (feature == null){
1806 logger.warn("Unhandled feature: " + classValue);
1807 }else{
1808 String value = element.getValue();
1809 addDescriptionElement(state, taxon, value, feature, null);
1810 }
1811
1812 }
1813 }
1814
1815 List<Element> elements = element.getChildren();
1816 if (! elements.isEmpty()){
1817 logger.warn("Char has unhandled children");
1818 }
1819 }
1820
1821
1822
1823
1824
1825
1826 protected TaxonDescription getDescription(Taxon taxon) {
1827 for (TaxonDescription description : taxon.getDescriptions()){
1828 if (! description.isImageGallery()){
1829 return description;
1830 }
1831 }
1832 TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1833 return newDescription;
1834 }
1835
1836
1837
1838
1839
1840
1841
1842
1843 private Feature getFeature(String classValue, EfloraImportState state) {
1844 UUID uuid;
1845 try {
1846 uuid = state.getTransformer().getFeatureUuid(classValue);
1847 if (uuid == null){
1848 logger.info("Uuid is null for " + classValue);
1849 }
1850 String featureText = StringUtils.capitalize(classValue);
1851 Feature feature = getFeature(state, uuid, featureText, featureText, classValue);
1852 if (feature == null){
1853 throw new NullPointerException(classValue + " not recognized as a feature");
1854 }
1855 return feature;
1856 } catch (Exception e) {
1857 logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1858 return Feature.UNKNOWN();
1859 }
1860 }
1861
1862
1863
1864
1865
1866
1867
1868
1869 private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1870
1871 List<Attribute> attributes = element.getAttributes();
1872 for (Attribute attribute : attributes){
1873 if (! attribute.getName().equalsIgnoreCase("class") ){
1874 if (! attribute.getName().equalsIgnoreCase("num")){
1875 logger.warn("Title has unhandled attribute " + attribute.getName());
1876 }else{
1877
1878 }
1879 }else{
1880 String classValue = attribute.getValue();
1881 try {
1882 Rank rank;
1883 try {
1884 rank = Rank.getRankByNameOrAbbreviation(classValue);
1885 } catch (Exception e) {
1886
1887 rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICBN, false);
1888 }
1889 taxon.getName().setRank(rank);
1890 if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1891 handleGenus(element.getValue(), taxon.getName());
1892 }else if (rank.equals(Rank.SUBGENUS())){
1893 handleSubGenus(element.getValue(), taxon.getName());
1894 }else if (rank.equals(Rank.SECTION_BOTANY())){
1895 handleSection(element.getValue(), taxon.getName());
1896 }else if (rank.equals(Rank.SPECIES())){
1897 handleSpecies(element.getValue(), taxon.getName());
1898 }else if (rank.equals(Rank.SUBSPECIES())){
1899 handleSubSpecies(element.getValue(), taxon.getName());
1900 }else if (rank.equals(Rank.VARIETY())){
1901 handleVariety(element.getValue(), taxon.getName());
1902 }else{
1903 logger.warn("Unhandled rank: " + rank.getLabel());
1904 }
1905 } catch (UnknownCdmTypeException e) {
1906 logger.warn("Unknown rank " + classValue);
1907 unhandledTitleClassess.add(classValue);
1908 }
1909 }
1910 }
1911 List<Element> elements = element.getChildren();
1912 if (! elements.isEmpty()){
1913 logger.warn("Title has unexpected children");
1914 }
1915 UUID uuidTitle = EfloraTransformer.uuidTitle;
1916 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1917 taxon.addExtension(element.getTextNormalize(), titleExtension);
1918
1919 }
1920
1921
1922
1923
1924
1925
1926 private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
1927 String name = value.replace("Subgenus", "").trim();
1928 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1929 }
1930
1931
1932
1933
1934
1935 private void handleSection(String value, TaxonNameBase taxonNameBase) {
1936 String name = value.replace("Section", "").trim();
1937 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1938 }
1939
1940
1941
1942
1943
/**
 * Called by handleTitle for titles of rank species. No handling is implemented,
 * both parameters are ignored.
 *
 * @param value the title text
 * @param taxonNameBase the name of the titled taxon
 */
private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
    // nothing to do
}
1947
1948
1949
1950
1951
/**
 * Called by handleTitle for titles of rank variety. No handling is implemented,
 * both parameters are ignored.
 *
 * @param value the title text
 * @param taxonNameBase the name of the titled taxon
 */
private void handleVariety(String value, TaxonNameBase taxonNameBase) {
    // nothing to do
}
1955
1956
1957
1958
1959
/**
 * Called by handleTitle for titles of rank subspecies. No handling is implemented,
 * both parameters are ignored.
 *
 * @param value the title text
 * @param taxonNameBase the name of the titled taxon
 */
private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
    // nothing to do
}
1963
1964
/**
 * Matches a text part enclosed in round or square brackets, from the first opening
 * to the last closing bracket. Used by handleGenus to find the author in a title.
 */
private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1966
1967
1968
1969
1970
1971 protected void handleGenus(String value, TaxonNameBase taxonName) {
1972 Matcher matcher = rexGenusAuthor.matcher(value);
1973 if (matcher.find()){
1974 String author = matcher.group();
1975
1976 author = author.substring(1, author.length() - 1);
1977 Team team = Team.NewInstance();
1978 team.setTitleCache(author, true);
1979 Credit credit = Credit.NewInstance(team, null);
1980 taxonName.addCredit(credit);
1981
1982
1983
1984 }else{
1985 logger.info("No Author match for " + value);
1986 }
1987 }
1988
1989
1990
1991
1992
1993
1994 private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
1995
1996 TaxonomicTree tree = getTree(state);
1997 if (lastTaxon == null){
1998 tree.addChildTaxon(taxon, null, null, null);
1999 return;
2000 }
2001 Rank thisRank = taxon.getName().getRank();
2002 Rank lastRank = lastTaxon.getName().getRank();
2003 if (lastTaxon.getTaxonNodes().size() > 0){
2004 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2005 if (thisRank.isLower(lastRank ) ){
2006 lastNode.addChildTaxon(taxon, null, null, null);
2007 fillMissingEpithetsForTaxa(lastTaxon, taxon);
2008 }else if (thisRank.equals(lastRank)){
2009 TaxonNode parent = lastNode.getParent();
2010 if (parent != null){
2011 parent.addChildTaxon(taxon, null, null, null);
2012 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2013 }else{
2014 tree.addChildTaxon(taxon, null, null, null);
2015 }
2016 }else if (thisRank.isHigher(lastRank)){
2017 handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2018
2019
2020 }
2021 }else{
2022 logger.warn("Last taxon has no node");
2023 }
2024 }
2025
2026
2027
2028
2029
2030
2031
2032 private TaxonomicTree getTree(EfloraImportState state) {
2033 TaxonomicTree result = state.getTree(null);
2034 if (result == null){
2035 UUID uuid = state.getConfig().getTaxonomicTreeUuid();
2036 if (uuid == null){
2037 logger.warn("No classification uuid is defined");
2038 result = getNewClassification(state);
2039 }else{
2040 result = getTaxonTreeService().getTaxonomicTreeByUuid(uuid);
2041 if (result == null){
2042 result = getNewClassification(state);
2043 result.setUuid(uuid);
2044 }
2045 }
2046 state.putTree(null, result);
2047 }
2048 return result;
2049 }
2050
2051
2052 private TaxonomicTree getNewClassification(EfloraImportState state) {
2053 TaxonomicTree result;
2054 result = TaxonomicTree.NewInstance(state.getConfig().getClassificationTitle());
2055 state.putTree(null, result);
2056 return result;
2057 }
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067 private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2068 TextData textData = TextData.NewInstance(feature);
2069 Language textLanguage = getDefaultLanguage(state);
2070 textData.putText(value, textLanguage);
2071 TaxonDescription description = getDescription(taxon);
2072 description.addElement(textData);
2073 if (references != null){
2074 makeOriginalSourceReferences(textData, ";", references);
2075 }
2076 return textData;
2077 }
2078
2079 private Language getDefaultLanguage(EfloraImportState state) {
2080 UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2081 if (defaultLanguageUuid != null){
2082 Language result = state.getDefaultLanguage();
2083 if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2084 result = (Language)getTermService().find(defaultLanguageUuid);
2085 state.setDefaultLanguage(result);
2086 if (result == null){
2087 logger.warn("Default language for " + defaultLanguageUuid + " does not exist.");
2088 }
2089 }
2090 return result;
2091 }else{
2092 return Language.DEFAULT();
2093 }
2094 }
2095
2096
2097
2098
2099
2100 private void verifyNoAttribute(Element element) {
2101 List<Attribute> attributes = element.getAttributes();
2102 if (! attributes.isEmpty()){
2103 logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2104 }
2105 }
2106
2107
2108
2109
2110 protected void verifyNoChildren(Element element) {
2111 verifyNoChildren(element, false);
2112 }
2113
2114
2115
2116
2117 private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2118 List<Element> children = element.getChildren();
2119 if (! children.isEmpty()){
2120 if (ignoreLineBreak == true){
2121 for (Element child : children){
2122 if (! child.getName().equalsIgnoreCase("BR")){
2123 logger.warn(element.getName() + " has unhandled child: " + child.getName());
2124 }
2125 }
2126 }else{
2127 logger.warn(element.getName() + " has unhandled children");
2128 }
2129 }
2130 }
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141 protected void parseNomStatus(ReferenceBase ref, NonViralName nonViralName) {
2142 String titleToParse = ref.getTitleCache();
2143
2144 String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName);
2145 if (! noStatusTitle.equals(titleToParse)){
2146 ref.setTitleCache(noStatusTitle, true);
2147 }
2148 }
2149
2150
2151
2152
2153
2154
2155
2156 private String parseReferenceYearAndDetail(ReferenceBase ref){
2157 String detailResult = null;
2158 String titleToParse = ref.getTitleCache();
2159 titleToParse = removeStartingSymbols(titleToParse, ref);
2160 String reReference = "^\\.{1,}";
2161
2162 String oneMonth = "(Feb.|Dec.|March|June|July)";
2163 String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2164 String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2165
2166 String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2167 String reDetail = "\\.{1,10}$";
2168
2169
2170 Pattern patReference = Pattern.compile(
2171 Matcher matcher = patReference.matcher(titleToParse);
2172 if (matcher.find()){
2173 int start = matcher.start();
2174 int end = matcher.end();
2175
2176
2177 String title = titleToParse.substring(0, start).trim();
2178
2179 String detail = titleToParse.substring(end).trim();
2180
2181
2182 String strPeriod = matcher.group().trim();
2183 strPeriod = strPeriod.substring(1, strPeriod.length()-1);
2184 Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2185 matcher = patStartMonth.matcher(strPeriod);
2186 strPeriod = strPeriod.replace(" ", "");
2187 Integer startMonth = null;
2188 if (matcher.find()){
2189 end = matcher.end();
2190 strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2191 startMonth = getMonth(strPeriod.substring(0, end));
2192 }
2193
2194 TimePeriod datePublished = TimePeriod.parseString(strPeriod);
2195 if (startMonth != null){
2196 datePublished.setStartMonth(startMonth);
2197 }
2198 ref.setDatePublished(datePublished);
2199 ref.setTitle(title);
2200 detailResult = CdmUtils.removeTrailingDot(detail);
2201 if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",") ){
2202 detailResult = detailResult.substring(0, detailResult.length() -1);
2203 }
2204 ref.setProtectedTitleCache(false);
2205 }else{
2206 logger.warn("Could not parse reference: " + titleToParse);
2207 }
2208 return detailResult;
2209
2210 }
2211
2212
2213
2214 private Integer getMonth(String month) {
2215 if (month.startsWith("Jan")){
2216 return 1;
2217 }else if (month.startsWith("Feb")){
2218 return 2;
2219 }else if (month.startsWith("Mar")){
2220 return 3;
2221 }else if (month.startsWith("Apr")){
2222 return 4;
2223 }else if (month.startsWith("May")){
2224 return 5;
2225 }else if (month.startsWith("Jun")){
2226 return 6;
2227 }else if (month.startsWith("Jul")){
2228 return 7;
2229 }else if (month.startsWith("Aug")){
2230 return 8;
2231 }else if (month.startsWith("Sep")){
2232 return 9;
2233 }else if (month.startsWith("Oct")){
2234 return 10;
2235 }else if (month.startsWith("Nov")){
2236 return 11;
2237 }else if (month.startsWith("Dec")){
2238 return 12;
2239 }else{
2240 logger.warn("Month not yet supported: " + month);
2241 return null;
2242 }
2243 }
2244
2245
2246
2247
2248
2249 protected boolean isIgnore(EfloraImportState state){
2250 return ! state.getConfig().isDoTaxa();
2251 }
2252
2253 }