View Javadoc

1   /**
2   * Copyright (C) 2007 EDIT
3   * European Distributed Institute of Taxonomy 
4   * http://www.e-taxonomy.eu
5   * 
6   * The contents of this file are subject to the Mozilla Public License Version 1.1
7   * See LICENSE.TXT at the top of this package for the full license terms.
8   */
9   package eu.etaxonomy.cdm.io.taxonx;
10  
11  import java.util.ArrayList;
12  import java.util.List;
13  
14  import org.apache.log4j.Logger;
15  import org.jdom.Content;
16  import org.jdom.Element;
17  import org.jdom.Namespace;
18  import org.jdom.Text;
19  import org.springframework.stereotype.Component;
20  
21  import eu.etaxonomy.cdm.common.CdmUtils;
22  import eu.etaxonomy.cdm.io.common.CdmIoBase;
23  import eu.etaxonomy.cdm.io.common.ICdmIO;
24  import eu.etaxonomy.cdm.model.agent.Person;
25  import eu.etaxonomy.cdm.model.agent.Team;
26  import eu.etaxonomy.cdm.model.common.Marker;
27  import eu.etaxonomy.cdm.model.common.MarkerType;
28  import eu.etaxonomy.cdm.model.common.TimePeriod;
29  import eu.etaxonomy.cdm.model.reference.IGeneric;
30  import eu.etaxonomy.cdm.model.reference.IPublicationBase;
31  import eu.etaxonomy.cdm.model.reference.ReferenceBase;
32  import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
33  import eu.etaxonomy.cdm.model.reference.ReferenceType;
34  
35  
36  /**
37   * @author a.mueller
38   * @created 29.07.2008
39   * @version 1.0
40   */
41  @Component
42  public class TaxonXModsImport extends CdmIoBase<TaxonXImportState> implements ICdmIO<TaxonXImportState> {
43  	private static final Logger logger = Logger.getLogger(TaxonXModsImport.class);
44  
45  	@SuppressWarnings("unused")
46  	private static int modCount = 10000;
47  	private ReferenceFactory refFactory = ReferenceFactory.newInstance();
48  	public TaxonXModsImport(){
49  		super();
50  	}
51  	
52  	public boolean doCheck(TaxonXImportState state){
53  		boolean result = true;
54  		logger.warn("Checking for TaxonXMods not yet implemented");
55  		//result &= checkArticlesWithoutJournal(bmiConfig);
56  		
57  		return result;
58  	}
59  
60  	public boolean doInvoke(TaxonXImportState state){		
61  		boolean success = true;
62  		logger.info("start make mods reference ...");
63  		TaxonXImportConfigurator config = state.getConfig();
64  		Element root = config.getSourceRoot();
65  		Namespace nsTaxonx = root.getNamespace();
66  		Namespace nsMods = Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");
67  		
68  		state.setModsReference(null);  //delete old reference
69  		Element elTaxonHeader = root.getChild("taxonxHeader", nsTaxonx);
70  		if (elTaxonHeader != null){
71  			Element elMods = elTaxonHeader.getChild("mods", nsMods);
72  			if (elMods != null){
73  				ReferenceBase<?> ref = refFactory.newGeneric();
74  				//TitleInfo
75  				Element elTitleInfo = elMods.getChild("titleInfo", nsMods);
76  				if (elTitleInfo != null){
77  					success &= makeTitleInfo(elTitleInfo, ref);
78  				}else{
79  					logger.warn("TitleInfo element is missing in " + state.getConfig().getSource());
80  				}
81  				//mods name
82  				Element elModsName = elMods.getChild("name", nsMods);
83  				success &= makeModsName(elModsName, ref);
84  				//origin info
85  				Element elOriginInfo = elMods.getChild("originInfo", nsMods);
86  				success &= makeOriginInfo(elOriginInfo, ref);
87  				
88  				//publish
89  				if (state.getConfig().isPublishReferences()){
90  					boolean publish = false;
91  					ref.addMarker(Marker.NewInstance(MarkerType.IN_BIBLIOGRAPHY(), publish));
92  				}
93  				
94  				//save
95  				state.setModsReference(ref);
96  				
97  			}
98  		}
99  
100 		logger.info("end make mods reference ...");
101 		return true;
102 	}
103 	
104 
105 	/**
106 	 * @param contentList
107 	 * @return
108 	 */
109 	private void removeEmptyContent(List<Content> contentList) {
110 		List<Content> removeList = new ArrayList<Content>();
111 		for (Content content: contentList){
112 			if (content instanceof Text){
113 				if ( CdmUtils.isEmpty(((Text) content).getTextNormalize())){
114 					removeList.add(content);
115 				}
116 			}
117 		}
118 		contentList.removeAll(removeList);
119 	}
120 	
121 	/**
122 	 * @param elModsName
123 	 * @param ref
124 	 */
125 	private boolean makeOriginInfo(Element elOriginInfo, ReferenceBase<?> ref) {
126 		Namespace nsMods = elOriginInfo.getNamespace();
127 		List<Content> contentList = elOriginInfo.getContent();
128 		
129 		//dateIssued
130 		Element elDateIssued = elOriginInfo.getChild("dateIssued", nsMods);
131 		if (elDateIssued != null){
132 			String dateIssued = elDateIssued.getTextNormalize();
133 			contentList.remove(elDateIssued);
134 			
135 			TimePeriod datePublished = TimePeriod.parseString(dateIssued);
136 			if (ref.getType().isPublication()){
137 				((IPublicationBase)ref).setDatePublished(datePublished );
138 			}else{
139 				logger.warn("Reference has issue date but is not of type publication base. Date was not set");
140 			}
141 		}
142 		
143 		//dateIssued
144 		Element elPublisher = elOriginInfo.getChild("publisher", nsMods);
145 		if (elPublisher != null){
146 			String publisher = elPublisher.getTextNormalize();
147 			contentList.remove(elPublisher);
148 			
149 			if (ref.getType().isPublication()){
150 				((IPublicationBase)ref).setPublisher(publisher);
151 			}else{
152 				logger.warn("Reference has publisher but is not of type publication base. Publisher was not set");
153 			}
154 		}
155 		
156 		removeEmptyContent(contentList);
157 		for (Content o: contentList){
158 			logger.warn(o + " (in mods:originInfo) not yet implemented for mods import");
159 		}
160 		return true;
161 	}
162 
163 
164 	/**
165 	 * @param elModsName
166 	 * @param ref
167 	 */
168 	//TODO
169 	//THIS implementation is against the mods semantics but supports the current
170 	//format for palmae taxonX files
171 	//The later has to be changed and this part has to be adapted
172 	private boolean makeModsName(Element elModsName, ReferenceBase<?> ref) {
173 		int UNPARSED = 0;
174 		int PARSED = 1;
175 		Namespace nsMods = elModsName.getNamespace();
176 		List<Content> contentList = elModsName.getContent();
177 		Team authorTeam = Team.NewInstance();
178 		
179 		//name
180 		List<Element> elNameParts = elModsName.getChildren("namePart", nsMods);
181 		int mode = UNPARSED;
182 		if (elNameParts.size() > 0){
183 			if (elNameParts.get(0).getAttributes().size() > 0){
184 				mode = PARSED;
185 			}
186 		}
187 		
188 		if (mode == 0){
189 			Element elNamePart = elNameParts.get(0); 
190 			if (elNamePart != null){
191 				String namePart = elNamePart.getTextNormalize();
192 				contentList.remove(elNamePart);
193 				authorTeam.setTitleCache(namePart, true);
194 			}
195 			if (elNameParts.size()> 1){
196 				logger.warn("Multiple nameparts of unexpected type");
197 			}
198 		}else{
199 			
200 			Person lastTeamMember = Person.NewInstance();
201 			List<Element> tmpNamePartList = new ArrayList<Element>();
202 			tmpNamePartList.addAll(elNameParts);
203 			for (Element elNamePart: tmpNamePartList){
204 				if (elNamePart.getAttributeValue("type").equals("family")){
205 					lastTeamMember = Person.NewInstance();
206 					authorTeam.addTeamMember(lastTeamMember);
207 					lastTeamMember.setLastname(elNamePart.getTextNormalize());
208 				}else if (elNamePart.getAttributeValue("type").equals("given")){
209 					lastTeamMember.setFirstname(elNamePart.getTextNormalize());
210 				}else{
211 					logger.warn("Unsupport name part type");
212 				}
213 				contentList.remove(elNamePart);
214 			}
215 		}
216 		ref.setAuthorTeam(authorTeam);
217 		
218 		removeEmptyContent(contentList);
219 		for (Content o: contentList){
220 			logger.warn(o + " (in mods:name) not yet implemented for mods import");
221 		}
222 		return true;
223 	}
224 
225 	/**
226 	 * @param elTitleInfo
227 	 * @param ref
228 	 */
229 	private boolean makeTitleInfo(Element elTitleInfo, ReferenceBase<?> ref) {
230 		Namespace nsMods = elTitleInfo.getNamespace();
231 		List<Content> contentList = elTitleInfo.getContent();
232 		
233 		//title
234 		Element elTitle = elTitleInfo.getChild("title", nsMods);
235 		if (elTitle != null){
236 			String title = elTitle.getTextNormalize();
237 			contentList.remove(elTitle);
238 			ref.setTitle(title);
239 		}
240 		removeEmptyContent(contentList);
241 		for (Content o: contentList){
242 			logger.warn(o + " (in titleInfo) not yet implemented for mods import");
243 		}
244 		return true;
245 	}
246 	
247 	/* (non-Javadoc)
248 	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
249 	 */
250 	protected boolean isIgnore(TaxonXImportState state){
251 		return ! state.getConfig().isDoMods();
252 	}
253 	
254 }