加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

xml格式文件(大概有50G)转换为json格式 上传到mongodb数据库中

发布时间:2020-12-16 08:49:47 所属栏目:百科 来源:网络整理
导读:偏小数据的就不做多描述,网上有很多资料,在此有大概50G的xml格式的地理数据转换为json格式的数据,之后上传到mongodb数据库中,有什么好的建议,欢迎指正 解析xml数据 import java.util.ArrayList;import java.util.List;import org.xml.sax.Attributes;im
数据量较小的情况这里不多做描述,网上已有很多资料。本文处理的是大约50G的xml格式地理数据:先将其转换为json格式,再上传到mongodb数据库中。如果有更好的建议,欢迎指正。
解析xml数据
import java.util.ArrayList;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

import com.mongodb.DBObject;
/**
 * SAX {@link ContentHandler} that collects selected fields of every {@code page} element
 * and writes them to MongoDB in batches.
 *
 * <p>For each page the handler captures, in document order, the text of the {@code title},
 * {@code ns}, first {@code id}, {@code timestamp} and {@code text} elements. After the
 * {@code text} element it also derives a redirect target and a record type
 * ({@code "redirect"} or {@code "article"}). The values are kept positionally in
 * {@link #list} and turned into a {@code Data} object when the page ends.
 *
 * <p>Besides the {@code ContentHandler} callbacks the class has two private helpers of note:
 * {@code storeDBMongo} (page to {@code Data}) and {@code writeToMongoDB} (batch to MongoDB).
 *
 * <p>Originally dated 2015-11-8.
 */
public class MyContentHandler implements ContentHandler {
	/** Number of buffered pages that triggers a write to MongoDB. */
	private static final int BATCH_SIZE = 300;
	/** Number of positional entries a complete page contributes to {@link #list}. */
	private static final int FIELD_COUNT = 7;

	/** Text accumulated for the element currently being captured. */
	private final StringBuilder buf = new StringBuilder();
	/** Name of the element being captured, or {@code null} when no capture is active. */
	private String current;
	/** True from the start of a page until its first {@code id} element has been seen. */
	private boolean expectPageId;
	/** Pages that were parsed but not yet written to MongoDB. */
	List<Data> listdata = new ArrayList<Data>();
	/** Positional field values of the page being parsed: title, ns, id, timestamp, text, target, type. */
	List<String> list = new ArrayList<String>();

	@Override
	public void setDocumentLocator(Locator locator) {
		// Location information is not used.
	}

	/** Resets the per-document parsing state and announces the start of parsing. */
	@Override
	public void startDocument() throws SAXException {
		buf.setLength(0);
		current = null;
		expectPageId = false;
		list.clear();
		System.out.println("*******解析开始*******");
	}

	/** Writes the pages still waiting in the batch and announces the end of parsing. */
	@Override
	public void endDocument() throws SAXException {
		flush();
		System.out.println("*******解析结束*******");
	}

	/**
	 * Converts every buffered page to a BSON object and inserts the batch into MongoDB.
	 * The host, database and collection names are placeholders that must be adapted.
	 *
	 * @throws Exception whatever the conversion or the MongoDB helper throws
	 */
	private void writeToMongoDB() throws Exception {
		List<DBObject> dblist = new ArrayList<DBObject>();
		for (Data d : listdata) {
			dblist.add(BSONT.mapToBSON(d.toJSONMap()));
		}
		MongoDBT.writeListToMongo("IP", 27017, "databaseName", "collectionName", dblist);
	}

	/**
	 * Writes the pending batch, if any, and empties it on success. A failure is reported on
	 * standard error and the batch is kept so that the next flush retries it.
	 */
	private void flush() {
		if (listdata.isEmpty()) {
			return;
		}
		try {
			writeToMongoDB();
			listdata.clear();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	@Override
	public void startPrefixMapping(String prefix, String uri) throws SAXException {
		// Namespace prefix mappings are not used.
	}

	@Override
	public void endPrefixMapping(String prefix) throws SAXException {
		// Namespace prefix mappings are not used.
	}

	/**
	 * Starts capturing text when one of the wanted elements opens. A {@code page} start
	 * discards leftovers of a previous page and arms the capture of the page's own id,
	 * which is the first {@code id} element after the page start.
	 */
	@Override
	public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
		String name = elementName(localName, qName);
		if ("page".equals(name)) {
			expectPageId = true;
			current = null;
			buf.setLength(0);
			list.clear();
		} else if ("title".equals(name) || "ns".equals(name)
				|| "timestamp".equals(name) || "text".equals(name)) {
			beginCapture(name);
		} else if ("id".equals(name) && expectPageId) {
			// Later id elements of the same page are ignored.
			expectPageId = false;
			beginCapture(name);
		}
	}

	/**
	 * Finishes the active capture when its element closes and stores the page when the
	 * {@code page} element closes.
	 */
	@Override
	public void endElement(String uri, String localName, String qName) throws SAXException {
		String name = elementName(localName, qName);
		if (current != null && current.equals(name)) {
			String value = buf.toString();
			buf.setLength(0);
			current = null;
			list.add(value);
			if ("text".equals(name)) {
				addRedirectInfo(value);
			}
		}
		if ("page".equals(name)) {
			storeDBMongo(list);
		}
	}

	/**
	 * Builds a {@code Data} object from the positional values of one page, queues it, and
	 * flushes the queue once it holds {@code BATCH_SIZE} pages. The positional list is always
	 * emptied afterwards so that a malformed page cannot shift the fields of later pages.
	 *
	 * @param lt positional values of the finished page
	 */
	private void storeDBMongo(List<String> lt) {
		// NOTE(review): this dumps every captured field, including the full page text, to
		// standard output; kept from the original but costly on very large inputs.
		for (String field : lt) {
			System.out.println(field);
		}
		try {
			if (lt.size() < FIELD_COUNT) {
				System.err.println("Skipping incomplete page: expected at least "
						+ FIELD_COUNT + " fields but captured " + lt.size());
				return;
			}
			Data data = new Data();
			data.setTitle(lt.get(0));
			data.setNamespace(lt.get(1));
			data.setId(lt.get(2));
			data.setLastEsited(lt.get(3));
			data.setMarkup(lt.get(4));
			data.setTarget(lt.get(5));
			data.setType(lt.get(6));
			listdata.add(data);
		} finally {
			lt.clear();
		}
		if (listdata.size() >= BATCH_SIZE) {
			flush();
		}
	}

	/**
	 * Appends a chunk of character data to the active capture. The parser may deliver the
	 * text of one element in several chunks, so the value is only complete in endElement.
	 */
	@Override
	public void characters(char[] ch, int start, int length) throws SAXException {
		if (current != null) {
			buf.append(ch, start, length);
		}
	}

	@Override
	public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
		// Ignorable whitespace carries no data.
	}

	@Override
	public void processingInstruction(String target, String data) throws SAXException {
		// Processing instructions are not used.
	}

	@Override
	public void skippedEntity(String name) throws SAXException {
		// Skipped entities are not used.
	}

	/** Begins capturing the text of the element called {@code name}. */
	private void beginCapture(String name) {
		current = name;
		buf.setLength(0);
	}

	/**
	 * Appends the redirect target and the record type for a page text. A text is a redirect
	 * when it contains a redirect marker and a {@code [[target]]} link; the closing brackets
	 * are searched after the opening ones so that a stray {@code ]]} cannot cause an
	 * out-of-range substring.
	 */
	private void addRedirectInfo(String markup) {
		String target = null;
		boolean marked = markup.toUpperCase(java.util.Locale.ROOT).contains("#REDIRECT")
				|| markup.contains("#重定向");
		if (marked) {
			int open = markup.indexOf("[[");
			int close = open < 0 ? -1 : markup.indexOf("]]", open + 2);
			if (close >= 0) {
				target = markup.substring(open + 2, close);
			}
		}
		if (target != null) {
			list.add(target);
			list.add("redirect");
		} else {
			list.add("");
			list.add("article");
		}
	}

	/** Returns the qualified name when the parser supplies one, otherwise the local name. */
	private static String elementName(String localName, String qName) {
		if (qName != null && qName.length() > 0) {
			return qName;
		}
		return localName;
	}

}

自定义类Data、JSONT

import java.util.HashMap;
import java.util.Map;


/**
 * Mutable holder for seven string properties of one record, with a conversion to a map
 * keyed by property name.
 */
public class Data {
	private String id;
	private String namespace;
	private String type;
	private String title;
	private String markup;
	private String lastEsited;
	private String target;

	/** @return the stored id, or {@code null} if none was set */
	public String getId() {
		return this.id;
	}

	/** @param id the id to store */
	public void setId(String id) {
		this.id = id;
	}

	/** @return the stored namespace, or {@code null} if none was set */
	public String getNamespace() {
		return this.namespace;
	}

	/** @param namespace the namespace to store */
	public void setNamespace(String namespace) {
		this.namespace = namespace;
	}

	/** @return the stored type, or {@code null} if none was set */
	public String getType() {
		return this.type;
	}

	/** @param type the type to store */
	public void setType(String type) {
		this.type = type;
	}

	/** @return the stored title, or {@code null} if none was set */
	public String getTitle() {
		return this.title;
	}

	/** @param title the title to store */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the stored markup, or {@code null} if none was set */
	public String getMarkup() {
		return this.markup;
	}

	/** @param markup the markup to store */
	public void setMarkup(String markup) {
		this.markup = markup;
	}

	/** @return the stored lastEsited value, or {@code null} if none was set */
	public String getLastEsited() {
		return this.lastEsited;
	}

	/** @param lastEsited the lastEsited value to store */
	public void setLastEsited(String lastEsited) {
		this.lastEsited = lastEsited;
	}

	/** @return the stored target, or {@code null} if none was set */
	public String getTarget() {
		return this.target;
	}

	/** @param target the target to store */
	public void setTarget(String target) {
		this.target = target;
	}

	/**
	 * Copies all seven properties into a newly created map.
	 *
	 * @return a new {@link HashMap} with the keys {@code id}, {@code namespace}, {@code type},
	 *         {@code title}, {@code markup}, {@code lastEsited} and {@code target}; properties
	 *         that were never set map to {@code null}
	 */
	public Map<String,Object> toJSONMap() {
		Map<String,Object> fields = new HashMap<String,Object>();
		fields.put("id", id);
		fields.put("namespace", namespace);
		fields.put("type", type);
		fields.put("title", title);
		fields.put("markup", markup);
		fields.put("lastEsited", lastEsited);
		fields.put("target", target);
		return fields;
	}

}


/*
 *    NextMap-Crawler Module
 *    
 *    Copyright (C) 2002-2014,Institute of Geographic Sciences and Natural Resources Research,*    Chinese Academy of Sciences
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation;
 *    version 2.1 of the License.
 *
 *    This library is distributed in the hope that it will be useful,*    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 */


import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Static helpers that convert between JSON text and maps, lists and beans using Jackson.
 *
 * <p>All methods are best-effort: an {@link IOException} raised during conversion is
 * printed with {@code printStackTrace()} and not rethrown.
 *
 * @author zhuhaichuan
 * @date 2015-11-8
 */
public class JSONT {
	/** Shared mapper, created once instead of on every call. */
	private static final ObjectMapper MAPPER = new ObjectMapper();

	/**
	 * Serialises a map to JSON text.
	 *
	 * @param map the map to serialise
	 * @return the JSON text; if serialisation fails, whatever was written before the failure
	 */
	public static String mapToJSONString(Map map) {
		return write(map);
	}

	/**
	 * Serialises a list to JSON text.
	 *
	 * @param list the list to serialise
	 * @return the JSON text; if serialisation fails, whatever was written before the failure
	 */
	public static String listToJSONString(List list) {
		return write(list);
	}

	/**
	 * Serialises an arbitrary object to JSON text.
	 *
	 * @param bean the object to serialise
	 * @return the JSON text; if serialisation fails, whatever was written before the failure
	 */
	public static String beanToJSONString(Object bean) {
		return write(bean);
	}

	/**
	 * Parses JSON text into a map.
	 *
	 * @param jsonstr the JSON text
	 * @return the parsed map, or {@code null} if parsing fails with an {@link IOException}
	 */
	public static Map jsonToMap(String jsonstr) {
		Map map = null;
		try {
			map = MAPPER.readValue(jsonstr, Map.class);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return map;
	}

	/**
	 * Parses JSON text into a list.
	 *
	 * @param jsonstr the JSON text
	 * @return the parsed list, or {@code null} if parsing fails with an {@link IOException}
	 */
	public static List jsonToList(String jsonstr) {
		List list = null;
		try {
			list = MAPPER.readValue(jsonstr, List.class);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return list;
	}

	/** Writes {@code value} as JSON into a string; shared by the three serialising methods. */
	private static String write(Object value) {
		StringWriter sw = new StringWriter();
		try {
			MAPPER.writeValue(sw, value);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return sw.toString();
	}
}
还有就是类MyErrorHandler
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;


/**
 * SAX {@link ErrorHandler} that prints every reported problem to standard output and never
 * throws from its callbacks.
 */
public class MyErrorHandler implements ErrorHandler {

	/** Prints a warning reported by the parser. */
	@Override
	public void warning(SAXParseException exception) throws SAXException {
		report("*******WARNING******", "exception信息:", "********************", exception);
	}

	/** Prints a recoverable error reported by the parser. */
	@Override
	public void error(SAXParseException exception) throws SAXException {
		report("******* ERROR ******", "exception信息:", "********************", exception);
	}

	/** Prints a fatal error reported by the parser. */
	@Override
	public void fatalError(SAXParseException exception) throws SAXException {
		report("******** FATAL ERROR ********", "exception信息", "*****************************", exception);
	}

	/**
	 * Prints one five-line report: banner, line number, column number, message and footer.
	 *
	 * @param banner       first line of the report
	 * @param messageLabel text printed directly in front of the exception message
	 * @param footer       last line of the report
	 * @param problem      the problem whose position and message are printed
	 */
	private static void report(String banner, String messageLabel, String footer, SAXParseException problem) {
		System.out.println(banner);
		System.out.println("行号:" + problem.getLineNumber());
		System.out.println("列号:" + problem.getColumnNumber());
		System.out.println(messageLabel + problem.getMessage());
		System.out.println(footer);
	}

}

自定义MongoDBT类
import java.util.ArrayList;
import java.util.List;

import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.Mongo;


/** Helper for writing documents to MongoDB through the legacy {@code Mongo} driver API. */
public class MongoDBT {
	/**
	 * Inserts all documents of {@code list} into one collection, using a connection that is
	 * opened for this call and closed again afterwards, even when the insert fails.
	 *
	 * @param ip       host name or address of the MongoDB server
	 * @param port     port of the MongoDB server
	 * @param dbname   name of the database
	 * @param collname name of the collection inside the database
	 * @param list     documents to insert; a {@code null} or empty list is a no-op and opens
	 *                 no connection
	 * @throws Exception if connecting or inserting fails
	 */
	public static void writeListToMongo(String ip,int port,String dbname,String collname,List<DBObject> list) throws Exception{
		if(list==null||list.isEmpty()){
			return;
		}
		Mongo mongo=new Mongo(ip,port);
		try{
			DB db=mongo.getDB(dbname);
			DBCollection collection=db.getCollection(collname);
			collection.insert(list);
		}finally{
			// Release the connection on every path; the original leaked it when insert threw.
			mongo.close();
		}
	}
}

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读