/** * 得到网页中图片的地址 */ public static List<String> getImgStr(String htmlStr){ String img=""; Pattern p_image; Matcher m_image; List<String> pics = new ArrayList<String>(); // String regEx_img = "<img.*src=(.*?)[^>]*?>"; //图片链接地址
String regEx_img = "<img.*srcs*=s*(.*?)[^>]*?>"; p_image = Pattern.compile (regEx_img,Pattern.CASE_INSENSITIVE); m_image = p_image.matcher(htmlStr); while(m_image.find()){ img = img + "," + m_image.group(); // Matcher m = Pattern.compile("src="?(.*?)("|>|s+)").matcher(img); //匹配src
Matcher m = Pattern.compile("srcs*=s*"?(.*?)("|>|s+)").matcher(img);
while(m.find()){ pics.add(m.group(1)); } } return pics; } //重点在于正则表达式 <img.*src=(.*?)[^>]*?> // src="?(.*?)("|>|s+)
private final static String regxpForHtml = "<([^>]*)>"; // 过滤所有以<开头以>结尾的标签
private final static String regxpForImgTag = "<s*imgs+([^>]*)s*>"; // 找出IMG标签
private final static String regxpForImaTagSrcAttrib = "src="([^"]+)""; // 找出IMG标签的SRC属性
String regxp = "<\\s*" + tag + "\\s+([^>]*)\\s*>"; // 其中 tag 是动态变化的,即调用时指定的标签名
1. public static String getImgStr(String htmlStr){ 2. String img="",tmp=""; 3. java.util.regex.Pattern p_image; 4. java.util.regex.Matcher m_image; 5. 6. String regEx_img = "http://[([a-z0-9]|.|/|-)]+.[(jpg)|(bmp)|(gif)|(png)]";//图片链接地址 7. p_image = java.util.regex.Pattern.compile(regEx_img,java.util.regex.Pattern.CASE_INSENSITIVE); 8. m_image = p_image.matcher(htmlStr); 9. while(m_image.find()){ 10. img = img + "," + m_image.group(); 11. } 12. if(img.indexOf(",")>=0) 13. return img.substring(1); 14. else 15. return img; 16. }
方法一:
http://www.cnblogs.com/jintan/archive/2009/10/31/1593639.html
package com.cn;
import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern;
/**
 * Demonstrates extracting the {@code src} attribute of every {@code <img>} tag in an HTML
 * string with a single regular expression.
 */
public class img_src {

    /**
     * Matches an {@code <img ...>} tag up to and including its {@code src} attribute. Group 1
     * captures everything from the start of the attribute value to just before the closing
     * {@code >}, so it still carries the optional quotes and any attributes that follow.
     */
    public static final Pattern PATTERN = Pattern.compile(
            "<img\\s+(?:[^>]*)src\\s*=\\s*([^>]+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    public static void main(String[] args) {
        String html = "<html>\r\n"
                + "<head><title>test</title><head>\r\n"
                + "<body>"
                + "<P><IMG height=\"100\" src='abc.png' weight=\"30\">abcdefg"
                + "<img src='http://abc.xyz.com/123/456.jpg' /><br>"
                + "<IMG height=\"100\" \r\n"
                + " src=\"abc.jpg\" \r\n"
                + " weight=\"30\">abcdefg \r\n"
                + " <img src=ttt.jpg>"
                + " <img src=123.jpg />"
                // Known limitation: a tag such as <img alt="src='abc'"> is not handled,
                // because the text "src=" inside another attribute's value is matched too.
                + "</body></html>";
        System.out.println(getImgSrc(html));
    }

    /**
     * Returns the {@code src} value of every {@code <img>} tag in the given HTML.
     *
     * @param html the HTML text to scan; {@code null} is treated as empty input
     * @return the values in document order with surrounding quotes removed; never {@code null}
     */
    public static List<String> getImgSrc(String html) {
        List<String> list = new ArrayList<String>();
        if (html == null) {
            return list;
        }
        Matcher matcher = PATTERN.matcher(html);
        while (matcher.find()) {
            String value = extractValue(matcher.group(1));
            if (value != null) {
                list.add(value);
            }
        }
        return list;
    }

    /**
     * Cuts the attribute value out of the raw text captured after {@code src=}.
     * Escape sequences inside a quoted value are not interpreted.
     *
     * @param raw the captured text, starting at the first character of the value
     * @return the value, or {@code null} when there is no usable value
     */
    private static String extractValue(String raw) {
        if (raw == null || raw.isEmpty()) {
            return null;
        }
        char first = raw.charAt(0);
        if (first == '\'' || first == '"') {
            int close = raw.indexOf(first, 1);
            // An unterminated quote yields the remaining text instead of an exception.
            return close < 0 ? raw.substring(1).trim() : raw.substring(1, close);
        }
        // An unquoted value ends at the first whitespace character.
        int end = 0;
        while (end < raw.length() && !Character.isWhitespace(raw.charAt(end))) {
            end++;
        }
        return end == 0 ? null : raw.substring(0, end);
    }
}
方法二:
import java.util.regex.Matcher; import java.util.regex.Pattern;
/**
 * Demonstrates a two-step extraction: first locate each self-closing {@code <IMG ... />} tag,
 * then read the double-quoted {@code src} attribute out of that tag.
 */
public class test {

    /** Matches one self-closing IMG tag; it cannot run past the tag's own {@code >}. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<IMG\\b[^>]*/>", Pattern.CASE_INSENSITIVE);

    /** Matches a double-quoted src attribute; group 1 captures the value without the quotes. */
    private static final Pattern SRC_ATTR =
            Pattern.compile("src=\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

    /**
     * Prints the {@code src} value of each IMG tag in the sample markup, one per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "<IMG height=55 src=\"http://www.gobygo.com/TheGoByGo/images/book-channel.gif\" width=210 border=0 />";
        Matcher tagMatcher = IMG_TAG.matcher(s);
        while (tagMatcher.find()) {
            // Look for src inside the current tag only, not inside the whole input, so that
            // each tag reports just its own source.
            Matcher srcMatcher = SRC_ATTR.matcher(tagMatcher.group());
            while (srcMatcher.find()) {
                System.out.println(srcMatcher.group(1));
            }
        }
    }
}
// (Editor: 李大同)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯了您的权利,请及时联系站长删除相关内容!
|