ClaptrapBot/src/main/java/net/Broken/Tools/FindContentOnWebPage.java

53 lines
1.7 KiB
Java
Raw Normal View History

package net.Broken.Tools;
2017-05-10 21:43:44 +02:00
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
/**
 * Minimal HTML scraping helpers: download a page and pull the address of an
 * image out of it using plain string searches (no HTML parser involved).
 */
public class FindContentOnWebPage {

    /** Opening of the attribute that carries the picture address. */
    private static final String SRC_ATTRIBUTE = "src=\"";

    /**
     * Find a picture URL on a web page.
     *
     * <p>The page is searched, in order, for the first occurrence of
     * {@code divClass}, then the first occurrence of {@code htmlType} after it,
     * then the first {@code src="} after that. The text between that opening
     * quote and the next double quote is returned. The page itself is never
     * modified during the search, so a URL that happens to contain
     * {@code divClass} is returned intact.
     *
     * @param url Web page URL
     * @param divClass Text identifying the div where the picture is (typically its class name)
     * @param htmlType HTML tag of the image (img)
     * @return Picture URL, exactly as written in the {@code src} attribute
     * @throws IOException if the page cannot be downloaded
     * @throws StringIndexOutOfBoundsException if {@code divClass}, {@code htmlType}
     *         or a {@code src="} attribute cannot be found in the page
     */
    public static String doYourJob(String url, String divClass, String htmlType) throws IOException {
        String source = getSourceUrl(url);

        int divIndex = requireFound(source.indexOf(divClass), divClass, url);
        // Start looking for the tag after the marker so that a marker such as
        // "img-box" is not itself mistaken for the "img" tag.
        int tagIndex = requireFound(
                source.indexOf(htmlType, divIndex + divClass.length()), htmlType, url);
        // Searching for src=" (rather than just "src") skips look-alikes such as srcset=.
        int srcIndex = requireFound(source.indexOf(SRC_ATTRIBUTE, tagIndex), SRC_ATTRIBUTE, url);

        int valueStart = srcIndex + SRC_ATTRIBUTE.length();
        int valueEnd = source.indexOf('"', valueStart);
        // An unterminated attribute yields everything up to the end of the page.
        return valueEnd < 0 ? source.substring(valueStart) : source.substring(valueStart, valueEnd);
    }

    /**
     * Get source code of a web page.
     *
     * <p>The request is sent with the {@code User-Agent} header set to
     * {@code Googlebot} and the response is decoded as UTF-8. Lines are
     * concatenated without any separator, so the returned text contains no
     * line breaks.
     *
     * @param url Web page URL
     * @return Web page source as String
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public static String getSourceUrl(String url) throws IOException {
        URLConnection connection = new URL(url).openConnection();
        connection.setRequestProperty("User-Agent", "Googlebot");

        StringBuilder page = new StringBuilder();
        // try-with-resources closes the stream even when reading fails midway.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
        }
        return page.toString();
    }

    /**
     * Returns {@code index} unchanged when it denotes a match, otherwise fails
     * with a message naming what was being searched for.
     */
    private static int requireFound(int index, String what, String url) {
        if (index < 0) {
            throw new StringIndexOutOfBoundsException(
                    "Could not find '" + what + "' in page " + url);
        }
        return index;
    }
}