html() is deprecated: please use read_html() instead.

html(x, ..., encoding = "")

# S3 method for session
read_xml(x, ..., as_html = FALSE)

Arguments

x

A URL, a local path, a string containing HTML, or a response from an httr request.

...

If x is a URL, additional arguments are passed on to httr::GET().

encoding

Specify the encoding of the document. See iconvlist() for the complete list. If you have problems determining the correct encoding, try stringi::stri_enc_detect().

as_html

Optionally parse an XML file as if it's HTML.

Examples

# From a url:
google <- read_html("http://google.com", encoding = "ISO-8859-1")
google %>% xml_structure()
#> <html [itemscope, itemtype, lang]> #> <head> #> <meta [content, name]> #> <meta [content, name]> #> <meta [content, http-equiv]> #> <meta [content, itemprop]> #> <title> #> {text} #> <script [nonce]> #> {cdata} #> <style> #> {cdata} #> <style> #> {cdata} #> <script [nonce]> #> <body [bgcolor]> #> <script [nonce]> #> {cdata} #> <div [id]> #> {text} #> <div [id]> #> <nobr> #> <b [class]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, href]> #> {text} #> {text} #> <a [class, style, href]> #> <u> #> {text} #> {text} #> <div [id, width]> #> <nobr> #> <span [id, class]> #> <span [id, class]> #> <span [id]> #> <a [href, class]> #> {text} #> {text} #> <a [href, class]> #> {text} #> {text} #> <a [target, id, href, class]> #> {text} #> <div [class, style]> #> <div [class, style]> #> {text} #> <center> #> <br [clear, id]> #> <div [id]> #> <img [alt, height, src, style, width, id]> #> <br> #> <br> #> <form [action, name]> #> <table [cellpadding, cellspacing]> #> <tr [valign]> #> <td [width]> #> {text} #> <td [align, nowrap]> #> <input [name, value, type]> #> <input [value, name, type]> #> <input [name, type, value]> #> <input [name, type]> #> <input [name, type]> #> <div [class, style]> #> <input [class, style, autocomplete, value, title, maxlength, name, size]> #> <br [style]> #> <span [class]> #> <span [class]> #> <input [class, value, name, type]> #> <span [class]> #> <span [class]> #> <input [class, id, value, name, type]> #> <script [nonce]> #> {cdata} #> <input [value, name, type]> #> <td [class, align, nowrap, width]> #> <a [href]> #> {text} #> <input [id, name, type, value]> #> <script [nonce]> #> {cdata} #> <div [id]> #> <div [style]> #> <br> #> <span [id]> #> <div [style]> #> <div [style, id]> #> <a [href]> #> {text} #> <a [href]> #> 
{text} #> <a [href]> #> {text} #> <p [style]> #> {text} #> <a [href]> #> {text} #> {text} #> <a [href]> #> {text} #> <script [nonce]> #> {cdata}
google %>% html_nodes("div")
#> {xml_nodeset (11)}
#> [1] <div id="mngb"> <div id="gbar"><nobr><b class="gb1">Search</b> <a class= ...
#> [2] <div id="gbar"><nobr><b class="gb1">Search</b> <a class="gb1" href="http ...
#> [3] <div id="guser" width="100%"><nobr><span id="gbn" class="gbi"></span><sp ...
#> [4] <div class="gbh" style="left:0"></div>\n
#> [5] <div class="gbh" style="right:0"></div>
#> [6] <div id="lga">\n<img alt="Google" height="92" src="/images/branding/goog ...
#> [7] <div class="ds" style="height:32px;margin:4px 0"><input class="lst" styl ...
#> [8] <div id="gac_scont"></div>\n
#> [9] <div style="font-size:83%;min-height:3.5em"><br></div>\n
#> [10] <div style="font-size:10pt"><div style="margin:19px auto;text-align:cent ...
#> [11] <div style="margin:19px auto;text-align:center" id="fll">\n<a href="/int ...
# From a string: (minimal html 5 document)
# http://www.brucelawson.co.uk/2010/a-minimal-html5-document/
minimal <- read_html("<!doctype html> <meta charset=utf-8> <title>blah</title> <p>I'm the content")
minimal
#> {html_document}
#> <html>
#> [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
#> [2] <body><p>I'm the content</p></body>
minimal %>% xml_structure()
#> <html>
#>   <head>
#>     <meta [http-equiv, content]>
#>     <meta [charset]>
#>     <title>
#>       {text}
#>   <body>
#>     <p>
#>       {text}
# From an httr request
google2 <- read_html(httr::GET("http://google.com"))