search for: parse_html

Displaying 2 results from an estimated 2 matches for "parse_html".

2009 Feb 02
2
Ticket #282: omindex-assorted-enhancements.patch woes
..."text/rtf") { // The --text option unhelpfully converts all non-ASCII characters to // "?" so we use --html instead, which produces HTML entities. - string cmd = "unrtf --nopict --html 2>/dev/null " + shell_protect(file); MyHtmlParser p; try { - p.parse_html(stdout_to_string(cmd)); } catch (ReadError) { cout << "\"" << cmd << "\" failed - skipping\n"; return; --- 426,435 ---- } else if (mimetype == "text/rtf") { // The --text option unhelpfully converts all non-ASCII ch...
2009 Feb 03
1
PowerPoint 2007 filter
.../* string cmd = "unzip -p " + safefile + " ppt/slides/slide*.xml ppt/notesSlides/notesSlide*.xml ppt/comments/comment*.xml"; */ string cmd = "unzip -p " + safefile + " ppt/slides/slide*.xml"; try { XmlParser xmlparser; xmlparser.parse_html(stdout_to_string(cmd)); dump = xmlparser.dump; } catch (ReadError) { cout << "\"" << cmd << "\" failed - skipping\n"; return; } // End: PowerPoint 2007 .pptx FYI the mime type I entered was: mime_map["pptx"...