Displaying 2 results from an estimated 2 matches for "parse_html".
2009 Feb 02
2
Ticket #282: omindex-assorted-enhancements.patch woes
..."text/rtf") {
// The --text option unhelpfully converts all non-ASCII characters to
// "?" so we use --html instead, which produces HTML entities.
- string cmd = "unrtf --nopict --html 2>/dev/null " +
shell_protect(file);
MyHtmlParser p;
try {
- p.parse_html(stdout_to_string(cmd));
} catch (ReadError) {
cout << "\"" << cmd << "\" failed - skipping\n";
return;
--- 426,435 ----
} else if (mimetype == "text/rtf") {
// The --text option unhelpfully converts all non-ASCII ch...
2009 Feb 03
1
PowerPoint 2007 filter
.../* string cmd = "unzip -p " + safefile + " ppt/slides/slide*.xml
ppt/notesSlides/notesSlide*.xml ppt/comments/comment*.xml"; */
string cmd = "unzip -p " + safefile + " ppt/slides/slide*.xml";
try {
XmlParser xmlparser;
xmlparser.parse_html(stdout_to_string(cmd));
dump = xmlparser.dump;
} catch (ReadError) {
cout << "\"" << cmd << "\" failed - skipping\n";
return;
}
// End: PowerPoint 2007 .pptx
FYI the mime type I entered was:
mime_map["pptx"...