mirror of
https://github.com/rkd77/elinks.git
synced 2024-11-04 08:17:17 -05:00
Add support for more graceful detection of HTML content
It will look at the first fragment of the cache entry and try to detect the content-type by looking for valid HTML. It is very stupid for now, simply searching for "<html>", which may be bogus in certain circumstances. And I am not sure if this is better left out and up to the scripting backends, e.g. SMJS can now modify the cache entry. A feeble fix for bug 396.
This commit is contained in:
parent
58c83a9f48
commit
3ea73da7df
@ -224,6 +224,34 @@ get_cache_header_content_type(struct cache_entry *cached)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unsigned char *
|
||||
get_fragment_content_type(struct cache_entry *cached)
|
||||
{
|
||||
struct fragment *fragment;
|
||||
size_t length;
|
||||
unsigned char *sample;
|
||||
unsigned char *ctype;
|
||||
|
||||
if (list_empty(cached->frag))
|
||||
return NULL;
|
||||
|
||||
fragment = cached->frag.next;
|
||||
if (fragment->offset)
|
||||
return NULL;
|
||||
|
||||
length = fragment->length > 1024 ? 1024 : fragment->length;
|
||||
sample = memacpy(fragment->data, length);
|
||||
if (!sample)
|
||||
return NULL;
|
||||
|
||||
if (strcasestr(sample, "<html>"))
|
||||
ctype = stracpy("text/html");
|
||||
|
||||
mem_free(sample);
|
||||
|
||||
return ctype;
|
||||
}
|
||||
|
||||
unsigned char *
|
||||
get_content_type(struct cache_entry *cached)
|
||||
{
|
||||
@ -269,6 +297,12 @@ get_content_type(struct cache_entry *cached)
|
||||
mem_free_if(ctype);
|
||||
}
|
||||
|
||||
ctype = get_fragment_content_type(cached);
|
||||
if (ctype && *ctype) {
|
||||
cached->content_type = ctype;
|
||||
return ctype;
|
||||
}
|
||||
|
||||
debug_ctype(get_default_mime_type());
|
||||
|
||||
/* Fallback.. use some hardwired default */
|
||||
|
Loading…
Reference in New Issue
Block a user