Skip to content

Commit

Permalink
languages - move language associations closer to languages registry (m…
Browse files Browse the repository at this point in the history
  • Loading branch information
bpasero authored Dec 30, 2021
1 parent 4cd3294 commit d2b7550
Show file tree
Hide file tree
Showing 10 changed files with 408 additions and 389 deletions.
228 changes: 1 addition & 227 deletions src/vs/base/common/mime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import { ParsedPattern, parse } from 'vs/base/common/glob';
import { Schemas } from 'vs/base/common/network';
import { basename, extname, posix } from 'vs/base/common/path';
import { DataUri } from 'vs/base/common/resources';
import { startsWithUTF8BOM } from 'vs/base/common/strings';
import { URI } from 'vs/base/common/uri';
import { extname } from 'vs/base/common/path';

export namespace Mimes {
export const text = 'text/plain';
Expand All @@ -18,227 +13,6 @@ export namespace Mimes {
export const latex = 'text/latex';
}

/**
 * Describes one way in which a text mime type can be associated with
 * a resource. Apart from `id` and `mime` every criterion is optional.
 */
export interface ITextMimeAssociation {

	/** Identifier carried along with the association. */
	readonly id: string;

	/** The mime type reported when this association matches. */
	readonly mime: string;

	/** File name that has to equal the (lowercased) base name of the path. */
	readonly filename?: string;

	/** Suffix the (lowercased) base name of the path has to end with. */
	readonly extension?: string;

	/** Glob pattern; it is tested against the full path if it contains a path separator, otherwise against the base name. */
	readonly filepattern?: string;

	/** Regular expression that is matched against the first line of the content. */
	readonly firstline?: RegExp;

	/** Marks associations that originate from user configuration; these are consulted before all others. */
	readonly userConfigured?: boolean;
}

/**
 * Internal representation of a registered association: the public
 * association plus values that are computed once at registration
 * time so that lookups can compare against lowercased input.
 */
interface ITextMimeAssociationItem extends ITextMimeAssociation {

	/** `filename` converted to lower case. */
	readonly filenameLowercase?: string;

	/** `extension` converted to lower case. */
	readonly extensionLowercase?: string;

	/** `filepattern` converted to lower case and parsed into a matcher. */
	readonly filepatternLowercase?: ParsedPattern;

	/** Whether `filepattern` contains a path separator and thus has to be matched against the full path. */
	readonly filepatternOnPath?: boolean;
}

// All associations in registration order (user configured and not).
let registeredAssociations: Array<ITextMimeAssociationItem> = [];

// Only those associations that are NOT user configured.
let nonUserRegisteredAssociations: Array<ITextMimeAssociationItem> = [];

// Only those associations that are user configured.
let userRegisteredAssociations: Array<ITextMimeAssociationItem> = [];

/**
* Associate a text mime to the registry.
*/
/**
 * Associate a text mime to the registry.
 *
 * The association is appended to the list of all associations and, depending
 * on its `userConfigured` flag, to either the user or the non-user list.
 *
 * @param association the association to register
 * @param warnOnOverwrite when `true` and the association is not user
 * configured, a warning is printed to the console for every previously
 * registered, non-user association with a different mime that uses the same
 * extension, filename, filepattern or firstline expression.
 */
export function registerTextMime(association: ITextMimeAssociation, warnOnOverwrite = false): void {

	// Register
	const associationItem = toTextMimeAssociationItem(association);
	registeredAssociations.push(associationItem);
	if (!associationItem.userConfigured) {
		nonUserRegisteredAssociations.push(associationItem);
	} else {
		userRegisteredAssociations.push(associationItem);
	}

	// Check for conflicts unless this is a user configured association
	if (warnOnOverwrite && !associationItem.userConfigured) {
		for (const a of registeredAssociations) {
			if (a.mime === associationItem.mime || a.userConfigured) {
				continue; // same mime or userConfigured is ok
			}

			if (associationItem.extension && a.extension === associationItem.extension) {
				console.warn(`Overwriting extension <<${associationItem.extension}>> to now point to mime <<${associationItem.mime}>>`);
			}

			if (associationItem.filename && a.filename === associationItem.filename) {
				console.warn(`Overwriting filename <<${associationItem.filename}>> to now point to mime <<${associationItem.mime}>>`);
			}

			if (associationItem.filepattern && a.filepattern === associationItem.filepattern) {
				console.warn(`Overwriting filepattern <<${associationItem.filepattern}>> to now point to mime <<${associationItem.mime}>>`);
			}

			// Regular expressions are objects: two separately created but identical
			// expressions are never `===`, so compare their source and flags instead.
			if (
				associationItem.firstline && a.firstline &&
				a.firstline.source === associationItem.firstline.source &&
				a.firstline.flags === associationItem.firstline.flags
			) {
				console.warn(`Overwriting firstline <<${associationItem.firstline}>> to now point to mime <<${associationItem.mime}>>`);
			}
		}
	}
}

/**
 * Converts a public association into its internal form by copying all of
 * its fields and adding the lowercased / parsed lookup values.
 *
 * @param association the association as handed in by the caller
 * @returns a new item; the input object is not modified
 */
function toTextMimeAssociationItem(association: ITextMimeAssociation): ITextMimeAssociationItem {
	const { id, mime, filename, extension, filepattern, firstline, userConfigured } = association;

	// The pattern is only parsed (and inspected for a path separator) when one is present
	let filepatternLowercase: ParsedPattern | undefined = undefined;
	let filepatternOnPath = false;
	if (filepattern) {
		filepatternLowercase = parse(filepattern.toLowerCase());
		filepatternOnPath = filepattern.includes(posix.sep);
	}

	return {
		id,
		mime,
		filename,
		extension,
		filepattern,
		firstline,
		userConfigured,
		filenameLowercase: filename ? filename.toLowerCase() : undefined,
		extensionLowercase: extension ? extension.toLowerCase() : undefined,
		filepatternLowercase,
		filepatternOnPath
	};
}

/**
* Clear text mimes from the registry.
*/
/**
 * Clear text mimes from the registry.
 *
 * @param onlyUserConfigured when truthy, only user configured associations
 * are removed; otherwise all associations are removed.
 */
export function clearTextMimes(onlyUserConfigured?: boolean): void {

	// User configured associations are dropped in either mode
	userRegisteredAssociations = [];

	if (onlyUserConfigured) {
		registeredAssociations = registeredAssociations.filter(association => !association.userConfigured);
		return;
	}

	registeredAssociations = [];
	nonUserRegisteredAssociations = [];
}

/**
* Given a file, return the best matching mime type for it
*/
/**
 * Given a file, return the best matching mime type for it.
 *
 * @param resource the resource to guess for; its path is derived from the
 * file system path (file scheme), the label meta data (data scheme) or the
 * plain URI path (any other scheme)
 * @param firstLine optional first line of the content, only consulted when
 * no path based association matches
 * @returns the matching mime followed by the generic text mime, or only the
 * unknown mime when there is no path or nothing matches
 */
export function guessMimeTypes(resource: URI | null, firstLine?: string): string[] {
	let path: string | undefined;
	if (resource) {
		const scheme = resource.scheme;
		if (scheme === Schemas.file) {
			path = resource.fsPath;
		} else if (scheme === Schemas.data) {
			path = DataUri.parseMetaData(resource).get(DataUri.META_DATA_LABEL);
		} else {
			path = resource.path;
		}
	}

	if (!path) {
		return [Mimes.unknown];
	}

	// Associations are stored lowercased, so compare against a lowercased path
	const lowercasePath = path.toLowerCase();
	const filename = basename(lowercasePath);

	// Path based lookup: user configured associations first, then all other registered ones
	for (const associations of [userRegisteredAssociations, nonUserRegisteredAssociations]) {
		const mime = guessMimeTypeByPath(lowercasePath, filename, associations);
		if (mime) {
			return [mime, Mimes.text];
		}
	}

	// First line based lookup comes last
	const firstlineMime = firstLine ? guessMimeTypeByFirstline(firstLine) : null;

	return firstlineMime ? [firstlineMime, Mimes.text] : [Mimes.unknown];
}

/**
 * Finds the mime of the best matching association for a path.
 *
 * Associations are visited from the last registered to the first so that,
 * among equally good candidates, the most recently registered one wins
 * (see https://github.com/microsoft/vscode/issues/20074).
 *
 * Precedence: exact file name, then longest matching file pattern, then
 * longest matching extension.
 *
 * @param path the lowercased full path
 * @param filename the lowercased base name of the path
 * @param associations the candidates to consider
 * @returns the mime of the winning association or `null` if none matches
 */
function guessMimeTypeByPath(path: string, filename: string, associations: ITextMimeAssociationItem[]): string | null {
	let bestPattern: ITextMimeAssociationItem | undefined;
	let bestExtension: ITextMimeAssociationItem | undefined;

	let index = associations.length;
	while (index-- > 0) {
		const candidate = associations[index];

		// An exact name match beats everything else, no need to look further
		if (candidate.filenameLowercase === filename) {
			return candidate.mime;
		}

		// Pattern: only test candidates whose pattern is longer than the current best
		const pattern = candidate.filepattern;
		if (pattern && (!bestPattern || pattern.length > bestPattern.filepattern!.length)) {
			const subject = candidate.filepatternOnPath ? path : filename; // full path if the pattern contains a path separator
			if (candidate.filepatternLowercase?.(subject)) {
				bestPattern = candidate;
			}
		}

		// Extension: only test candidates whose extension is longer than the current best
		const extension = candidate.extension;
		if (extension && (!bestExtension || extension.length > bestExtension.extension!.length)) {
			if (filename.endsWith(candidate.extensionLowercase!)) {
				bestExtension = candidate;
			}
		}
	}

	// A pattern match is preferred over an extension match
	const winner = bestPattern ?? bestExtension;

	return winner ? winner.mime : null;
}

/**
 * Finds the mime of the most recently registered association whose
 * `firstline` expression matches the given first line of content.
 *
 * @param firstLine the first line; a leading UTF-8 BOM is ignored
 * @returns the matching mime or `null` if the line is empty or nothing matches
 */
function guessMimeTypeByFirstline(firstLine: string): string | null {
	const line = startsWithUTF8BOM(firstLine) ? firstLine.slice(1) : firstLine;
	if (line.length === 0) {
		return null;
	}

	// Walk backwards so that the last registered association wins over all
	// others. This is for https://github.com/microsoft/vscode/issues/20074
	let index = registeredAssociations.length;
	while (index-- > 0) {
		const { firstline, mime } = registeredAssociations[index];
		if (firstline && line.match(firstline)?.length) {
			return mime;
		}
	}

	return null;
}

/**
 * A dictionary from string keys to string values. Judging by its name it maps
 * file extensions to media mime types (its use is not visible in this part of the file).
 */
interface MapExtToMediaMimes {
	[extension: string]: string;
}
Expand Down
121 changes: 1 addition & 120 deletions src/vs/base/test/common/mime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,129 +4,10 @@
*--------------------------------------------------------------------------------------------*/

import * as assert from 'assert';
import { guessMimeTypes, normalizeMimeType, registerTextMime } from 'vs/base/common/mime';
import { URI } from 'vs/base/common/uri';
import { normalizeMimeType } from 'vs/base/common/mime';

suite('Mime', () => {

test('Dynamically Register Text Mime', () => {

	// No association is expected to match before registering
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco')), ['application/unknown']);

	// By extension
	registerTextMime({ id: 'monaco', extension: '.monaco', mime: 'text/monaco' });
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco')), ['text/monaco', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('.monaco')), ['text/monaco', 'text/plain']);

	// By file name
	registerTextMime({ id: 'codefile', filename: 'Codefile', mime: 'text/code' });
	assert.deepStrictEqual(guessMimeTypes(URI.file('Codefile')), ['text/code', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.Codefile')), ['application/unknown']);

	// By file pattern (case is ignored)
	registerTextMime({ id: 'docker', filepattern: 'Docker*', mime: 'text/docker' });
	assert.deepStrictEqual(guessMimeTypes(URI.file('Docker-debug')), ['text/docker', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('docker-PROD')), ['text/docker', 'text/plain']);

	// By first line
	registerTextMime({ id: 'niceregex', mime: 'text/nice-regex', firstline: /RegexesAreNice/ });
	assert.deepStrictEqual(guessMimeTypes(URI.file('Randomfile.noregistration'), 'RegexesAreNice'), ['text/nice-regex', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('Randomfile.noregistration'), 'RegexesAreNotNice'), ['application/unknown']);

	// A file name match is preferred over a first line match
	assert.deepStrictEqual(guessMimeTypes(URI.file('Codefile'), 'RegexesAreNice'), ['text/code', 'text/plain']);
});

test('Mimes Priority', () => {
	registerTextMime({ id: 'monaco', extension: '.monaco', mime: 'text/monaco' });
	registerTextMime({ id: 'foobar', mime: 'text/foobar', firstline: /foobar/ });

	// Extension wins, with or without a matching first line
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco')), ['text/monaco', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco'), 'foobar'), ['text/monaco', 'text/plain']);

	// File name wins over file pattern
	registerTextMime({ id: 'docker', filename: 'dockerfile', mime: 'text/winner' });
	registerTextMime({ id: 'docker', filepattern: 'dockerfile*', mime: 'text/looser' });
	assert.deepStrictEqual(guessMimeTypes(URI.file('dockerfile')), ['text/winner', 'text/plain']);

	// Among identical first line expressions the one registered last wins
	registerTextMime({ id: 'azure-looser', mime: 'text/azure-looser', firstline: /azure/ });
	registerTextMime({ id: 'azure-winner', mime: 'text/azure-winner', firstline: /azure/ });
	assert.deepStrictEqual(guessMimeTypes(URI.file('azure'), 'azure'), ['text/azure-winner', 'text/plain']);
});

test('Specificity priority 1', () => {

	// Extension registered first, exact file name second
	registerTextMime({ id: 'monaco2', extension: '.monaco2', mime: 'text/monaco2' });
	registerTextMime({ id: 'monaco2', filename: 'specific.monaco2', mime: 'text/specific-monaco2' });

	const specificGuess = guessMimeTypes(URI.file('specific.monaco2'));
	assert.deepStrictEqual(specificGuess, ['text/specific-monaco2', 'text/plain']);

	const genericGuess = guessMimeTypes(URI.file('foo.monaco2'));
	assert.deepStrictEqual(genericGuess, ['text/monaco2', 'text/plain']);
});

test('Specificity priority 2', () => {

	// Exact file name registered first, extension second
	registerTextMime({ id: 'monaco3', filename: 'specific.monaco3', mime: 'text/specific-monaco3' });
	registerTextMime({ id: 'monaco3', extension: '.monaco3', mime: 'text/monaco3' });

	const specificGuess = guessMimeTypes(URI.file('specific.monaco3'));
	assert.deepStrictEqual(specificGuess, ['text/specific-monaco3', 'text/plain']);

	const genericGuess = guessMimeTypes(URI.file('foo.monaco3'));
	assert.deepStrictEqual(genericGuess, ['text/monaco3', 'text/plain']);
});

test('Mimes Priority - Longest Extension wins', () => {
	registerTextMime({ id: 'monaco', extension: '.monaco', mime: 'text/monaco' });
	registerTextMime({ id: 'monaco', extension: '.monaco.xml', mime: 'text/monaco-xml' });
	registerTextMime({ id: 'monaco', extension: '.monaco.xml.build', mime: 'text/monaco-xml-build' });

	// Each file is expected to resolve to the longest extension it ends with
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco')), ['text/monaco', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco.xml')), ['text/monaco-xml', 'text/plain']);
	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco.xml.build')), ['text/monaco-xml-build', 'text/plain']);
});

test('Mimes Priority - User configured wins', () => {

	// The user configured association is registered first, the regular one afterwards
	registerTextMime({ id: 'monaco', extension: '.monaco.xnl', mime: 'text/monaco', userConfigured: true });
	registerTextMime({ id: 'monaco', extension: '.monaco.xml', mime: 'text/monaco-xml' });

	assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco.xnl')), ['text/monaco', 'text/plain']);
});

test('Mimes Priority - Pattern matches on path if specified', () => {

	// First pattern contains a path separator, second one does not
	registerTextMime({ id: 'monaco', filepattern: '**/dot.monaco.xml', mime: 'text/monaco' });
	registerTextMime({ id: 'other', filepattern: '*ot.other.xml', mime: 'text/other' });

	assert.deepStrictEqual(guessMimeTypes(URI.file('/some/path/dot.monaco.xml')), ['text/monaco', 'text/plain']);
});

test('Mimes Priority - Last registered mime wins', () => {

	// Same pattern registered twice with different mimes
	registerTextMime({ id: 'monaco', filepattern: '**/dot.monaco.xml', mime: 'text/monaco' });
	registerTextMime({ id: 'other', filepattern: '**/dot.monaco.xml', mime: 'text/other' });

	assert.deepStrictEqual(guessMimeTypes(URI.file('/some/path/dot.monaco.xml')), ['text/other', 'text/plain']);
});

test('Data URIs', () => {
	registerTextMime({ id: 'data', extension: '.data', mime: 'text/data' });

	// The label meta data of the data URI carries the name to match against
	const dataUri = URI.parse(`data:;label:something.data;description:data,`);
	const guess = guessMimeTypes(dataUri);

	assert.deepStrictEqual(guess, ['text/data', 'text/plain']);
});

test('normalize', () => {
assert.strictEqual(normalizeMimeType('invalid'), 'invalid');
assert.strictEqual(normalizeMimeType('invalid', true), undefined);
Expand Down
Loading

0 comments on commit d2b7550

Please sign in to comment.